Diffstat (limited to 'drivers/gpu/drm/xe')
-rw-r--r--drivers/gpu/drm/xe/Kconfig29
-rw-r--r--drivers/gpu/drm/xe/Kconfig.debug28
-rw-r--r--drivers/gpu/drm/xe/Makefile60
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_abi.h38
-rw-r--r--drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h5
-rw-r--r--drivers/gpu/drm/xe/abi/guc_errors_abi.h17
-rw-r--r--drivers/gpu/drm/xe/abi/guc_klvs_abi.h55
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h4
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h112
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h26
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h13
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h9
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h2
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h31
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h37
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h42
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h (renamed from drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h)2
-rw-r--r--drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h132
-rw-r--r--drivers/gpu/drm/xe/display/ext/i915_utils.c26
-rw-r--r--drivers/gpu/drm/xe/display/intel_bo.c60
-rw-r--r--drivers/gpu/drm/xe/display/intel_fb_bo.c3
-rw-r--r--drivers/gpu/drm/xe/display/intel_fbdev_fb.c83
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.c153
-rw-r--r--drivers/gpu/drm/xe/display/xe_display.h8
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.c63
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_rpm.h11
-rw-r--r--drivers/gpu/drm/xe/display/xe_display_wa.c5
-rw-r--r--drivers/gpu/drm/xe/display/xe_dsb_buffer.c21
-rw-r--r--drivers/gpu/drm/xe/display/xe_fb_pin.c153
-rw-r--r--drivers/gpu/drm/xe/display/xe_hdcp_gsc.c10
-rw-r--r--drivers/gpu/drm/xe/display/xe_panic.c102
-rw-r--r--drivers/gpu/drm/xe/display/xe_plane_initial.c28
-rw-r--r--drivers/gpu/drm/xe/display/xe_stolen.c123
-rw-r--r--drivers/gpu/drm/xe/display/xe_tdf.c4
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_gpu_commands.h6
-rw-r--r--drivers/gpu/drm/xe/instructions/xe_mi_commands.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_bars.h1
-rw-r--r--drivers/gpu/drm/xe/regs/xe_engine_regs.h7
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gsc_regs.h6
-rw-r--r--drivers/gpu/drm/xe/regs/xe_gt_regs.h35
-rw-r--r--drivers/gpu/drm/xe/regs/xe_hw_error_regs.h20
-rw-r--r--drivers/gpu/drm/xe/regs/xe_i2c_regs.h23
-rw-r--r--drivers/gpu/drm/xe/regs/xe_irq_regs.h10
-rw-r--r--drivers/gpu/drm/xe/regs/xe_lrc_layout.h4
-rw-r--r--drivers/gpu/drm/xe/regs/xe_mchbar_regs.h11
-rw-r--r--drivers/gpu/drm/xe/regs/xe_oa_regs.h3
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pcode_regs.h6
-rw-r--r--drivers/gpu/drm/xe/regs/xe_pmt.h18
-rw-r--r--drivers/gpu/drm/xe/regs/xe_regs.h4
-rw-r--r--drivers/gpu/drm/xe/tests/xe_bo.c42
-rw-r--r--drivers/gpu/drm/xe/tests/xe_dma_buf.c56
-rw-r--r--drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c208
-rw-r--r--drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c232
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c13
-rw-r--r--drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c776
-rw-r--r--drivers/gpu/drm/xe/tests/xe_live_test_mod.c2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_migrate.c118
-rw-r--r--drivers/gpu/drm/xe/tests/xe_mocs.c2
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci.c329
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.c34
-rw-r--r--drivers/gpu/drm/xe/tests/xe_pci_test.h21
-rw-r--r--drivers/gpu/drm/xe/tests/xe_rtp_test.c6
-rw-r--r--drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c227
-rw-r--r--drivers/gpu/drm/xe/tests/xe_wa_test.c90
-rw-r--r--drivers/gpu/drm/xe/xe_assert.h4
-rw-r--r--drivers/gpu/drm/xe/xe_bb.c37
-rw-r--r--drivers/gpu/drm/xe/xe_bb.h5
-rw-r--r--drivers/gpu/drm/xe/xe_bo.c1180
-rw-r--r--drivers/gpu/drm/xe/xe_bo.h106
-rw-r--r--drivers/gpu/drm/xe/xe_bo_doc.h8
-rw-r--r--drivers/gpu/drm/xe/xe_bo_evict.c25
-rw-r--r--drivers/gpu/drm/xe/xe_bo_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.c1153
-rw-r--r--drivers/gpu/drm/xe/xe_configfs.h33
-rw-r--r--drivers/gpu/drm/xe/xe_debugfs.c202
-rw-r--r--drivers/gpu/drm/xe/xe_dep_job_types.h29
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.c143
-rw-r--r--drivers/gpu/drm/xe/xe_dep_scheduler.h21
-rw-r--r--drivers/gpu/drm/xe/xe_devcoredump.c54
-rw-r--r--drivers/gpu/drm/xe/xe_device.c381
-rw-r--r--drivers/gpu/drm/xe/xe_device.h56
-rw-r--r--drivers/gpu/drm/xe/xe_device_sysfs.c164
-rw-r--r--drivers/gpu/drm/xe/xe_device_types.h246
-rw-r--r--drivers/gpu/drm/xe/xe_device_wa_oob.rules5
-rw-r--r--drivers/gpu/drm/xe/xe_dma_buf.c123
-rw-r--r--drivers/gpu/drm/xe/xe_drm_client.c2
-rw-r--r--drivers/gpu/drm/xe/xe_drv.h2
-rw-r--r--drivers/gpu/drm/xe/xe_eu_stall.c53
-rw-r--r--drivers/gpu/drm/xe/xe_exec.c72
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.c326
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue.h29
-rw-r--r--drivers/gpu/drm/xe/xe_exec_queue_types.h43
-rw-r--r--drivers/gpu/drm/xe/xe_execlist.c27
-rw-r--r--drivers/gpu/drm/xe/xe_execlist_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake.c2
-rw-r--r--drivers/gpu/drm/xe/xe_force_wake_types.h26
-rw-r--r--drivers/gpu/drm/xe/xe_gen_wa_oob.c53
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.c455
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt.h32
-rw-r--r--drivers/gpu/drm/xe/xe_ggtt_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.c14
-rw-r--r--drivers/gpu/drm/xe/xe_gpu_scheduler.h37
-rw-r--r--drivers/gpu/drm/xe/xe_gsc.c20
-rw-r--r--drivers/gpu/drm/xe/xe_gsc_proxy.c3
-rw-r--r--drivers/gpu/drm/xe/xe_gt.c399
-rw-r--r--drivers/gpu/drm/xe/xe_gt.h27
-rw-r--r--drivers/gpu/drm/xe/xe_gt_clock.c26
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.c304
-rw-r--r--drivers/gpu/drm/xe/xe_gt_debugfs.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_freq.c68
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.c39
-rw-r--r--drivers/gpu/drm/xe/xe_gt_idle.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.c127
-rw-r--r--drivers/gpu/drm/xe/xe_gt_mcr.h3
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.c713
-rw-r--r--drivers/gpu/drm/xe/xe_gt_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_gt_printk.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.c132
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf.h6
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c538
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h16
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c757
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h12
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c470
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c1022
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h48
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h34
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c189
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h2
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_printk.h7
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.c719
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf.h13
-rw-r--r--drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h63
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.c57
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats.h1
-rw-r--r--drivers/gpu/drm/xe/xe_gt_stats_types.h33
-rw-r--r--drivers/gpu/drm/xe/xe_gt_throttle.c355
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c578
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h42
-rw-r--r--drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h32
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.c115
-rw-r--r--drivers/gpu/drm/xe/xe_gt_topology.h10
-rw-r--r--drivers/gpu/drm/xe/xe_gt_types.h114
-rw-r--r--drivers/gpu/drm/xe/xe_guard.h119
-rw-r--r--drivers/gpu/drm/xe/xe_guc.c559
-rw-r--r--drivers/gpu/drm/xe/xe_guc.h7
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads.c169
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ads_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.c63
-rw-r--r--drivers/gpu/drm/xe/xe_guc_buf.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_capture.c35
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.c509
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct.h18
-rw-r--r--drivers/gpu/drm/xe/xe_guc_ct_types.h17
-rw-r--r--drivers/gpu/drm/xe/xe_guc_engine_activity.c15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_exec_queue_types.h21
-rw-r--r--drivers/gpu/drm/xe/xe_guc_fwif.h38
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.c2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_log_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.c95
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pagefault.h15
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.c485
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_pc_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay.c17
-rw-r--r--drivers/gpu/drm/xe/xe_guc_relay_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.c828
-rw-r--r--drivers/gpu/drm/xe/xe_guc_submit.h9
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.c242
-rw-r--r--drivers/gpu/drm/xe/xe_guc_tlb_inval.h19
-rw-r--r--drivers/gpu/drm/xe/xe_guc_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_heci_gsc.c13
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.c325
-rw-r--r--drivers/gpu/drm/xe/xe_hmm.h18
-rw-r--r--drivers/gpu/drm/xe/xe_huc.c12
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine.c102
-rw-r--r--drivers/gpu/drm/xe/xe_hw_engine_group.c57
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.c182
-rw-r--r--drivers/gpu/drm/xe/xe_hw_error.h15
-rw-r--r--drivers/gpu/drm/xe/xe_hw_fence.c5
-rw-r--r--drivers/gpu/drm/xe/xe_hwmon.c572
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.c372
-rw-r--r--drivers/gpu/drm/xe/xe_i2c.h68
-rw-r--r--drivers/gpu/drm/xe/xe_irq.c167
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.c464
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw.h17
-rw-r--r--drivers/gpu/drm/xe/xe_late_bind_fw_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.c93
-rw-r--r--drivers/gpu/drm/xe/xe_lmtt.h1
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.c580
-rw-r--r--drivers/gpu/drm/xe/xe_lrc.h27
-rw-r--r--drivers/gpu/drm/xe/xe_lrc_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_map.h4
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.c57
-rw-r--r--drivers/gpu/drm/xe/xe_memirq.h2
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.c919
-rw-r--r--drivers/gpu/drm/xe/xe_migrate.h49
-rw-r--r--drivers/gpu/drm/xe/xe_migrate_doc.h2
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.c66
-rw-r--r--drivers/gpu/drm/xe/xe_mmio.h4
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.c226
-rw-r--r--drivers/gpu/drm/xe/xe_mmio_gem.h20
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.c42
-rw-r--r--drivers/gpu/drm/xe/xe_mocs.h8
-rw-r--r--drivers/gpu/drm/xe/xe_module.c77
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.c170
-rw-r--r--drivers/gpu/drm/xe/xe_nvm.h15
-rw-r--r--drivers/gpu/drm/xe/xe_oa.c315
-rw-r--r--drivers/gpu/drm/xe/xe_oa_types.h17
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.c444
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault.h19
-rw-r--r--drivers/gpu/drm/xe/xe_pagefault_types.h136
-rw-r--r--drivers/gpu/drm/xe/xe_pat.c189
-rw-r--r--drivers/gpu/drm/xe/xe_pat.h12
-rw-r--r--drivers/gpu/drm/xe/xe_pci.c488
-rw-r--r--drivers/gpu/drm/xe/xe_pci.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.c144
-rw-r--r--drivers/gpu/drm/xe/xe_pci_sriov.h1
-rw-r--r--drivers/gpu/drm/xe/xe_pci_types.h49
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.c81
-rw-r--r--drivers/gpu/drm/xe/xe_pcode.h15
-rw-r--r--drivers/gpu/drm/xe/xe_pcode_api.h28
-rw-r--r--drivers/gpu/drm/xe/xe_platform_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pm.c180
-rw-r--r--drivers/gpu/drm/xe/xe_pm.h19
-rw-r--r--drivers/gpu/drm/xe/xe_pmu.c18
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence.c11
-rw-r--r--drivers/gpu/drm/xe/xe_preempt_fence_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_printk.h129
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.c294
-rw-r--r--drivers/gpu/drm/xe/xe_psmi.h14
-rw-r--r--drivers/gpu/drm/xe/xe_pt.c540
-rw-r--r--drivers/gpu/drm/xe/xe_pt.h3
-rw-r--r--drivers/gpu/drm/xe/xe_pt_types.h5
-rw-r--r--drivers/gpu/drm/xe/xe_pxp.c148
-rw-r--r--drivers/gpu/drm/xe/xe_pxp_submit.c36
-rw-r--r--drivers/gpu/drm/xe/xe_query.c64
-rw-r--r--drivers/gpu/drm/xe/xe_range_fence.h4
-rw-r--r--drivers/gpu/drm/xe/xe_reg_whitelist.c10
-rw-r--r--drivers/gpu/drm/xe/xe_res_cursor.h10
-rw-r--r--drivers/gpu/drm/xe/xe_ring_ops.c61
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.c69
-rw-r--r--drivers/gpu/drm/xe/xe_rtp.h40
-rw-r--r--drivers/gpu/drm/xe/xe_rtp_types.h6
-rw-r--r--drivers/gpu/drm/xe/xe_sa.c22
-rw-r--r--drivers/gpu/drm/xe/xe_sa.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sa_types.h1
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.c42
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job.h13
-rw-r--r--drivers/gpu/drm/xe/xe_sched_job_types.h11
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.c96
-rw-r--r--drivers/gpu/drm/xe/xe_shrinker.h4
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.c17
-rw-r--r--drivers/gpu/drm/xe/xe_sriov.h1
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.c520
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_packet_types.h75
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.c181
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf.h19
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.c279
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_control.h22
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c395
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h18
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_helpers.h27
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.c365
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration.h30
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h37
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.c438
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision.h45
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.c216
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service.h23
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_service_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c647
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_pf_types.h70
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_printk.h12
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_types.h36
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.c174
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf.h8
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.c480
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs.h35
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h51
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vf_types.h47
-rw-r--r--drivers/gpu/drm/xe/xe_sriov_vfio.c80
-rw-r--r--drivers/gpu/drm/xe/xe_step.c2
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.c200
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode.h5
-rw-r--r--drivers/gpu/drm/xe/xe_survivability_mode_types.h8
-rw-r--r--drivers/gpu/drm/xe/xe_svm.c1085
-rw-r--r--drivers/gpu/drm/xe/xe_svm.h230
-rw-r--r--drivers/gpu/drm/xe/xe_sync.c93
-rw-r--r--drivers/gpu/drm/xe/xe_sync.h3
-rw-r--r--drivers/gpu/drm/xe/xe_sync_types.h3
-rw-r--r--drivers/gpu/drm/xe/xe_tile.c87
-rw-r--r--drivers/gpu/drm/xe/xe_tile.h7
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.c142
-rw-r--r--drivers/gpu/drm/xe/xe_tile_debugfs.h16
-rw-r--r--drivers/gpu/drm/xe/xe_tile_printk.h127
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c253
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h15
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_printk.h33
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.c350
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h23
-rw-r--r--drivers/gpu/drm/xe/xe_tile_sysfs.c12
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.c433
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval.h46
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.c285
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_job.h34
-rw-r--r--drivers/gpu/drm/xe/xe_tlb_inval_types.h130
-rw-r--r--drivers/gpu/drm/xe/xe_trace.h63
-rw-r--r--drivers/gpu/drm/xe/xe_trace_bo.h4
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c18
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_sys_mgr.c6
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.c28
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr.h3
-rw-r--r--drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h4
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.c32
-rw-r--r--drivers/gpu/drm/xe/xe_tuning.h2
-rw-r--r--drivers/gpu/drm/xe/xe_uc.c78
-rw-r--r--drivers/gpu/drm/xe/xe_uc.h5
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw.c89
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_abi.h130
-rw-r--r--drivers/gpu/drm/xe/xe_uc_fw_types.h11
-rw-r--r--drivers/gpu/drm/xe/xe_uc_types.h2
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.c322
-rw-r--r--drivers/gpu/drm/xe/xe_userptr.h107
-rw-r--r--drivers/gpu/drm/xe/xe_validation.c278
-rw-r--r--drivers/gpu/drm/xe/xe_validation.h192
-rw-r--r--drivers/gpu/drm/xe/xe_vm.c1807
-rw-r--r--drivers/gpu/drm/xe/xe_vm.h141
-rw-r--r--drivers/gpu/drm/xe/xe_vm_doc.h8
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.c431
-rw-r--r--drivers/gpu/drm/xe/xe_vm_madvise.h15
-rw-r--r--drivers/gpu/drm/xe/xe_vm_types.h179
-rw-r--r--drivers/gpu/drm/xe/xe_vram.c291
-rw-r--r--drivers/gpu/drm/xe/xe_vram.h12
-rw-r--r--drivers/gpu/drm/xe/xe_vram_freq.c4
-rw-r--r--drivers/gpu/drm/xe/xe_vram_types.h85
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.c24
-rw-r--r--drivers/gpu/drm/xe/xe_vsec.h4
-rw-r--r--drivers/gpu/drm/xe/xe_wa.c246
-rw-r--r--drivers/gpu/drm/xe/xe_wa.h30
-rw-r--r--drivers/gpu/drm/xe/xe_wa_oob.rules40
349 files changed, 31304 insertions, 9639 deletions
diff --git a/drivers/gpu/drm/xe/Kconfig b/drivers/gpu/drm/xe/Kconfig
index 2169bc969ea1..4b288eb3f5b0 100644
--- a/drivers/gpu/drm/xe/Kconfig
+++ b/drivers/gpu/drm/xe/Kconfig
@@ -1,7 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
config DRM_XE
- tristate "Intel Xe Graphics"
- depends on DRM && PCI && (m || (y && KUNIT=y))
+ tristate "Intel Xe2 Graphics"
+ depends on DRM && PCI
+ depends on KUNIT || !KUNIT
+ depends on INTEL_VSEC || !INTEL_VSEC
+ depends on X86_PLATFORM_DEVICES || !(X86 && ACPI)
+ depends on PAGE_SIZE_4KB || COMPILE_TEST || BROKEN
select INTERVAL_TREE
# we need shmfs for the swappable backing store, and in particular
# the shmem_readpage() which depends upon tmpfs
@@ -9,7 +13,6 @@ config DRM_XE
select TMPFS
select DRM_BUDDY
select DRM_CLIENT_SELECTION
- select DRM_EXEC
select DRM_KMS_HELPER
select DRM_KUNIT_TEST_HELPERS if DRM_XE_KUNIT_TEST != n
select DRM_PANEL
@@ -27,10 +30,8 @@ config DRM_XE
select BACKLIGHT_CLASS_DEVICE if ACPI
select INPUT if ACPI
select ACPI_VIDEO if X86 && ACPI
- select X86_PLATFORM_DEVICES if X86 && ACPI
select ACPI_WMI if X86 && ACPI
select SYNC_FILE
- select IOSF_MBI
select CRC32
select SND_HDA_I915 if SND_HDA_CORE
select CEC_CORE if CEC_NOTIFIER
@@ -38,14 +39,16 @@ config DRM_XE
select DRM_TTM
select DRM_TTM_HELPER
select DRM_EXEC
+ select DRM_GPUSVM if !UML && DEVICE_PRIVATE
select DRM_GPUVM
select DRM_SCHED
select MMU_NOTIFIER
select WANT_DEV_COREDUMP
select AUXILIARY_BUS
- select HMM_MIRROR
+ select REGMAP if I2C
help
- Experimental driver for Intel Xe series GPUs
+ Driver for Intel Xe2 series GPUs and later. Experimental support
+ for Xe series is also available.
If "M" is selected, the module will be called xe.
@@ -84,16 +87,18 @@ config DRM_XE_GPUSVM
Enable this option if you want support for CPU to GPU address
mirroring.
- If in doubut say "Y".
+ If in doubt say "Y".
-config DRM_XE_DEVMEM_MIRROR
- bool "Enable device memory mirror"
+config DRM_XE_PAGEMAP
+ bool "Enable device memory pool for SVM"
depends on DRM_XE_GPUSVM
select GET_FREE_REGION
default y
help
- Disable this option only if you want to compile out without device
- memory mirror. Will reduce KMD memory footprint when disabled.
+ Disable this option only if you don't want to expose local device
+ memory for SVM. Will reduce KMD memory footprint when disabled.
+
+ If in doubt say "Y".
config DRM_XE_FORCE_PROBE
string "Force probe xe for selected Intel hardware IDs"
diff --git a/drivers/gpu/drm/xe/Kconfig.debug b/drivers/gpu/drm/xe/Kconfig.debug
index 0d749ed44878..01227c77f6d7 100644
--- a/drivers/gpu/drm/xe/Kconfig.debug
+++ b/drivers/gpu/drm/xe/Kconfig.debug
@@ -40,23 +40,23 @@ config DRM_XE_DEBUG_VM
If in doubt, say "N".
-config DRM_XE_DEBUG_MEMIRQ
- bool "Enable extra memirq debugging"
+config DRM_XE_DEBUG_SRIOV
+ bool "Enable extra SR-IOV debugging"
default n
+ imply DRM_XE_DEBUG_MEMIRQ
help
- Choose this option to enable additional debugging info for
- memory based interrupts.
+ Enable extra SR-IOV debugging info.
Recommended for driver developers only.
If in doubt, say "N".
-config DRM_XE_DEBUG_SRIOV
- bool "Enable extra SR-IOV debugging"
+config DRM_XE_DEBUG_MEMIRQ
+ bool "Enable extra memirq debugging"
default n
- select DRM_XE_DEBUG_MEMIRQ
help
- Enable extra SR-IOV debugging info.
+ Choose this option to enable additional debugging info for
+ memory based interrupts.
Recommended for driver developers only.
@@ -86,12 +86,17 @@ config DRM_XE_KUNIT_TEST
If in doubt, say "N".
-config DRM_XE_LARGE_GUC_BUFFER
- bool "Enable larger guc log buffer"
+config DRM_XE_DEBUG_GUC
+ bool "Enable extra GuC related debug options"
+ depends on DRM_XE_DEBUG
default n
+ select STACKDEPOT
help
Choose this option when debugging guc issues.
- Buffer should be large enough for complex issues.
+ The GuC log buffer is increased to the maximum allowed, which should
+ be large enough for complex issues. The tracking of FAST_REQ messages
+ is extended to include a record of the calling stack, which is then
+ dumped on a FAST_REQ error notification.
Recommended for driver developers only.
@@ -99,6 +104,7 @@ config DRM_XE_LARGE_GUC_BUFFER
config DRM_XE_USERPTR_INVAL_INJECT
bool "Inject userptr invalidation -EINVAL errors"
+ depends on DRM_GPUSVM
default n
help
Choose this option when debugging error paths that
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index e4bf484d4121..62be4a5227e4 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -21,6 +21,13 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
$(src)/xe_wa_oob.rules
$(call cmd,wa_oob)
+generated_device_oob := $(obj)/generated/xe_device_wa_oob.c $(obj)/generated/xe_device_wa_oob.h
+quiet_cmd_device_wa_oob = GEN $(notdir $(generated_device_oob))
+ cmd_device_wa_oob = mkdir -p $(@D); $^ $(generated_device_oob)
+$(obj)/generated/%_device_wa_oob.c $(obj)/generated/%_device_wa_oob.h: $(obj)/xe_gen_wa_oob \
+ $(src)/xe_device_wa_oob.rules
+ $(call cmd,device_wa_oob)
+
# Please keep these build lists sorted!
# core driver code
@@ -28,6 +35,7 @@ $(obj)/generated/%_wa_oob.c $(obj)/generated/%_wa_oob.h: $(obj)/xe_gen_wa_oob \
xe-y += xe_bb.o \
xe_bo.o \
xe_bo_evict.o \
+ xe_dep_scheduler.o \
xe_devcoredump.o \
xe_device.o \
xe_device_sysfs.o \
@@ -50,10 +58,8 @@ xe-y += xe_bb.o \
xe_gt_freq.o \
xe_gt_idle.o \
xe_gt_mcr.o \
- xe_gt_pagefault.o \
xe_gt_sysfs.o \
xe_gt_throttle.o \
- xe_gt_tlb_invalidation.o \
xe_gt_topology.o \
xe_guc.o \
xe_guc_ads.o \
@@ -66,27 +72,35 @@ xe-y += xe_bb.o \
xe_guc_id_mgr.o \
xe_guc_klv_helpers.o \
xe_guc_log.o \
+ xe_guc_pagefault.o \
xe_guc_pc.o \
xe_guc_submit.o \
+ xe_guc_tlb_inval.o \
xe_heci_gsc.o \
xe_huc.o \
xe_hw_engine.o \
xe_hw_engine_class_sysfs.o \
xe_hw_engine_group.o \
+ xe_hw_error.o \
xe_hw_fence.o \
xe_irq.o \
+ xe_late_bind_fw.o \
xe_lrc.o \
xe_migrate.o \
xe_mmio.o \
+ xe_mmio_gem.o \
xe_mocs.o \
xe_module.o \
+ xe_nvm.o \
xe_oa.o \
xe_observation.o \
+ xe_pagefault.o \
xe_pat.o \
xe_pci.o \
xe_pcode.o \
xe_pm.o \
xe_preempt_fence.o \
+ xe_psmi.o \
xe_pt.o \
xe_pt_walk.o \
xe_pxp.o \
@@ -106,6 +120,8 @@ xe-y += xe_bb.o \
xe_sync.o \
xe_tile.o \
xe_tile_sysfs.o \
+ xe_tlb_inval.o \
+ xe_tlb_inval_job.o \
xe_trace.o \
xe_trace_bo.o \
xe_trace_guc.o \
@@ -116,7 +132,9 @@ xe-y += xe_bb.o \
xe_tuning.o \
xe_uc.o \
xe_uc_fw.o \
+ xe_validation.o \
xe_vm.o \
+ xe_vm_madvise.o \
xe_vram.o \
xe_vram_freq.o \
xe_vsec.o \
@@ -124,8 +142,9 @@ xe-y += xe_bb.o \
xe_wait_user_fence.o \
xe_wopcm.o
-xe-$(CONFIG_HMM_MIRROR) += xe_hmm.o
+xe-$(CONFIG_I2C) += xe_i2c.o
xe-$(CONFIG_DRM_XE_GPUSVM) += xe_svm.o
+xe-$(CONFIG_DRM_GPUSVM) += xe_userptr.o
# graphics hardware monitoring (HWMON) support
xe-$(CONFIG_HWMON) += xe_hwmon.o
@@ -139,7 +158,9 @@ xe-y += \
xe_guc_relay.o \
xe_memirq.o \
xe_sriov.o \
- xe_sriov_vf.o
+ xe_sriov_vf.o \
+ xe_sriov_vf_ccs.o \
+ xe_tile_sriov_vf.o
xe-$(CONFIG_PCI_IOV) += \
xe_gt_sriov_pf.o \
@@ -153,7 +174,19 @@ xe-$(CONFIG_PCI_IOV) += \
xe_lmtt_2l.o \
xe_lmtt_ml.o \
xe_pci_sriov.o \
- xe_sriov_pf.o
+ xe_sriov_packet.o \
+ xe_sriov_pf.o \
+ xe_sriov_pf_control.o \
+ xe_sriov_pf_debugfs.o \
+ xe_sriov_pf_migration.o \
+ xe_sriov_pf_provision.o \
+ xe_sriov_pf_service.o \
+ xe_sriov_pf_sysfs.o \
+ xe_tile_sriov_pf_debugfs.o
+
+ifdef CONFIG_XE_VFIO_PCI
+ xe-$(CONFIG_PCI_IOV) += xe_sriov_vfio.o
+endif
# include helpers for tests even when XE is built-in
ifdef CONFIG_DRM_XE_KUNIT_TEST
@@ -180,7 +213,6 @@ $(obj)/i915-display/%.o: $(srctree)/drivers/gpu/drm/i915/display/%.c FORCE
# Display code specific to xe
xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/ext/i915_irq.o \
- display/ext/i915_utils.o \
display/intel_bo.o \
display/intel_fb_bo.o \
display/intel_fbdev_fb.o \
@@ -191,7 +223,9 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
display/xe_dsb_buffer.o \
display/xe_fb_pin.o \
display/xe_hdcp_gsc.o \
+ display/xe_panic.o \
display/xe_plane_initial.o \
+ display/xe_stolen.o \
display/xe_tdf.o
# SOC code shared with i915
@@ -204,20 +238,23 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/icl_dsi.o \
i915-display/intel_alpm.o \
i915-display/intel_atomic.o \
- i915-display/intel_atomic_plane.o \
i915-display/intel_audio.o \
i915-display/intel_backlight.o \
i915-display/intel_bios.o \
i915-display/intel_bw.o \
+ i915-display/intel_casf.o \
i915-display/intel_cdclk.o \
i915-display/intel_cmtg.o \
i915-display/intel_color.o \
+ i915-display/intel_colorop.o \
+ i915-display/intel_color_pipeline.o \
i915-display/intel_combo_phy.o \
i915-display/intel_connector.o \
i915-display/intel_crtc.o \
i915-display/intel_crtc_state_dump.o \
i915-display/intel_cursor.o \
i915-display/intel_cx0_phy.o \
+ i915-display/intel_dbuf_bw.o \
i915-display/intel_ddi.o \
i915-display/intel_ddi_buf_trans.o \
i915-display/intel_display.o \
@@ -229,7 +266,9 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_display_power.o \
i915-display/intel_display_power_map.o \
i915-display/intel_display_power_well.o \
+ i915-display/intel_display_rpm.o \
i915-display/intel_display_trace.o \
+ i915-display/intel_display_utils.o \
i915-display/intel_display_wa.o \
i915-display/intel_dkl_phy.o \
i915-display/intel_dmc.o \
@@ -254,6 +293,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_fbc.o \
i915-display/intel_fdi.o \
i915-display/intel_fifo_underrun.o \
+ i915-display/intel_flipq.o \
i915-display/intel_frontbuffer.o \
i915-display/intel_global_state.o \
i915-display/intel_gmbus.o \
@@ -265,11 +305,13 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_hti.o \
i915-display/intel_link_bw.o \
i915-display/intel_lspcon.o \
+ i915-display/intel_lt_phy.o \
i915-display/intel_modeset_lock.o \
i915-display/intel_modeset_setup.o \
i915-display/intel_modeset_verify.o \
i915-display/intel_panel.o \
i915-display/intel_pfit.o \
+ i915-display/intel_plane.o \
i915-display/intel_pmdemand.o \
i915-display/intel_pch.o \
i915-display/intel_pps.o \
@@ -284,6 +326,7 @@ xe-$(CONFIG_DRM_XE_DISPLAY) += \
i915-display/intel_vga.o \
i915-display/intel_vrr.o \
i915-display/intel_wm.o \
+ i915-display/skl_prefill.o \
i915-display/skl_scaler.o \
i915-display/skl_universal_plane.o \
i915-display/skl_watermark.o
@@ -305,6 +348,7 @@ ifeq ($(CONFIG_DEBUG_FS),y)
xe_gt_stats.o \
xe_guc_debugfs.o \
xe_huc_debugfs.o \
+ xe_tile_debugfs.o \
xe_uc_debugfs.o
xe-$(CONFIG_PCI_IOV) += xe_gt_sriov_pf_debugfs.o
@@ -337,4 +381,4 @@ $(obj)/%.hdrtest: $(src)/%.h FORCE
$(call if_changed_dep,hdrtest)
uses_generated_oob := $(addprefix $(obj)/, $(xe-y))
-$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h
+$(uses_generated_oob): $(obj)/generated/xe_wa_oob.h $(obj)/generated/xe_device_wa_oob.h
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
index 448afb86e05c..47756e4674a1 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_abi.h
@@ -117,6 +117,7 @@ enum xe_guc_action {
XE_GUC_ACTION_ENTER_S_STATE = 0x501,
XE_GUC_ACTION_EXIT_S_STATE = 0x502,
XE_GUC_ACTION_GLOBAL_SCHED_POLICY_CHANGE = 0x506,
+ XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV = 0x509,
XE_GUC_ACTION_SCHED_CONTEXT = 0x1000,
XE_GUC_ACTION_SCHED_CONTEXT_MODE_SET = 0x1001,
XE_GUC_ACTION_SCHED_CONTEXT_MODE_DONE = 0x1002,
@@ -142,6 +143,7 @@ enum xe_guc_action {
XE_GUC_ACTION_SET_ENG_UTIL_BUFF = 0x550A,
XE_GUC_ACTION_SET_DEVICE_ENGINE_ACTIVITY_BUFFER = 0x550C,
XE_GUC_ACTION_SET_FUNCTION_ENGINE_ACTIVITY_BUFFER = 0x550D,
+ XE_GUC_ACTION_OPT_IN_FEATURE_KLV = 0x550E,
XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR = 0x6000,
XE_GUC_ACTION_REPORT_PAGE_FAULT_REQ_DESC = 0x6002,
XE_GUC_ACTION_PAGE_FAULT_RES_DESC = 0x6003,
@@ -153,6 +155,8 @@ enum xe_guc_action {
XE_GUC_ACTION_NOTIFY_FLUSH_LOG_BUFFER_TO_FILE = 0x8003,
XE_GUC_ACTION_NOTIFY_CRASH_DUMP_POSTED = 0x8004,
XE_GUC_ACTION_NOTIFY_EXCEPTION = 0x8005,
+ XE_GUC_ACTION_TEST_G2G_SEND = 0xF001,
+ XE_GUC_ACTION_TEST_G2G_RECV = 0xF002,
XE_GUC_ACTION_LIMIT
};
@@ -161,6 +165,37 @@ enum xe_guc_preempt_options {
XE_GUC_PREEMPT_OPTION_DROP_SUBMIT_Q = 0x8,
};
+enum xe_guc_register_context_param_offsets {
+ XE_GUC_REGISTER_CONTEXT_DATA_0_MBZ = 0,
+ XE_GUC_REGISTER_CONTEXT_DATA_1_FLAGS,
+ XE_GUC_REGISTER_CONTEXT_DATA_2_CONTEXT_INDEX,
+ XE_GUC_REGISTER_CONTEXT_DATA_3_ENGINE_CLASS,
+ XE_GUC_REGISTER_CONTEXT_DATA_4_ENGINE_SUBMIT_MASK,
+ XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER,
+ XE_GUC_REGISTER_CONTEXT_DATA_6_WQ_DESC_ADDR_UPPER,
+ XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER,
+ XE_GUC_REGISTER_CONTEXT_DATA_8_WQ_BUF_BASE_UPPER,
+ XE_GUC_REGISTER_CONTEXT_DATA_9_WQ_BUF_SIZE,
+ XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR,
+ XE_GUC_REGISTER_CONTEXT_MSG_LEN,
+};
+
+enum xe_guc_register_context_multi_lrc_param_offsets {
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_0_MBZ = 0,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_1_FLAGS,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_2_PARENT_CONTEXT,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_3_ENGINE_CLASS,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_4_ENGINE_SUBMIT_MASK,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_6_WQ_DESC_ADDR_UPPER,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_8_WQ_BUF_BASE_UPPER,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_9_WQ_BUF_SIZE,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR,
+ XE_GUC_REGISTER_CONTEXT_MULTI_LRC_MSG_MIN_LEN = 11,
+};
+
enum xe_guc_report_status {
XE_GUC_REPORT_STATUS_UNKNOWN = 0x0,
XE_GUC_REPORT_STATUS_ACKED = 0x1,
@@ -240,4 +275,7 @@ enum xe_guc_g2g_type {
#define XE_G2G_DEREGISTER_TILE REG_GENMASK(15, 12)
#define XE_G2G_DEREGISTER_TYPE REG_GENMASK(11, 8)
+/* invalid type for XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR */
+#define XE_GUC_CAT_ERR_TYPE_INVALID 0xdeadbeef
+
#endif
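The XE_GUC_REGISTER_CONTEXT_DATA_* enums above name the dword offsets of the context-registration payload instead of leaving callers to hard-code indices. As a rough illustration only (the real driver builds the message through its CTB helpers, the surrounding header/action dwords are not shown in this hunk, and the local names below are hypothetical):

	/* Hypothetical sketch: fill the payload by named offset. */
	u32 data[XE_GUC_REGISTER_CONTEXT_MSG_LEN] = {};

	data[XE_GUC_REGISTER_CONTEXT_DATA_1_FLAGS] = flags;
	data[XE_GUC_REGISTER_CONTEXT_DATA_2_CONTEXT_INDEX] = guc_ctx_id;
	data[XE_GUC_REGISTER_CONTEXT_DATA_3_ENGINE_CLASS] = engine_class;
	data[XE_GUC_REGISTER_CONTEXT_DATA_4_ENGINE_SUBMIT_MASK] = submit_mask;
	data[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER] = lower_32_bits(wq_desc);
	data[XE_GUC_REGISTER_CONTEXT_DATA_6_WQ_DESC_ADDR_UPPER] = upper_32_bits(wq_desc);
	data[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER] = lower_32_bits(wq_base);
	data[XE_GUC_REGISTER_CONTEXT_DATA_8_WQ_BUF_BASE_UPPER] = upper_32_bits(wq_base);
	data[XE_GUC_REGISTER_CONTEXT_DATA_9_WQ_BUF_SIZE] = wq_size;
	data[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR] = lrc_ggtt_addr;
	/* DATA_0_MBZ stays zero; the dwords are then sent over the H2G/CTB path. */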
diff --git a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
index b28c8fa061f7..ce5c59517528 100644
--- a/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_actions_slpc_abi.h
@@ -210,6 +210,11 @@ struct slpc_shared_data {
u8 reserved_mode_definition[4096];
} __packed;
+enum slpc_power_profile {
+ SLPC_POWER_PROFILE_BASE = 0x0,
+ SLPC_POWER_PROFILE_POWER_SAVING = 0x1
+};
+
/**
* DOC: SLPC H2G MESSAGE FORMAT
*
diff --git a/drivers/gpu/drm/xe/abi/guc_errors_abi.h b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
index 2c627a21648f..ad76b4baf42e 100644
--- a/drivers/gpu/drm/xe/abi/guc_errors_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_errors_abi.h
@@ -6,8 +6,7 @@
#ifndef _ABI_GUC_ERRORS_ABI_H
#define _ABI_GUC_ERRORS_ABI_H
-enum xe_guc_response_status {
- XE_GUC_RESPONSE_STATUS_SUCCESS = 0x0,
+enum xe_guc_response {
XE_GUC_RESPONSE_ERROR_PROTOCOL = 0x04,
XE_GUC_RESPONSE_INVALID_STATE = 0x0A,
XE_GUC_RESPONSE_UNSUPPORTED_VERSION = 0x0B,
@@ -21,12 +20,20 @@ enum xe_guc_response_status {
XE_GUC_RESPONSE_CANNOT_COMPLETE_ACTION = 0x41,
XE_GUC_RESPONSE_INVALID_KLV_DATA = 0x50,
XE_GUC_RESPONSE_INVALID_PARAMS = 0x60,
+ XE_GUC_RESPONSE_INVALID_CONTEXT_INDEX = 0x61,
+ XE_GUC_RESPONSE_INVALID_CONTEXT_REGISTRATION = 0x62,
+ XE_GUC_RESPONSE_INVALID_DOORBELL_ID = 0x63,
+ XE_GUC_RESPONSE_INVALID_ENGINE_ID = 0x64,
XE_GUC_RESPONSE_INVALID_BUFFER_RANGE = 0x70,
XE_GUC_RESPONSE_INVALID_BUFFER = 0x71,
+ XE_GUC_RESPONSE_BUFFER_ALREADY_REGISTERED = 0x72,
XE_GUC_RESPONSE_INVALID_GGTT_ADDRESS = 0x80,
XE_GUC_RESPONSE_PENDING_ACTION = 0x90,
+ XE_GUC_RESPONSE_CONTEXT_NOT_REGISTERED = 0x100,
+ XE_GUC_RESPONSE_CONTEXT_ALREADY_REGISTERED = 0x101,
XE_GUC_RESPONSE_INVALID_SIZE = 0x102,
XE_GUC_RESPONSE_MALFORMED_KLV = 0x103,
+ XE_GUC_RESPONSE_INVALID_CONTEXT = 0x104,
XE_GUC_RESPONSE_INVALID_KLV_KEY = 0x105,
XE_GUC_RESPONSE_DATA_TOO_LARGE = 0x106,
XE_GUC_RESPONSE_VF_MIGRATED = 0x107,
@@ -40,10 +47,11 @@ enum xe_guc_response_status {
XE_GUC_RESPONSE_CTB_NOT_REGISTERED = 0x304,
XE_GUC_RESPONSE_CTB_IN_USE = 0x305,
XE_GUC_RESPONSE_CTB_INVALID_DESC = 0x306,
+ XE_GUC_RESPONSE_HW_TIMEOUT = 0x30C,
XE_GUC_RESPONSE_CTB_SOURCE_INVALID_DESCRIPTOR = 0x30D,
XE_GUC_RESPONSE_CTB_DESTINATION_INVALID_DESCRIPTOR = 0x30E,
XE_GUC_RESPONSE_INVALID_CONFIG_STATE = 0x30F,
- XE_GUC_RESPONSE_STATUS_GENERIC_FAIL = 0xF000,
+ XE_GUC_RESPONSE_GENERIC_FAIL = 0xF000,
};
enum xe_guc_load_status {
@@ -55,6 +63,7 @@ enum xe_guc_load_status {
XE_GUC_LOAD_STATUS_HWCONFIG_START = 0x05,
XE_GUC_LOAD_STATUS_HWCONFIG_DONE = 0x06,
XE_GUC_LOAD_STATUS_HWCONFIG_ERROR = 0x07,
+ XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH = 0x08,
XE_GUC_LOAD_STATUS_GDT_DONE = 0x10,
XE_GUC_LOAD_STATUS_IDT_DONE = 0x20,
XE_GUC_LOAD_STATUS_LAPIC_DONE = 0x30,
@@ -67,6 +76,8 @@ enum xe_guc_load_status {
XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_START,
XE_GUC_LOAD_STATUS_MPU_DATA_INVALID = 0x73,
XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID = 0x74,
+ XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR = 0x75,
+ XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG = 0x76,
XE_GUC_LOAD_STATUS_INVALID_INIT_DATA_RANGE_END,
XE_GUC_LOAD_STATUS_READY = 0xF0,
diff --git a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
index 7de8f827281f..265a135e7061 100644
--- a/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
+++ b/drivers/gpu/drm/xe/abi/guc_klvs_abi.h
@@ -16,6 +16,8 @@
* +===+=======+==============================================================+
* | 0 | 31:16 | **KEY** - KLV key identifier |
* | | | - `GuC Self Config KLVs`_ |
+ * | | | - `GuC Opt In Feature KLVs`_ |
+ * | | | - `GuC Scheduling Policies KLVs`_ |
* | | | - `GuC VGT Policy KLVs`_ |
* | | | - `GuC VF Configuration KLVs`_ |
* | | | |
@@ -125,6 +127,57 @@ enum {
};
/**
+ * DOC: GuC Opt In Feature KLVs
+ *
+ * `GuC KLV`_ keys available for use with OPT_IN_FEATURE_KLV
+ *
+ * _`GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE` : 0x4001
+ * Adds an extra dword to the XE_GUC_ACTION_NOTIFY_MEMORY_CAT_ERROR G2H
+ * containing the type of the CAT error. On HW that does not support
+ * reporting the CAT error type, the extra dword is set to 0xdeadbeef.
+ *
+ * _`GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH` : 0x4003
+ * This KLV enables the Dynamic Inhibit Context Switch optimization, which
+ * consists in the GuC setting the CTX_CTRL_INHIBIT_SYN_CTX_SWITCH bit to
+ * zero in the CTX_CONTEXT_CONTROL register of LRCs that are submitted
+ * to an oversubscribed engine. This will cause those contexts to be
+ * switched out immediately if they hit an unsatisfied semaphore wait
+ * (instead of waiting the full timeslice duration). The bit is instead set
+ * to one if a single context is queued on the engine, to avoid it being
+ * switched out if there isn't another context that can run in its place.
+ */
+
+#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_KEY 0x4001
+#define GUC_KLV_OPT_IN_FEATURE_EXT_CAT_ERR_TYPE_LEN 0u
+
+#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_KEY 0x4003
+#define GUC_KLV_OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH_LEN 0u
+
+/**
+ * DOC: GuC Scheduling Policies KLVs
+ *
+ * `GuC KLV`_ keys available for use with UPDATE_SCHEDULING_POLICIES_KLV.
+ *
+ * _`GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD` : 0x1001
+ * Some platforms do not allow concurrent execution of RCS and CCS
+ * workloads from different address spaces. By default, the GuC prioritizes
+ * RCS submissions over CCS ones, which can lead to CCS workloads being
+ * significantly (or completely) starved of execution time. This KLV allows
+ * the driver to specify a quantum (in ms) and a ratio (percentage value
+ * between 0 and 100), and the GuC will prioritize the CCS for that
+ * percentage of each quantum. For example, specifying 100ms and 30% will
+ * make the GuC prioritize the CCS for 30ms of every 100ms.
+ * Note that this does not necessarily mean that RCS and CCS engines will
+ * only be active for their percentage of the quantum, as the restriction
+ * only kicks in if both classes are fully busy with non-compatible address
+ * spaces; i.e., if one engine is idle or running the same address space,
+ * a pending job on the other engine will still be submitted to the HW no
+ * matter what the ratio is.
+ */
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY 0x1001
+#define GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN 2u
+
+/**
* DOC: GuC VGT Policy KLVs
*
* `GuC KLV`_ keys available for use with PF2GUC_UPDATE_VGT_POLICY.
@@ -362,12 +415,14 @@ enum {
*/
enum xe_guc_klv_ids {
GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED = 0x9002,
+ GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT = 0x9004,
GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING = 0x9005,
GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE = 0x9007,
GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE = 0x9008,
GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET = 0x9009,
GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO = 0x900a,
GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH = 0x900b,
+ GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG = 0x900c,
};
#endif
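For reference, the RENDER_COMPUTE_YIELD policy above is carried as an ordinary two-dword KLV (quantum in ms, then the CCS share in percent) behind the usual header with the key in bits 31:16 and the length in the low bits. A minimal packing sketch, assuming the pre-existing GUC_KLV_0_KEY/GUC_KLV_0_LEN field definitions from this header (not part of this hunk); the resulting dwords would be sent with the XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV action added earlier:

	/* Sketch: prioritize the CCS for 30 ms out of every 100 ms. */
	u32 klv[1 + GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN];

	klv[0] = FIELD_PREP(GUC_KLV_0_KEY,
			    GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_KEY) |
		 FIELD_PREP(GUC_KLV_0_LEN,
			    GUC_KLV_SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD_LEN);
	klv[1] = 100;	/* quantum, ms */
	klv[2] = 30;	/* CCS share of each quantum, percent */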
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
index 8a048980ea38..0548b2e0316f 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_object.h
@@ -5,10 +5,8 @@
#define __I915_GEM_OBJECT_H__
struct dma_fence;
-struct i915_sched_attr;
-static inline void i915_gem_fence_wait_priority(struct dma_fence *fence,
- const struct i915_sched_attr *attr)
+static inline void i915_gem_fence_wait_priority_display(struct dma_fence *fence)
{
}
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
index 41d39d67817a..48e3256ba37e 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/gem/i915_gem_stolen.h
@@ -6,87 +6,35 @@
#ifndef _I915_GEM_STOLEN_H_
#define _I915_GEM_STOLEN_H_
-#include "xe_ttm_stolen_mgr.h"
-#include "xe_res_cursor.h"
-
-struct xe_bo;
-
-struct i915_stolen_fb {
- struct xe_bo *bo;
-};
-
-static inline int i915_gem_stolen_insert_node_in_range(struct xe_device *xe,
- struct i915_stolen_fb *fb,
- u32 size, u32 align,
- u32 start, u32 end)
-{
- struct xe_bo *bo;
- int err;
- u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
-
- if (start < SZ_4K)
- start = SZ_4K;
-
- if (align) {
- size = ALIGN(size, align);
- start = ALIGN(start, align);
- }
-
- bo = xe_bo_create_locked_range(xe, xe_device_get_root_tile(xe),
- NULL, size, start, end,
- ttm_bo_type_kernel, flags, 0);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- bo = NULL;
- return err;
- }
- err = xe_bo_pin(bo);
- xe_bo_unlock_vm_held(bo);
-
- if (err) {
- xe_bo_put(fb->bo);
- bo = NULL;
- }
-
- fb->bo = bo;
-
- return err;
-}
-
-static inline int i915_gem_stolen_insert_node(struct xe_device *xe,
- struct i915_stolen_fb *fb,
- u32 size, u32 align)
-{
- /* Not used on xe */
- BUG_ON(1);
- return -ENODEV;
-}
-
-static inline void i915_gem_stolen_remove_node(struct xe_device *xe,
- struct i915_stolen_fb *fb)
-{
- xe_bo_unpin_map_no_vm(fb->bo);
- fb->bo = NULL;
-}
-
-#define i915_gem_stolen_initialized(xe) (!!ttm_manager_type(&(xe)->ttm, XE_PL_STOLEN))
-#define i915_gem_stolen_node_allocated(fb) (!!((fb)->bo))
-
-static inline u32 i915_gem_stolen_node_offset(struct i915_stolen_fb *fb)
-{
- struct xe_res_cursor res;
-
- xe_res_first(fb->bo->ttm.resource, 0, 4096, &res);
- return res.start;
-}
-
-/* Used for < gen4. These are not supported by Xe */
-#define i915_gem_stolen_area_address(xe) (!WARN_ON(1))
-/* Used for gen9 specific WA. Gen9 is not supported by Xe */
-#define i915_gem_stolen_area_size(xe) (!WARN_ON(1))
-
-#define i915_gem_stolen_node_address(xe, fb) (xe_ttm_stolen_gpu_offset(xe) + \
- i915_gem_stolen_node_offset(fb))
-#define i915_gem_stolen_node_size(fb) ((u64)((fb)->bo->ttm.base.size))
+#include <linux/types.h>
+
+struct drm_device;
+struct intel_stolen_node;
+
+int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size,
+ unsigned int align, u64 start, u64 end);
+
+int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size,
+ unsigned int align);
+
+void i915_gem_stolen_remove_node(struct intel_stolen_node *node);
+
+bool i915_gem_stolen_initialized(struct drm_device *drm);
+
+bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node);
+
+u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node);
+
+u64 i915_gem_stolen_area_address(struct drm_device *drm);
+
+u64 i915_gem_stolen_area_size(struct drm_device *drm);
+
+u64 i915_gem_stolen_node_address(struct intel_stolen_node *node);
+
+u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node);
+
+struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm);
+
+void i915_gem_stolen_node_free(const struct intel_stolen_node *node);
#endif
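The compat header now hides stolen-memory bookkeeping behind an opaque struct intel_stolen_node instead of exposing xe_bo details to display code. A hedged sketch of the calling sequence these declarations imply (locals and error handling are illustrative only):

	struct intel_stolen_node *node;
	int err;

	node = i915_gem_stolen_node_alloc(drm);
	if (!node)
		return -ENOMEM;

	err = i915_gem_stolen_insert_node_in_range(node, size, align, start, end);
	if (err) {
		i915_gem_stolen_node_free(node);
		return err;
	}

	/* ... program the HW using i915_gem_stolen_node_offset(node) or
	 * i915_gem_stolen_node_address(node) ... */

	i915_gem_stolen_remove_node(node);
	i915_gem_stolen_node_free(node);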
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
index 9b7572e06f34..3e79a74ff7de 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_drv.h
@@ -12,8 +12,6 @@
#include <drm/drm_drv.h>
-#include "i915_utils.h"
-#include "xe_device.h" /* for xe_device_has_flat_ccs() */
#include "xe_device_types.h"
static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
@@ -26,38 +24,14 @@ static inline struct drm_i915_private *to_i915(const struct drm_device *dev)
#define IS_I915G(dev_priv) (dev_priv && 0)
#define IS_I915GM(dev_priv) (dev_priv && 0)
#define IS_PINEVIEW(dev_priv) (dev_priv && 0)
-#define IS_IVYBRIDGE(dev_priv) (dev_priv && 0)
#define IS_VALLEYVIEW(dev_priv) (dev_priv && 0)
#define IS_CHERRYVIEW(dev_priv) (dev_priv && 0)
#define IS_HASWELL(dev_priv) (dev_priv && 0)
#define IS_BROADWELL(dev_priv) (dev_priv && 0)
-#define IS_SKYLAKE(dev_priv) (dev_priv && 0)
#define IS_BROXTON(dev_priv) (dev_priv && 0)
-#define IS_KABYLAKE(dev_priv) (dev_priv && 0)
#define IS_GEMINILAKE(dev_priv) (dev_priv && 0)
-#define IS_COFFEELAKE(dev_priv) (dev_priv && 0)
-#define IS_COMETLAKE(dev_priv) (dev_priv && 0)
-#define IS_ICELAKE(dev_priv) (dev_priv && 0)
-#define IS_JASPERLAKE(dev_priv) (dev_priv && 0)
-#define IS_ELKHARTLAKE(dev_priv) (dev_priv && 0)
-#define IS_TIGERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_TIGERLAKE)
-#define IS_ROCKETLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_ROCKETLAKE)
-#define IS_DG1(dev_priv) IS_PLATFORM(dev_priv, XE_DG1)
-#define IS_ALDERLAKE_S(dev_priv) IS_PLATFORM(dev_priv, XE_ALDERLAKE_S)
-#define IS_ALDERLAKE_P(dev_priv) (IS_PLATFORM(dev_priv, XE_ALDERLAKE_P) || \
- IS_PLATFORM(dev_priv, XE_ALDERLAKE_N))
#define IS_DG2(dev_priv) IS_PLATFORM(dev_priv, XE_DG2)
-#define IS_METEORLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_METEORLAKE)
-#define IS_LUNARLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_LUNARLAKE)
-#define IS_BATTLEMAGE(dev_priv) IS_PLATFORM(dev_priv, XE_BATTLEMAGE)
-#define IS_PANTHERLAKE(dev_priv) IS_PLATFORM(dev_priv, XE_PANTHERLAKE)
-
-#define IS_HASWELL_ULT(dev_priv) (dev_priv && 0)
-#define IS_BROADWELL_ULT(dev_priv) (dev_priv && 0)
#define IS_MOBILE(xe) (xe && 0)
-#define HAS_FLAT_CCS(xe) (xe_device_has_flat_ccs(xe))
-#define HAS_128_BYTE_Y_TILING(xe) (xe || 1)
-
#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h
deleted file mode 100644
index c11130440d31..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_scheduler_types.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/* Copyright © 2025 Intel Corporation */
-
-#ifndef __I915_SCHEDULER_TYPES_H__
-#define __I915_SCHEDULER_TYPES_H__
-
-#define I915_PRIORITY_DISPLAY 0
-
-struct i915_sched_attr {
- int priority;
-};
-
-#endif
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
index 1d7c4360e5c0..bcd441dc0fce 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_utils.h
@@ -3,4 +3,11 @@
* Copyright © 2023 Intel Corporation
*/
-#include "../../i915/i915_utils.h"
+/* for soc/ */
+#ifndef MISSING_CASE
+#define MISSING_CASE(x) WARN(1, "Missing case (%s == %ld)\n", \
+ __stringify(x), (long)(x))
+#endif
+
+/* for a couple of users under i915/display */
+#define i915_inject_probe_failure(unused) ((unused) && 0)
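For reference, MISSING_CASE() is the usual i915/display idiom for flagging an unhandled switch value; a typical, purely hypothetical use:

	switch (phy) {
	case PHY_A:
		return true;
	default:
		MISSING_CASE(phy);	/* WARNs with "Missing case (phy == N)" */
		return false;
	}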
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
index 4465c40f8134..b17e3bab23d5 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/i915_vma.h
@@ -26,8 +26,6 @@ struct i915_vma {
struct xe_ggtt_node *node;
};
-#define i915_ggtt_clear_scanout(bo) do { } while (0)
-
#define i915_vma_fence_id(vma) -1
static inline u32 i915_ggtt_offset(const struct i915_vma *vma)
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
index a473aa6697d0..4fcd3bf6b76f 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_pcode.h
@@ -6,37 +6,6 @@
#ifndef __INTEL_PCODE_H__
#define __INTEL_PCODE_H__
-#include "intel_uncore.h"
#include "xe_pcode.h"
-static inline int
-snb_pcode_write_timeout(struct intel_uncore *uncore, u32 mbox, u32 val,
- int fast_timeout_us, int slow_timeout_ms)
-{
- return xe_pcode_write_timeout(__compat_uncore_to_tile(uncore), mbox, val,
- slow_timeout_ms ?: 1);
-}
-
-static inline int
-snb_pcode_write(struct intel_uncore *uncore, u32 mbox, u32 val)
-{
-
- return xe_pcode_write(__compat_uncore_to_tile(uncore), mbox, val);
-}
-
-static inline int
-snb_pcode_read(struct intel_uncore *uncore, u32 mbox, u32 *val, u32 *val1)
-{
- return xe_pcode_read(__compat_uncore_to_tile(uncore), mbox, val, val1);
-}
-
-static inline int
-skl_pcode_request(struct intel_uncore *uncore, u32 mbox,
- u32 request, u32 reply_mask, u32 reply,
- int timeout_base_ms)
-{
- return xe_pcode_request(__compat_uncore_to_tile(uncore), mbox, request, reply_mask, reply,
- timeout_base_ms);
-}
-
#endif /* __INTEL_PCODE_H__ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
index 0c1e88e36a1e..d93ddacdf743 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/intel_uncore.h
@@ -24,13 +24,6 @@ static inline struct xe_mmio *__compat_uncore_to_mmio(struct intel_uncore *uncor
return xe_root_tile_mmio(xe);
}
-static inline struct xe_tile *__compat_uncore_to_tile(struct intel_uncore *uncore)
-{
- struct xe_device *xe = container_of(uncore, struct xe_device, uncore);
-
- return xe_device_get_root_tile(xe);
-}
-
static inline u32 intel_uncore_read(struct intel_uncore *uncore,
i915_reg_t i915_reg)
{
@@ -98,26 +91,6 @@ static inline u32 intel_uncore_rmw(struct intel_uncore *uncore,
return xe_mmio_rmw32(__compat_uncore_to_mmio(uncore), reg, clear, set);
}
-static inline int intel_wait_for_register(struct intel_uncore *uncore,
- i915_reg_t i915_reg, u32 mask,
- u32 value, unsigned int timeout)
-{
- struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
-
- return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
- timeout * USEC_PER_MSEC, NULL, false);
-}
-
-static inline int intel_wait_for_register_fw(struct intel_uncore *uncore,
- i915_reg_t i915_reg, u32 mask,
- u32 value, unsigned int timeout)
-{
- struct xe_reg reg = XE_REG(i915_mmio_reg_offset(i915_reg));
-
- return xe_mmio_wait32(__compat_uncore_to_mmio(uncore), reg, mask, value,
- timeout * USEC_PER_MSEC, NULL, false);
-}
-
static inline int
__intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
u32 mask, u32 value, unsigned int fast_timeout_us,
@@ -139,6 +112,16 @@ __intel_wait_for_register(struct intel_uncore *uncore, i915_reg_t i915_reg,
out_value, atomic);
}
+static inline int
+__intel_wait_for_register_fw(struct intel_uncore *uncore, i915_reg_t i915_reg,
+ u32 mask, u32 value, unsigned int fast_timeout_us,
+ unsigned int slow_timeout_ms, u32 *out_value)
+{
+ return __intel_wait_for_register(uncore, i915_reg, mask, value,
+ fast_timeout_us, slow_timeout_ms,
+ out_value);
+}
+
static inline u32 intel_uncore_read_fw(struct intel_uncore *uncore,
i915_reg_t i915_reg)
{
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h
new file mode 100644
index 000000000000..69e1935e9cdf
--- /dev/null
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb.h
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2013-2021 Intel Corporation
+ */
+
+#ifndef _VLV_IOSF_SB_H_
+#define _VLV_IOSF_SB_H_
+
+#include <linux/types.h>
+
+#include "vlv_iosf_sb_reg.h"
+
+struct drm_device;
+
+enum vlv_iosf_sb_unit {
+ VLV_IOSF_SB_BUNIT,
+ VLV_IOSF_SB_CCK,
+ VLV_IOSF_SB_CCU,
+ VLV_IOSF_SB_DPIO,
+ VLV_IOSF_SB_DPIO_2,
+ VLV_IOSF_SB_FLISDSI,
+ VLV_IOSF_SB_GPIO,
+ VLV_IOSF_SB_NC,
+ VLV_IOSF_SB_PUNIT,
+};
+
+static inline void vlv_iosf_sb_get(struct drm_device *drm, unsigned long ports)
+{
+}
+static inline u32 vlv_iosf_sb_read(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr)
+{
+ return 0;
+}
+static inline int vlv_iosf_sb_write(struct drm_device *drm, enum vlv_iosf_sb_unit unit, u32 addr, u32 val)
+{
+ return 0;
+}
+static inline void vlv_iosf_sb_put(struct drm_device *drm, unsigned long ports)
+{
+}
+
+#endif /* _VLV_IOSF_SB_H_ */
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h
index 949f134ce3cf..cb7fa8e794a6 100644
--- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband_reg.h
+++ b/drivers/gpu/drm/xe/compat-i915-headers/vlv_iosf_sb_reg.h
@@ -3,4 +3,4 @@
* Copyright © 2023 Intel Corporation
*/
-#include "../../i915/vlv_sideband_reg.h"
+#include "../../i915/vlv_iosf_sb_reg.h"
diff --git a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h b/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
deleted file mode 100644
index ec6f12de5727..000000000000
--- a/drivers/gpu/drm/xe/compat-i915-headers/vlv_sideband.h
+++ /dev/null
@@ -1,132 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2013-2021 Intel Corporation
- */
-
-#ifndef _VLV_SIDEBAND_H_
-#define _VLV_SIDEBAND_H_
-
-#include <linux/types.h>
-
-#include "vlv_sideband_reg.h"
-
-enum pipe;
-struct drm_i915_private;
-
-enum {
- VLV_IOSF_SB_BUNIT,
- VLV_IOSF_SB_CCK,
- VLV_IOSF_SB_CCU,
- VLV_IOSF_SB_DPIO,
- VLV_IOSF_SB_FLISDSI,
- VLV_IOSF_SB_GPIO,
- VLV_IOSF_SB_NC,
- VLV_IOSF_SB_PUNIT,
-};
-
-static inline void vlv_iosf_sb_get(struct drm_i915_private *i915, unsigned long ports)
-{
-}
-static inline u32 vlv_iosf_sb_read(struct drm_i915_private *i915, u8 port, u32 reg)
-{
- return 0;
-}
-static inline void vlv_iosf_sb_write(struct drm_i915_private *i915,
- u8 port, u32 reg, u32 val)
-{
-}
-static inline void vlv_iosf_sb_put(struct drm_i915_private *i915, unsigned long ports)
-{
-}
-static inline void vlv_bunit_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_bunit_read(struct drm_i915_private *i915, u32 reg)
-{
- return 0;
-}
-static inline void vlv_bunit_write(struct drm_i915_private *i915, u32 reg, u32 val)
-{
-}
-static inline void vlv_bunit_put(struct drm_i915_private *i915)
-{
-}
-static inline void vlv_cck_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_cck_read(struct drm_i915_private *i915, u32 reg)
-{
- return 0;
-}
-static inline void vlv_cck_write(struct drm_i915_private *i915, u32 reg, u32 val)
-{
-}
-static inline void vlv_cck_put(struct drm_i915_private *i915)
-{
-}
-static inline void vlv_ccu_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_ccu_read(struct drm_i915_private *i915, u32 reg)
-{
- return 0;
-}
-static inline void vlv_ccu_write(struct drm_i915_private *i915, u32 reg, u32 val)
-{
-}
-static inline void vlv_ccu_put(struct drm_i915_private *i915)
-{
-}
-static inline void vlv_dpio_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_dpio_read(struct drm_i915_private *i915, int pipe, int reg)
-{
- return 0;
-}
-static inline void vlv_dpio_write(struct drm_i915_private *i915,
- int pipe, int reg, u32 val)
-{
-}
-static inline void vlv_dpio_put(struct drm_i915_private *i915)
-{
-}
-static inline void vlv_flisdsi_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_flisdsi_read(struct drm_i915_private *i915, u32 reg)
-{
- return 0;
-}
-static inline void vlv_flisdsi_write(struct drm_i915_private *i915, u32 reg, u32 val)
-{
-}
-static inline void vlv_flisdsi_put(struct drm_i915_private *i915)
-{
-}
-static inline void vlv_nc_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_nc_read(struct drm_i915_private *i915, u8 addr)
-{
- return 0;
-}
-static inline void vlv_nc_put(struct drm_i915_private *i915)
-{
-}
-static inline void vlv_punit_get(struct drm_i915_private *i915)
-{
-}
-static inline u32 vlv_punit_read(struct drm_i915_private *i915, u32 addr)
-{
- return 0;
-}
-static inline int vlv_punit_write(struct drm_i915_private *i915, u32 addr, u32 val)
-{
- return 0;
-}
-static inline void vlv_punit_put(struct drm_i915_private *i915)
-{
-}
-
-#endif /* _VLV_SIDEBAND_H_ */
diff --git a/drivers/gpu/drm/xe/display/ext/i915_utils.c b/drivers/gpu/drm/xe/display/ext/i915_utils.c
deleted file mode 100644
index 43b10a2cc508..000000000000
--- a/drivers/gpu/drm/xe/display/ext/i915_utils.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#include "i915_drv.h"
-
-bool i915_vtd_active(struct drm_i915_private *i915)
-{
- if (device_iommu_mapped(i915->drm.dev))
- return true;
-
- /* Running as a guest, we assume the host is enforcing VT'd */
- return i915_run_as_guest();
-}
-
-#if IS_ENABLED(CONFIG_DRM_I915_DEBUG)
-
-/* i915 specific, just put here for shutting it up */
-int __i915_inject_probe_error(struct drm_i915_private *i915, int err,
- const char *func, int line)
-{
- return 0;
-}
-
-#endif
diff --git a/drivers/gpu/drm/xe/display/intel_bo.c b/drivers/gpu/drm/xe/display/intel_bo.c
index 27437c22bd70..bad2243b9114 100644
--- a/drivers/gpu/drm/xe/display/intel_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_bo.c
@@ -5,6 +5,7 @@
#include "xe_bo.h"
#include "intel_bo.h"
+#include "intel_frontbuffer.h"
bool intel_bo_is_tiled(struct drm_gem_object *obj)
{
@@ -28,10 +29,6 @@ bool intel_bo_is_protected(struct drm_gem_object *obj)
return xe_bo_is_protected(gem_to_xe_bo(obj));
}
-void intel_bo_flush_if_display(struct drm_gem_object *obj)
-{
-}
-
int intel_bo_fb_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
return drm_gem_prime_mmap(obj, vma);
@@ -44,15 +41,60 @@ int intel_bo_read_from_page(struct drm_gem_object *obj, u64 offset, void *dst, i
return xe_bo_read(bo, offset, dst, size);
}
-struct intel_frontbuffer *intel_bo_get_frontbuffer(struct drm_gem_object *obj)
+struct xe_frontbuffer {
+ struct intel_frontbuffer base;
+ struct drm_gem_object *obj;
+ struct kref ref;
+};
+
+struct intel_frontbuffer *intel_bo_frontbuffer_get(struct drm_gem_object *obj)
+{
+ struct xe_frontbuffer *front;
+
+ front = kmalloc(sizeof(*front), GFP_KERNEL);
+ if (!front)
+ return NULL;
+
+ intel_frontbuffer_init(&front->base, obj->dev);
+
+ kref_init(&front->ref);
+
+ drm_gem_object_get(obj);
+ front->obj = obj;
+
+ return &front->base;
+}
+
+void intel_bo_frontbuffer_ref(struct intel_frontbuffer *_front)
{
- return NULL;
+ struct xe_frontbuffer *front =
+ container_of(_front, typeof(*front), base);
+
+ kref_get(&front->ref);
+}
+
+static void frontbuffer_release(struct kref *ref)
+{
+ struct xe_frontbuffer *front =
+ container_of(ref, typeof(*front), ref);
+
+ intel_frontbuffer_fini(&front->base);
+
+ drm_gem_object_put(front->obj);
+
+ kfree(front);
+}
+
+void intel_bo_frontbuffer_put(struct intel_frontbuffer *_front)
+{
+ struct xe_frontbuffer *front =
+ container_of(_front, typeof(*front), base);
+
+ kref_put(&front->ref, frontbuffer_release);
}
-struct intel_frontbuffer *intel_bo_set_frontbuffer(struct drm_gem_object *obj,
- struct intel_frontbuffer *front)
+void intel_bo_frontbuffer_flush_for_display(struct intel_frontbuffer *front)
{
- return front;
}
void intel_bo_describe(struct seq_file *m, struct drm_gem_object *obj)
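The intel_bo.c change replaces the old NULL/pass-through frontbuffer hooks with a kref-counted xe_frontbuffer wrapper: a reference on the GEM object is taken at creation and both references drop from the kref release callback. A minimal sketch of that embed-a-kref pattern; only the kref/container_of/alloc calls are real kernel APIs, the wrapper names are hypothetical:

#include <linux/kref.h>
#include <linux/slab.h>

struct inner { int dummy; };            /* stands in for intel_frontbuffer */

struct wrapper {                        /* stands in for xe_frontbuffer */
        struct inner base;
        struct kref ref;
};

static struct inner *wrapper_create(void)
{
        struct wrapper *w = kzalloc(sizeof(*w), GFP_KERNEL);

        if (!w)
                return NULL;

        kref_init(&w->ref);             /* refcount starts at 1 */
        return &w->base;                /* callers only see the embedded member */
}

static void wrapper_release(struct kref *ref)
{
        struct wrapper *w = container_of(ref, struct wrapper, ref);

        kfree(w);
}

static void wrapper_put(struct inner *base)
{
        struct wrapper *w = container_of(base, struct wrapper, base);

        kref_put(&w->ref, wrapper_release);
}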
diff --git a/drivers/gpu/drm/xe/display/intel_fb_bo.c b/drivers/gpu/drm/xe/display/intel_fb_bo.c
index ebdb22c9499d..db8b1a27b4de 100644
--- a/drivers/gpu/drm/xe/display/intel_fb_bo.c
+++ b/drivers/gpu/drm/xe/display/intel_fb_bo.c
@@ -24,8 +24,7 @@ void intel_fb_bo_framebuffer_fini(struct drm_gem_object *obj)
xe_bo_put(bo);
}
-int intel_fb_bo_framebuffer_init(struct drm_framebuffer *fb,
- struct drm_gem_object *obj,
+int intel_fb_bo_framebuffer_init(struct drm_gem_object *obj,
struct drm_mode_fb_cmd2 *mode_cmd)
{
struct xe_bo *bo = gem_to_xe_bo(obj);
diff --git a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
index e8191562d122..7ad76022cb14 100644
--- a/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
+++ b/drivers/gpu/drm/xe/display/intel_fbdev_fb.c
@@ -3,49 +3,39 @@
* Copyright © 2023 Intel Corporation
*/
-#include <drm/drm_fb_helper.h>
+#include <linux/fb.h>
-#include "intel_display_types.h"
-#include "intel_fb.h"
#include "intel_fbdev_fb.h"
#include "xe_bo.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_wa.h"
-#include <generated/xe_wa_oob.h>
+#include <generated/xe_device_wa_oob.h>
-struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
- struct drm_fb_helper_surface_size *sizes)
+/*
+ * FIXME: There shouldn't be any reason to have XE_PAGE_SIZE stride
+ * alignment. The same 64 as i915 uses should be fine, and we shouldn't need to
+ * have driver specific values. However, dropping the stride alignment to 64
+ * leads to underflowing the bo pin count in the atomic cleanup work.
+ */
+u32 intel_fbdev_fb_pitch_align(u32 stride)
{
- struct drm_framebuffer *fb;
- struct drm_device *dev = helper->dev;
- struct xe_device *xe = to_xe_device(dev);
- struct drm_mode_fb_cmd2 mode_cmd = {};
- struct xe_bo *obj;
- int size;
-
- /* we don't do packed 24bpp */
- if (sizes->surface_bpp == 24)
- sizes->surface_bpp = 32;
-
- mode_cmd.width = sizes->surface_width;
- mode_cmd.height = sizes->surface_height;
+ return ALIGN(stride, XE_PAGE_SIZE);
+}
- mode_cmd.pitches[0] = ALIGN(mode_cmd.width *
- DIV_ROUND_UP(sizes->surface_bpp, 8), XE_PAGE_SIZE);
- mode_cmd.pixel_format = drm_mode_legacy_fb_format(sizes->surface_bpp,
- sizes->surface_depth);
+struct drm_gem_object *intel_fbdev_fb_bo_create(struct drm_device *drm, int size)
+{
+ struct xe_device *xe = to_xe_device(drm);
+ struct xe_bo *obj;
- size = mode_cmd.pitches[0] * mode_cmd.height;
- size = PAGE_ALIGN(size);
obj = ERR_PTR(-ENODEV);
- if (!IS_DGFX(xe) && !XE_WA(xe_root_mmio_gt(xe), 22019338487_display)) {
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
- NULL, size,
- ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT);
+ if (!IS_DGFX(xe) && !XE_DEVICE_WA(xe, 22019338487_display)) {
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+ size,
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT, false);
if (!IS_ERR(obj))
drm_info(&xe->drm, "Allocated fbdev into stolen\n");
else
@@ -53,37 +43,30 @@ struct intel_framebuffer *intel_fbdev_fb_alloc(struct drm_fb_helper *helper,
}
if (IS_ERR(obj)) {
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, size,
- ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), size,
+ ttm_bo_type_kernel, XE_BO_FLAG_SCANOUT |
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_GGTT, false);
}
if (IS_ERR(obj)) {
drm_err(&xe->drm, "failed to allocate framebuffer (%pe)\n", obj);
- fb = ERR_PTR(-ENOMEM);
- goto err;
- }
-
- fb = intel_framebuffer_create(&obj->ttm.base, &mode_cmd);
- if (IS_ERR(fb)) {
- xe_bo_unpin_map_no_vm(obj);
- goto err;
+ return ERR_PTR(-ENOMEM);
}
- drm_gem_object_put(&obj->ttm.base);
-
- return to_intel_framebuffer(fb);
+ return &obj->ttm.base;
+}
-err:
- return ERR_CAST(fb);
+void intel_fbdev_fb_bo_destroy(struct drm_gem_object *obj)
+{
+ xe_bo_unpin_map_no_vm(gem_to_xe_bo(obj));
}
-int intel_fbdev_fb_fill_info(struct intel_display *display, struct fb_info *info,
+int intel_fbdev_fb_fill_info(struct drm_device *drm, struct fb_info *info,
struct drm_gem_object *_obj, struct i915_vma *vma)
{
struct xe_bo *obj = gem_to_xe_bo(_obj);
- struct pci_dev *pdev = to_pci_dev(display->drm->dev);
+ struct pci_dev *pdev = to_pci_dev(drm->dev);
if (!(obj->flags & XE_BO_FLAG_SYSTEM)) {
if (obj->flags & XE_BO_FLAG_STOLEN)
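The fbdev rework splits buffer creation into intel_fbdev_fb_bo_create(), which still tries stolen memory first and falls back to VRAM/system, reporting failure through ERR_PTR(). A hedged sketch of that preferred-then-fallback flow; the alloc_* helpers are hypothetical stand-ins for the two xe_bo_create_pin_map_novm() calls:

#include <linux/err.h>
#include <linux/errno.h>
#include <linux/types.h>

bool stolen_memory_usable(void);                /* hypothetical */
void *alloc_from_stolen(int size);              /* hypothetical */
void *alloc_from_vram_or_system(int size);      /* hypothetical */

static void *alloc_fbdev_buffer(int size)
{
        void *buf = ERR_PTR(-ENODEV);

        if (stolen_memory_usable())
                buf = alloc_from_stolen(size);

        if (IS_ERR(buf))                        /* stolen unavailable or failed */
                buf = alloc_from_vram_or_system(size);

        if (IS_ERR(buf))
                return ERR_PTR(-ENOMEM);        /* mirror the hunk: report -ENOMEM */

        return buf;
}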
diff --git a/drivers/gpu/drm/xe/display/xe_display.c b/drivers/gpu/drm/xe/display/xe_display.c
index 68f064f33d4b..8b0afa270216 100644
--- a/drivers/gpu/drm/xe/display/xe_display.c
+++ b/drivers/gpu/drm/xe/display/xe_display.c
@@ -13,6 +13,8 @@
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
#include <drm/drm_probe_helper.h>
+#include <drm/intel/display_member.h>
+#include <drm/intel/display_parent_interface.h>
#include <uapi/drm/xe_drm.h>
#include "soc/intel_dram.h"
@@ -20,6 +22,7 @@
#include "intel_audio.h"
#include "intel_bw.h"
#include "intel_display.h"
+#include "intel_display_device.h"
#include "intel_display_driver.h"
#include "intel_display_irq.h"
#include "intel_display_types.h"
@@ -32,20 +35,21 @@
#include "intel_hotplug.h"
#include "intel_opregion.h"
#include "skl_watermark.h"
+#include "xe_display_rpm.h"
#include "xe_module.h"
-/* Xe device functions */
+/* Ensure drm and display members are placed properly. */
+INTEL_DISPLAY_MEMBER_STATIC_ASSERT(struct xe_device, drm, display);
-static bool has_display(struct xe_device *xe)
-{
- return HAS_DISPLAY(&xe->display);
-}
+/* Xe device functions */
/**
* xe_display_driver_probe_defer - Detect if we need to wait for other drivers
* early on
* @pdev: PCI device
*
+ * Note: This is called before xe or display device creation.
+ *
* Returns: true if probe needs to be deferred, false otherwise
*/
bool xe_display_driver_probe_defer(struct pci_dev *pdev)
@@ -63,6 +67,8 @@ bool xe_display_driver_probe_defer(struct pci_dev *pdev)
* Set features and function hooks in @driver that are needed for driving the
* display IP. This sets the driver's capability of driving display, regardless
* if the device has it enabled
+ *
+ * Note: This is called before xe or display device creation.
*/
void xe_display_driver_set_hooks(struct drm_driver *driver)
{
@@ -81,41 +87,15 @@ static void unset_display_features(struct xe_device *xe)
xe->drm.driver_features &= ~(DRIVER_MODESET | DRIVER_ATOMIC);
}
-static void display_destroy(struct drm_device *dev, void *dummy)
-{
- struct xe_device *xe = to_xe_device(dev);
-
- destroy_workqueue(xe->display.hotplug.dp_wq);
-}
-
-/**
- * xe_display_create - create display struct
- * @xe: XE device instance
- *
- * Initialize all fields used by the display part.
- *
- * TODO: once everything can be inside a single struct, make the struct opaque
- * to the rest of xe and return it to be xe->display.
- *
- * Returns: 0 on success
- */
-int xe_display_create(struct xe_device *xe)
-{
- spin_lock_init(&xe->display.fb_tracking.lock);
-
- xe->display.hotplug.dp_wq = alloc_ordered_workqueue("xe-dp", 0);
-
- return drmm_add_action_or_reset(&xe->drm, display_destroy, NULL);
-}
-
static void xe_display_fini_early(void *arg)
{
struct xe_device *xe = arg;
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
+ intel_hpd_cancel_work(display);
intel_display_driver_remove_nogem(display);
intel_display_driver_remove_noirq(display);
intel_opregion_cleanup(display);
@@ -124,7 +104,7 @@ static void xe_display_fini_early(void *arg)
int xe_display_init_early(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
int err;
if (!xe->info.probe_display)
@@ -142,7 +122,9 @@ int xe_display_init_early(struct xe_device *xe)
* Fill the dram structure to get the system dram info. This will be
* used for memory latency calculation.
*/
- intel_dram_detect(xe);
+ err = intel_dram_detect(xe);
+ if (err)
+ goto err_opregion;
intel_bw_init_hw(display);
@@ -168,7 +150,7 @@ err_opregion:
static void xe_display_fini(void *arg)
{
struct xe_device *xe = arg;
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
intel_hpd_poll_fini(display);
intel_hdcp_component_fini(display);
@@ -178,7 +160,7 @@ static void xe_display_fini(void *arg)
int xe_display_init(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
int err;
if (!xe->info.probe_display)
@@ -193,7 +175,7 @@ int xe_display_init(struct xe_device *xe)
void xe_display_register(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -204,7 +186,7 @@ void xe_display_register(struct xe_device *xe)
void xe_display_unregister(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -217,7 +199,7 @@ void xe_display_unregister(struct xe_device *xe)
void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -228,7 +210,7 @@ void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl)
void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -239,7 +221,7 @@ void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir)
void xe_display_irq_reset(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -247,15 +229,14 @@ void xe_display_irq_reset(struct xe_device *xe)
gen11_display_irq_reset(display);
}
-void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt)
+void xe_display_irq_postinstall(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
- if (gt->info.id == XE_GT0)
- gen11_de_irq_postinstall(display);
+ gen11_de_irq_postinstall(display);
}
static bool suspend_to_idle(void)
@@ -290,7 +271,7 @@ static void xe_display_flush_cleanup_work(struct xe_device *xe)
static void xe_display_enable_d3cold(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -307,27 +288,27 @@ static void xe_display_enable_d3cold(struct xe_device *xe)
intel_dmc_suspend(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_hpd_poll_enable(display);
}
static void xe_display_disable_d3cold(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
intel_dmc_resume(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
drm_mode_config_reset(&xe->drm);
intel_display_driver_init_hw(display);
intel_hpd_init(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_hpd_poll_disable(display);
intel_opregion_resume(display);
@@ -337,7 +318,7 @@ static void xe_display_disable_d3cold(struct xe_device *xe)
void xe_display_pm_suspend(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
bool s2idle = suspend_to_idle();
if (!xe->info.probe_display)
@@ -348,9 +329,9 @@ void xe_display_pm_suspend(struct xe_device *xe)
* properly.
*/
intel_power_domains_disable(display);
- drm_client_dev_suspend(&xe->drm, false);
+ drm_client_dev_suspend(&xe->drm);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
drm_kms_helper_poll_disable(&xe->drm);
intel_display_driver_disable_user_access(display);
intel_display_driver_suspend(display);
@@ -358,11 +339,13 @@ void xe_display_pm_suspend(struct xe_device *xe)
xe_display_flush_cleanup_work(xe);
+ intel_encoder_block_all_hpds(display);
+
intel_hpd_cancel_work(display);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
intel_display_driver_suspend_access(display);
- intel_encoder_suspend_all(&xe->display);
+ intel_encoder_suspend_all(display);
}
intel_opregion_suspend(display, s2idle ? PCI_D1 : PCI_D3cold);
@@ -372,15 +355,15 @@ void xe_display_pm_suspend(struct xe_device *xe)
void xe_display_pm_shutdown(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
intel_power_domains_disable(display);
- drm_client_dev_suspend(&xe->drm, false);
+ drm_client_dev_suspend(&xe->drm);
- if (has_display(xe)) {
+ if (intel_display_device_present(display)) {
drm_kms_helper_poll_disable(&xe->drm);
intel_display_driver_disable_user_access(display);
intel_display_driver_suspend(display);
@@ -388,9 +371,10 @@ void xe_display_pm_shutdown(struct xe_device *xe)
xe_display_flush_cleanup_work(xe);
intel_dp_mst_suspend(display);
+ intel_encoder_block_all_hpds(display);
intel_hpd_cancel_work(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_display_driver_suspend_access(display);
intel_encoder_suspend_all(display);
@@ -403,7 +387,7 @@ void xe_display_pm_shutdown(struct xe_device *xe)
void xe_display_pm_runtime_suspend(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -418,7 +402,7 @@ void xe_display_pm_runtime_suspend(struct xe_device *xe)
void xe_display_pm_suspend_late(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
bool s2idle = suspend_to_idle();
if (!xe->info.probe_display)
@@ -429,7 +413,7 @@ void xe_display_pm_suspend_late(struct xe_device *xe)
void xe_display_pm_runtime_suspend_late(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -447,7 +431,7 @@ void xe_display_pm_runtime_suspend_late(struct xe_device *xe)
void xe_display_pm_shutdown_late(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -462,7 +446,7 @@ void xe_display_pm_shutdown_late(struct xe_device *xe)
void xe_display_pm_resume_early(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -472,42 +456,44 @@ void xe_display_pm_resume_early(struct xe_device *xe)
void xe_display_pm_resume(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
intel_dmc_resume(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
drm_mode_config_reset(&xe->drm);
intel_display_driver_init_hw(display);
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_display_driver_resume_access(display);
intel_hpd_init(display);
- if (has_display(xe)) {
+ intel_encoder_unblock_all_hpds(display);
+
+ if (intel_display_device_present(display)) {
intel_display_driver_resume(display);
drm_kms_helper_poll_enable(&xe->drm);
intel_display_driver_enable_user_access(display);
}
- if (has_display(xe))
+ if (intel_display_device_present(display))
intel_hpd_poll_disable(display);
intel_opregion_resume(display);
- drm_client_dev_resume(&xe->drm, false);
+ drm_client_dev_resume(&xe->drm);
intel_power_domains_enable(display);
}
void xe_display_pm_runtime_resume(struct xe_device *xe)
{
- struct intel_display *display = &xe->display;
+ struct intel_display *display = xe->display;
if (!xe->info.probe_display)
return;
@@ -530,6 +516,21 @@ static void display_device_remove(struct drm_device *dev, void *arg)
intel_display_device_remove(display);
}
+static const struct intel_display_parent_interface parent = {
+ .rpm = &xe_display_rpm_interface,
+};
+
+/**
+ * xe_display_probe - probe display and create display struct
+ * @xe: XE device instance
+ *
+ * Initialize all fields used by the display part.
+ *
+ * TODO: once everything can be inside a single struct, make the struct opaque
+ * to the rest of xe and return it to be xe->display.
+ *
+ * Returns: 0 on success
+ */
int xe_display_probe(struct xe_device *xe)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -539,13 +540,17 @@ int xe_display_probe(struct xe_device *xe)
if (!xe->info.probe_display)
goto no_display;
- display = intel_display_device_probe(pdev);
+ display = intel_display_device_probe(pdev, &parent);
+ if (IS_ERR(display))
+ return PTR_ERR(display);
err = drmm_add_action_or_reset(&xe->drm, display_device_remove, display);
if (err)
return err;
- if (has_display(xe))
+ xe->display = display;
+
+ if (intel_display_device_present(display))
return 0;
no_display:
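With xe_display_create() gone, the display struct is allocated by intel_display_device_probe() and its teardown is tied to the drm_device with drmm_add_action_or_reset(), the same managed-cleanup mechanism the removed display_destroy() used. A sketch of that pattern; the my_resource_* names are hypothetical, only the drmm_* call is the real API used in the hunk:

#include <drm/drm_managed.h>
#include <linux/err.h>

void *my_resource_create(struct drm_device *dev);       /* hypothetical */
void my_resource_destroy(void *res);                    /* hypothetical */

static void my_resource_remove(struct drm_device *dev, void *res)
{
        my_resource_destroy(res);
}

static int my_resource_probe(struct drm_device *dev)
{
        void *res = my_resource_create(dev);

        if (IS_ERR(res))
                return PTR_ERR(res);

        /* On failure this runs the callback immediately and returns the error. */
        return drmm_add_action_or_reset(dev, my_resource_remove, res);
}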
diff --git a/drivers/gpu/drm/xe/display/xe_display.h b/drivers/gpu/drm/xe/display/xe_display.h
index 46e14f8dee28..76db95c25f7e 100644
--- a/drivers/gpu/drm/xe/display/xe_display.h
+++ b/drivers/gpu/drm/xe/display/xe_display.h
@@ -15,8 +15,6 @@ struct drm_driver;
bool xe_display_driver_probe_defer(struct pci_dev *pdev);
void xe_display_driver_set_hooks(struct drm_driver *driver);
-int xe_display_create(struct xe_device *xe);
-
int xe_display_probe(struct xe_device *xe);
int xe_display_init_early(struct xe_device *xe);
@@ -28,7 +26,7 @@ void xe_display_unregister(struct xe_device *xe);
void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl);
void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir);
void xe_display_irq_reset(struct xe_device *xe);
-void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt);
+void xe_display_irq_postinstall(struct xe_device *xe);
void xe_display_pm_suspend(struct xe_device *xe);
void xe_display_pm_shutdown(struct xe_device *xe);
@@ -46,8 +44,6 @@ static inline int xe_display_driver_probe_defer(struct pci_dev *pdev) { return 0
static inline void xe_display_driver_set_hooks(struct drm_driver *driver) { }
static inline void xe_display_driver_remove(struct xe_device *xe) {}
-static inline int xe_display_create(struct xe_device *xe) { return 0; }
-
static inline int xe_display_probe(struct xe_device *xe) { return 0; }
static inline int xe_display_init_early(struct xe_device *xe) { return 0; }
@@ -59,7 +55,7 @@ static inline void xe_display_unregister(struct xe_device *xe) {}
static inline void xe_display_irq_handler(struct xe_device *xe, u32 master_ctl) {}
static inline void xe_display_irq_enable(struct xe_device *xe, u32 gu_misc_iir) {}
static inline void xe_display_irq_reset(struct xe_device *xe) {}
-static inline void xe_display_irq_postinstall(struct xe_device *xe, struct xe_gt *gt) {}
+static inline void xe_display_irq_postinstall(struct xe_device *xe) {}
static inline void xe_display_pm_suspend(struct xe_device *xe) {}
static inline void xe_display_pm_shutdown(struct xe_device *xe) {}
diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.c b/drivers/gpu/drm/xe/display/xe_display_rpm.c
index 1955153aadba..340f65884812 100644
--- a/drivers/gpu/drm/xe/display/xe_display_rpm.c
+++ b/drivers/gpu/drm/xe/display/xe_display_rpm.c
@@ -1,71 +1,74 @@
// SPDX-License-Identifier: MIT
/* Copyright © 2025 Intel Corporation */
+#include <drm/intel/display_parent_interface.h>
+
+#include "intel_display_core.h"
#include "intel_display_rpm.h"
+#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_pm.h"
-static struct xe_device *display_to_xe(struct intel_display *display)
-{
- return container_of(display, struct xe_device, display);
-}
-
-struct ref_tracker *intel_display_rpm_get_raw(struct intel_display *display)
+static struct ref_tracker *xe_display_rpm_get(const struct drm_device *drm)
{
- return intel_display_rpm_get(display);
+ return xe_pm_runtime_resume_and_get(to_xe_device(drm)) ? INTEL_WAKEREF_DEF : NULL;
}
-void intel_display_rpm_put_raw(struct intel_display *display, struct ref_tracker *wakeref)
+static struct ref_tracker *xe_display_rpm_get_if_in_use(const struct drm_device *drm)
{
- intel_display_rpm_put(display, wakeref);
+ return xe_pm_runtime_get_if_in_use(to_xe_device(drm)) ? INTEL_WAKEREF_DEF : NULL;
}
-struct ref_tracker *intel_display_rpm_get(struct intel_display *display)
+static struct ref_tracker *xe_display_rpm_get_noresume(const struct drm_device *drm)
{
- return xe_pm_runtime_resume_and_get(display_to_xe(display)) ? INTEL_WAKEREF_DEF : NULL;
-}
-
-struct ref_tracker *intel_display_rpm_get_if_in_use(struct intel_display *display)
-{
- return xe_pm_runtime_get_if_in_use(display_to_xe(display)) ? INTEL_WAKEREF_DEF : NULL;
-}
-
-struct ref_tracker *intel_display_rpm_get_noresume(struct intel_display *display)
-{
- xe_pm_runtime_get_noresume(display_to_xe(display));
+ xe_pm_runtime_get_noresume(to_xe_device(drm));
return INTEL_WAKEREF_DEF;
}
-void intel_display_rpm_put(struct intel_display *display, struct ref_tracker *wakeref)
+static void xe_display_rpm_put(const struct drm_device *drm, struct ref_tracker *wakeref)
{
if (wakeref)
- xe_pm_runtime_put(display_to_xe(display));
+ xe_pm_runtime_put(to_xe_device(drm));
}
-void intel_display_rpm_put_unchecked(struct intel_display *display)
+static void xe_display_rpm_put_unchecked(const struct drm_device *drm)
{
- xe_pm_runtime_put(display_to_xe(display));
+ xe_pm_runtime_put(to_xe_device(drm));
}
-bool intel_display_rpm_suspended(struct intel_display *display)
+static bool xe_display_rpm_suspended(const struct drm_device *drm)
{
- struct xe_device *xe = display_to_xe(display);
+ struct xe_device *xe = to_xe_device(drm);
return pm_runtime_suspended(xe->drm.dev);
}
-void assert_display_rpm_held(struct intel_display *display)
+static void xe_display_rpm_assert_held(const struct drm_device *drm)
{
/* FIXME */
}
-void intel_display_rpm_assert_block(struct intel_display *display)
+static void xe_display_rpm_assert_block(const struct drm_device *drm)
{
/* FIXME */
}
-void intel_display_rpm_assert_unblock(struct intel_display *display)
+static void xe_display_rpm_assert_unblock(const struct drm_device *drm)
{
/* FIXME */
}
+
+const struct intel_display_rpm_interface xe_display_rpm_interface = {
+ .get = xe_display_rpm_get,
+ .get_raw = xe_display_rpm_get,
+ .get_if_in_use = xe_display_rpm_get_if_in_use,
+ .get_noresume = xe_display_rpm_get_noresume,
+ .put = xe_display_rpm_put,
+ .put_raw = xe_display_rpm_put,
+ .put_unchecked = xe_display_rpm_put_unchecked,
+ .suspended = xe_display_rpm_suspended,
+ .assert_held = xe_display_rpm_assert_held,
+ .assert_block = xe_display_rpm_assert_block,
+ .assert_unblock = xe_display_rpm_assert_unblock
+};
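The runtime-PM glue is now exported as a const ops table rather than defining the intel_display_rpm_*() entry points directly, so the display core calls through function pointers supplied by the parent driver instead of using container_of() on a driver-private type. A self-contained sketch of that ops-table shape (all names hypothetical):

#include <linux/types.h>

struct rpm_ctx;                                 /* opaque to the consumer */

struct rpm_ops {
        int  (*get)(struct rpm_ctx *ctx);
        void (*put)(struct rpm_ctx *ctx);
        bool (*suspended)(struct rpm_ctx *ctx);
};

/* One backend implementation, analogous to the xe_display_rpm_* functions. */
static int  backend_get(struct rpm_ctx *ctx)        { return 0; }
static void backend_put(struct rpm_ctx *ctx)        { }
static bool backend_suspended(struct rpm_ctx *ctx)  { return false; }

const struct rpm_ops backend_rpm_ops = {
        .get = backend_get,
        .put = backend_put,
        .suspended = backend_suspended,
};

/* The consumer only ever sees the ops pointer and an opaque context. */
static bool consumer_suspended(const struct rpm_ops *ops, struct rpm_ctx *ctx)
{
        return ops->suspended(ctx);
}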
diff --git a/drivers/gpu/drm/xe/display/xe_display_rpm.h b/drivers/gpu/drm/xe/display/xe_display_rpm.h
new file mode 100644
index 000000000000..0bf9d31e87c1
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_display_rpm.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_DISPLAY_RPM_H_
+#define _XE_DISPLAY_RPM_H_
+
+extern const struct intel_display_rpm_interface xe_display_rpm_interface;
+
+#endif /* _XE_DISPLAY_RPM_H_ */
diff --git a/drivers/gpu/drm/xe/display/xe_display_wa.c b/drivers/gpu/drm/xe/display/xe_display_wa.c
index 2933ca97d673..2aa1b8c03411 100644
--- a/drivers/gpu/drm/xe/display/xe_display_wa.c
+++ b/drivers/gpu/drm/xe/display/xe_display_wa.c
@@ -3,8 +3,8 @@
* Copyright © 2024 Intel Corporation
*/
+#include "intel_display_core.h"
#include "intel_display_wa.h"
-
#include "xe_device.h"
#include "xe_wa.h"
@@ -13,6 +13,7 @@
bool intel_display_needs_wa_16023588340(struct intel_display *display)
{
struct xe_device *xe = to_xe_device(display->drm);
+ struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
- return XE_WA(xe_root_mmio_gt(xe), 16023588340);
+ return wa_gt && XE_GT_WA(wa_gt, 16023588340);
}
diff --git a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
index f95375451e2f..58581d7aaae6 100644
--- a/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
+++ b/drivers/gpu/drm/xe/display/xe_dsb_buffer.c
@@ -17,10 +17,7 @@ u32 intel_dsb_buffer_ggtt_offset(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_write(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
iosys_map_wr(&dsb_buf->vma->bo->vmap, idx * 4, u32, val);
- xe_device_l2_flush(xe);
}
u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
@@ -30,12 +27,9 @@ u32 intel_dsb_buffer_read(struct intel_dsb_buffer *dsb_buf, u32 idx)
void intel_dsb_buffer_memset(struct intel_dsb_buffer *dsb_buf, u32 idx, u32 val, size_t size)
{
- struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
-
WARN_ON(idx > (dsb_buf->buf_size - size) / sizeof(*dsb_buf->cmd_buf));
iosys_map_memset(&dsb_buf->vma->bo->vmap, idx * 4, val, size);
- xe_device_l2_flush(xe);
}
bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *dsb_buf, size_t size)
@@ -49,11 +43,11 @@ bool intel_dsb_buffer_create(struct intel_crtc *crtc, struct intel_dsb_buffer *d
return false;
/* Set scanout flag for WC mapping */
- obj = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe),
- NULL, PAGE_ALIGN(size),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
- XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT);
+ obj = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe),
+ PAGE_ALIGN(size),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
+ XE_BO_FLAG_SCANOUT | XE_BO_FLAG_GGTT, false);
if (IS_ERR(obj)) {
kfree(vma);
return false;
@@ -74,9 +68,12 @@ void intel_dsb_buffer_cleanup(struct intel_dsb_buffer *dsb_buf)
void intel_dsb_buffer_flush_map(struct intel_dsb_buffer *dsb_buf)
{
+ struct xe_device *xe = dsb_buf->vma->bo->tile->xe;
+
/*
* The memory barrier here is to ensure coherency of DSB vs MMIO,
* both for weak ordering archs and discrete cards.
*/
- xe_device_wmb(dsb_buf->vma->bo->tile->xe);
+ xe_device_wmb(xe);
+ xe_device_l2_flush(xe);
}
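The DSB change moves the L2 flush out of the per-dword write/memset helpers and into intel_dsb_buffer_flush_map(), so a whole batch of writes shares one barrier-plus-flush. Sketch of the batching idea; cache_flush_buffer() is a hypothetical stand-in for the xe_device_wmb()/xe_device_l2_flush() pair:

#include <linux/types.h>

void cache_flush_buffer(void *buf, size_t len);         /* hypothetical */

static void emit_commands(u32 *buf, const u32 *cmds, size_t n)
{
        size_t i;

        for (i = 0; i < n; i++)
                buf[i] = cmds[i];               /* no flush per write any more */

        /* One flush for the whole batch, as in intel_dsb_buffer_flush_map(). */
        cache_flush_buffer(buf, n * sizeof(u32));
}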
diff --git a/drivers/gpu/drm/xe/display/xe_fb_pin.c b/drivers/gpu/drm/xe/display/xe_fb_pin.c
index d918ae1c8061..1fd4a815e784 100644
--- a/drivers/gpu/drm/xe/display/xe_fb_pin.c
+++ b/drivers/gpu/drm/xe/display/xe_fb_pin.c
@@ -6,6 +6,7 @@
#include <drm/ttm/ttm_bo.h>
#include "i915_vma.h"
+#include "intel_display_core.h"
#include "intel_display_types.h"
#include "intel_dpt.h"
#include "intel_fb.h"
@@ -15,6 +16,7 @@
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_pm.h"
+#include "xe_vram_types.h"
static void
write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_ofs,
@@ -23,6 +25,7 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
struct xe_device *xe = xe_bo_device(bo);
struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
u32 column, row;
+ u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]);
/* TODO: Maybe rewrite so we can traverse the bo addresses sequentially,
* by writing dpt/ggtt in a different order?
@@ -32,10 +35,9 @@ write_dpt_rotated(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs, u32 bo_
u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
for (row = 0; row < height; row++) {
- u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_NONE]);
+ u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE);
- iosys_map_wr(map, *dpt_ofs, u64, pte);
+ iosys_map_wr(map, *dpt_ofs, u64, pte | addr);
*dpt_ofs += 8;
src_idx -= src_stride;
}
@@ -55,17 +57,15 @@ write_dpt_remapped(struct xe_bo *bo, struct iosys_map *map, u32 *dpt_ofs,
{
struct xe_device *xe = xe_bo_device(bo);
struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
- u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index)
- = ggtt->pt_ops->pte_encode_bo;
u32 column, row;
+ u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]);
for (row = 0; row < height; row++) {
u32 src_idx = src_stride * row + bo_ofs;
for (column = 0; column < width; column++) {
- iosys_map_wr(map, *dpt_ofs, u64,
- pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_NONE]));
+ u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE);
+ iosys_map_wr(map, *dpt_ofs, u64, pte | addr);
*dpt_ofs += 8;
src_idx++;
@@ -102,40 +102,40 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
XE_PAGE_SIZE);
if (IS_DGFX(xe))
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM0 |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
else
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_STOLEN |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_STOLEN |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
if (IS_ERR(dpt))
- dpt = xe_bo_create_pin_map_at_aligned(xe, tile0, NULL,
- dpt_size, ~0ull,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_PAGETABLE,
- alignment);
+ dpt = xe_bo_create_pin_map_at_novm(xe, tile0,
+ dpt_size, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_PAGETABLE,
+ alignment, false);
if (IS_ERR(dpt))
return PTR_ERR(dpt);
if (view->type == I915_GTT_VIEW_NORMAL) {
+ u64 pte = xe_ggtt_encode_pte_flags(ggtt, bo, xe->pat.idx[XE_CACHE_NONE]);
u32 x;
for (x = 0; x < size / XE_PAGE_SIZE; x++) {
- u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_NONE]);
+ u64 addr = xe_bo_addr(bo, x * XE_PAGE_SIZE, XE_PAGE_SIZE);
- iosys_map_wr(&dpt->vmap, x * 8, u64, pte);
+ iosys_map_wr(&dpt->vmap, x * 8, u64, pte | addr);
}
} else if (view->type == I915_GTT_VIEW_REMAPPED) {
const struct intel_remapped_info *remap_info = &view->remapped;
@@ -164,6 +164,9 @@ static int __xe_pin_fb_vma_dpt(const struct intel_framebuffer *fb,
vma->dpt = dpt;
vma->node = dpt->ggtt_node[tile0->id];
+
+ /* Ensure DPT writes are flushed */
+ xe_device_l2_flush(xe);
return 0;
}
@@ -173,15 +176,15 @@ write_ggtt_rotated(struct xe_bo *bo, struct xe_ggtt *ggtt, u32 *ggtt_ofs, u32 bo
{
struct xe_device *xe = xe_bo_device(bo);
u32 column, row;
+ u64 pte = ggtt->pt_ops->pte_encode_flags(bo, xe->pat.idx[XE_CACHE_NONE]);
for (column = 0; column < width; column++) {
u32 src_idx = src_stride * (height - 1) + column + bo_ofs;
for (row = 0; row < height; row++) {
- u64 pte = ggtt->pt_ops->pte_encode_bo(bo, src_idx * XE_PAGE_SIZE,
- xe->pat.idx[XE_CACHE_NONE]);
+ u64 addr = xe_bo_addr(bo, src_idx * XE_PAGE_SIZE, XE_PAGE_SIZE);
- ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte);
+ ggtt->pt_ops->ggtt_set_pte(ggtt, *ggtt_ofs, pte | addr);
*ggtt_ofs += XE_PAGE_SIZE;
src_idx -= src_stride;
}
@@ -199,14 +202,15 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
struct drm_gem_object *obj = intel_fb_bo(&fb->base);
struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = to_xe_device(fb->base.dev);
- struct xe_ggtt *ggtt = xe_device_get_root_tile(xe)->mem.ggtt;
+ struct xe_tile *tile0 = xe_device_get_root_tile(xe);
+ struct xe_ggtt *ggtt = tile0->mem.ggtt;
u32 align;
int ret;
/* TODO: Consider sharing framebuffer mapping?
* embed i915_vma inside intel_framebuffer
*/
- xe_pm_runtime_get_noresume(tile_to_xe(ggtt->tile));
+ xe_pm_runtime_get_noresume(xe);
ret = mutex_lock_interruptible(&ggtt->lock);
if (ret)
goto out;
@@ -215,29 +219,22 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
if (xe_bo_is_vram(bo) && ggtt->flags & XE_GGTT_FLAGS_64K)
align = max_t(u32, align, SZ_64K);
- if (bo->ggtt_node[ggtt->tile->id] && view->type == I915_GTT_VIEW_NORMAL) {
- vma->node = bo->ggtt_node[ggtt->tile->id];
+ if (bo->ggtt_node[tile0->id] && view->type == I915_GTT_VIEW_NORMAL) {
+ vma->node = bo->ggtt_node[tile0->id];
} else if (view->type == I915_GTT_VIEW_NORMAL) {
- u32 x, size = bo->ttm.base.size;
-
vma->node = xe_ggtt_node_init(ggtt);
if (IS_ERR(vma->node)) {
ret = PTR_ERR(vma->node);
goto out_unlock;
}
- ret = xe_ggtt_node_insert_locked(vma->node, size, align, 0);
+ ret = xe_ggtt_node_insert_locked(vma->node, xe_bo_size(bo), align, 0);
if (ret) {
xe_ggtt_node_fini(vma->node);
goto out_unlock;
}
- for (x = 0; x < size; x += XE_PAGE_SIZE) {
- u64 pte = ggtt->pt_ops->pte_encode_bo(bo, x,
- xe->pat.idx[XE_CACHE_NONE]);
-
- ggtt->pt_ops->ggtt_set_pte(ggtt, vma->node->base.start + x, pte);
- }
+ xe_ggtt_map_bo(ggtt, vma->node, bo, xe->pat.idx[XE_CACHE_NONE]);
} else {
u32 i, ggtt_ofs;
const struct intel_rotation_info *rot_info = &view->rotated;
@@ -271,7 +268,7 @@ static int __xe_pin_fb_vma_ggtt(const struct intel_framebuffer *fb,
out_unlock:
mutex_unlock(&ggtt->lock);
out:
- xe_pm_runtime_put(tile_to_xe(ggtt->tile));
+ xe_pm_runtime_put(xe);
return ret;
}
@@ -284,7 +281,9 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
struct i915_vma *vma = kzalloc(sizeof(*vma), GFP_KERNEL);
struct drm_gem_object *obj = intel_fb_bo(&fb->base);
struct xe_bo *bo = gem_to_xe_bo(obj);
- int ret;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int ret = 0;
if (!vma)
return ERR_PTR(-ENODEV);
@@ -293,7 +292,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (IS_DGFX(to_xe_device(bo->ttm.base.dev)) &&
intel_fb_rc_ccs_cc_plane(&fb->base) >= 0 &&
!(bo->flags & XE_BO_FLAG_NEEDS_CPU_ACCESS)) {
- struct xe_tile *tile = xe_device_get_root_tile(xe);
+ struct xe_vram_region *vram = xe_device_get_root_tile(xe)->mem.vram;
/*
* If we need to able to access the clear-color value stored in
@@ -301,7 +300,7 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
* accessible. This is important on small-bar systems where
* only some subset of VRAM is CPU accessible.
*/
- if (tile->mem.vram.io_size < tile->mem.vram.usable_size) {
+ if (xe_vram_region_io_size(vram) < xe_vram_region_usable_size(vram)) {
ret = -EINVAL;
goto err;
}
@@ -311,17 +310,22 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
* Pin the framebuffer, we can't use xe_bo_(un)pin functions as the
* assumptions are incorrect for framebuffers
*/
- ret = ttm_bo_reserve(&bo->ttm, false, false, NULL);
- if (ret)
- goto err;
-
- if (IS_DGFX(xe))
- ret = xe_bo_migrate(bo, XE_PL_VRAM0);
- else
- ret = xe_bo_validate(bo, NULL, true);
- if (!ret)
- ttm_bo_pin(&bo->ttm);
- ttm_bo_unreserve(&bo->ttm);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ if (IS_DGFX(xe))
+ ret = xe_bo_migrate(bo, XE_PL_VRAM0, NULL, &exec);
+ else
+ ret = xe_bo_validate(bo, NULL, true, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ if (!ret)
+ ttm_bo_pin(&bo->ttm);
+ }
if (ret)
goto err;
@@ -333,8 +337,6 @@ static struct i915_vma *__xe_pin_fb_vma(const struct intel_framebuffer *fb,
if (ret)
goto err_unpin;
- /* Ensure DPT writes are flushed */
- xe_device_l2_flush(xe);
return vma;
err_unpin:
@@ -348,7 +350,7 @@ err:
static void __xe_unpin_fb_vma(struct i915_vma *vma)
{
- u8 tile_id = vma->node->ggtt->tile->id;
+ u8 tile_id = xe_device_get_root_tile(xe_bo_device(vma->bo))->id;
if (!refcount_dec_and_test(&vma->ref))
return;
@@ -388,7 +390,9 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state,
const struct intel_plane_state *old_plane_state)
{
struct intel_framebuffer *fb = to_intel_framebuffer(new_plane_state->hw.fb);
+ struct intel_plane *plane = to_intel_plane(new_plane_state->uapi.plane);
struct xe_device *xe = to_xe_device(fb->base.dev);
+ struct intel_display *display = xe->display;
struct i915_vma *vma;
if (old_plane_state->hw.fb == new_plane_state->hw.fb &&
@@ -399,8 +403,8 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state,
goto found;
}
- if (fb == intel_fbdev_framebuffer(xe->display.fbdev.fbdev)) {
- vma = intel_fbdev_vma_pointer(xe->display.fbdev.fbdev);
+ if (fb == intel_fbdev_framebuffer(display->fbdev.fbdev)) {
+ vma = intel_fbdev_vma_pointer(display->fbdev.fbdev);
if (vma)
goto found;
}
@@ -410,6 +414,10 @@ static bool reuse_vma(struct intel_plane_state *new_plane_state,
found:
refcount_inc(&vma->ref);
new_plane_state->ggtt_vma = vma;
+
+ new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) +
+ plane->surf_offset(new_plane_state);
+
return true;
}
@@ -436,6 +444,10 @@ int intel_plane_pin_fb(struct intel_plane_state *new_plane_state,
return PTR_ERR(vma);
new_plane_state->ggtt_vma = vma;
+
+ new_plane_state->surf = i915_ggtt_offset(new_plane_state->ggtt_vma) +
+ plane->surf_offset(new_plane_state);
+
return 0;
}
@@ -463,3 +475,8 @@ u64 intel_dpt_offset(struct i915_vma *dpt_vma)
{
return 0;
}
+
+void intel_fb_get_map(struct i915_vma *vma, struct iosys_map *map)
+{
+ *map = vma->bo->vmap;
+}
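Throughout xe_fb_pin.c the per-page pte_encode_bo() call is replaced by computing the flag bits once (xe_ggtt_encode_pte_flags()/pte_encode_flags()) and OR-ing in the per-page address from xe_bo_addr() inside the loop. A sketch of that hoist-the-invariant-bits structure; encode_flags() and page_addr() are hypothetical placeholders:

#include <linux/types.h>

u64 encode_flags(void);                 /* hypothetical: cacheability/valid bits */
u64 page_addr(unsigned int idx);        /* hypothetical: per-page physical address */

static void write_ptes(u64 *pt, unsigned int npages)
{
        const u64 flags = encode_flags();       /* hoisted out of the loop */
        unsigned int i;

        for (i = 0; i < npages; i++)
                pt[i] = flags | page_addr(i);
}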
diff --git a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
index b35a6f201d4a..4ae847b628e2 100644
--- a/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
+++ b/drivers/gpu/drm/xe/display/xe_hdcp_gsc.c
@@ -72,10 +72,10 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
int ret = 0;
/* allocate object of two page for HDCP command memory and store it */
- bo = xe_bo_create_pin_map(xe, xe_device_get_root_tile(xe), NULL, PAGE_SIZE * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, xe_device_get_root_tile(xe), PAGE_SIZE * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo)) {
drm_err(&xe->drm, "Failed to allocate bo for HDCP streaming command!\n");
@@ -85,7 +85,7 @@ static int intel_hdcp_gsc_initialize_message(struct xe_device *xe,
cmd_in = xe_bo_ggtt_addr(bo);
cmd_out = cmd_in + PAGE_SIZE;
- xe_map_memset(xe, &bo->vmap, 0, 0, bo->size);
+ xe_map_memset(xe, &bo->vmap, 0, 0, xe_bo_size(bo));
gsc_context->hdcp_bo = bo;
gsc_context->hdcp_cmd_in = cmd_in;
diff --git a/drivers/gpu/drm/xe/display/xe_panic.c b/drivers/gpu/drm/xe/display/xe_panic.c
new file mode 100644
index 000000000000..df663286092a
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_panic.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2025 Intel Corporation */
+
+#include <drm/drm_cache.h>
+#include <drm/drm_panic.h>
+
+#include "intel_display_types.h"
+#include "intel_fb.h"
+#include "intel_panic.h"
+#include "xe_bo.h"
+#include "xe_res_cursor.h"
+
+struct intel_panic {
+ struct xe_res_cursor res;
+ struct iosys_map vmap;
+
+ int page;
+};
+
+static void xe_panic_kunmap(struct intel_panic *panic)
+{
+ if (!panic->vmap.is_iomem && iosys_map_is_set(&panic->vmap)) {
+ drm_clflush_virt_range(panic->vmap.vaddr, PAGE_SIZE);
+ kunmap_local(panic->vmap.vaddr);
+ }
+ iosys_map_clear(&panic->vmap);
+ panic->page = -1;
+}
+
+/*
+ * The scanout buffer pages are not mapped, so for each pixel,
+ * use kmap_local_page_try_from_panic() to map the page, and write the pixel.
+ * Try to keep the map from the previous pixel, to avoid too much map/unmap.
+ */
+static void xe_panic_page_set_pixel(struct drm_scanout_buffer *sb, unsigned int x,
+ unsigned int y, u32 color)
+{
+ struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
+ struct intel_panic *panic = fb->panic;
+ struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base));
+ unsigned int new_page;
+ unsigned int offset;
+
+ if (fb->panic_tiling)
+ offset = fb->panic_tiling(sb->width, x, y);
+ else
+ offset = y * sb->pitch[0] + x * sb->format->cpp[0];
+
+ new_page = offset >> PAGE_SHIFT;
+ offset = offset % PAGE_SIZE;
+ if (new_page != panic->page) {
+ if (xe_bo_is_vram(bo)) {
+ /* Display is always mapped on root tile */
+ struct xe_vram_region *vram = xe_bo_device(bo)->mem.vram;
+
+ if (panic->page < 0 || new_page < panic->page) {
+ xe_res_first(bo->ttm.resource, new_page * PAGE_SIZE,
+ bo->ttm.base.size - new_page * PAGE_SIZE, &panic->res);
+ } else {
+ xe_res_next(&panic->res, PAGE_SIZE * (new_page - panic->page));
+ }
+ iosys_map_set_vaddr_iomem(&panic->vmap,
+ vram->mapping + panic->res.start);
+ } else {
+ xe_panic_kunmap(panic);
+ iosys_map_set_vaddr(&panic->vmap,
+ ttm_bo_kmap_try_from_panic(&bo->ttm,
+ new_page));
+ }
+ panic->page = new_page;
+ }
+
+ if (iosys_map_is_set(&panic->vmap))
+ iosys_map_wr(&panic->vmap, offset, u32, color);
+}
+
+struct intel_panic *intel_panic_alloc(void)
+{
+ struct intel_panic *panic;
+
+ panic = kzalloc(sizeof(*panic), GFP_KERNEL);
+
+ return panic;
+}
+
+int intel_panic_setup(struct intel_panic *panic, struct drm_scanout_buffer *sb)
+{
+ struct intel_framebuffer *fb = (struct intel_framebuffer *)sb->private;
+ struct xe_bo *bo = gem_to_xe_bo(intel_fb_bo(&fb->base));
+
+ if (xe_bo_is_vram(bo) && !xe_bo_is_visible_vram(bo))
+ return -ENODEV;
+
+ panic->page = -1;
+ sb->set_pixel = xe_panic_page_set_pixel;
+ return 0;
+}
+
+void intel_panic_finish(struct intel_panic *panic)
+{
+ xe_panic_kunmap(panic);
+}
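The new panic handler keeps a single page mapped at a time: a pixel's byte offset is split into a page index and an in-page offset, and the mapping is only re-established when the page index changes, which keeps map/unmap churn low in the panic path. A simplified sketch of that caching; map_page()/unmap_page() stand in for the kmap/iomem handling above and the static locals stand in for the intel_panic fields:

#include <linux/mm.h>
#include <linux/types.h>

void *map_page(int page);               /* hypothetical */
void unmap_page(void *vaddr);           /* hypothetical */

static void set_pixel(unsigned long byte_offset, u32 color)
{
        static int cached_page = -1;
        static void *vaddr;
        int page = byte_offset >> PAGE_SHIFT;
        unsigned int off = byte_offset & (PAGE_SIZE - 1);

        if (page != cached_page) {
                if (vaddr)
                        unmap_page(vaddr);
                vaddr = map_page(page);
                cached_page = page;
        }

        if (vaddr)
                *(u32 *)((u8 *)vaddr + off) = color;
}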
diff --git a/drivers/gpu/drm/xe/display/xe_plane_initial.c b/drivers/gpu/drm/xe/display/xe_plane_initial.c
index 6502b8274173..12d25c5290fd 100644
--- a/drivers/gpu/drm/xe/display/xe_plane_initial.c
+++ b/drivers/gpu/drm/xe/display/xe_plane_initial.c
@@ -10,19 +10,22 @@
#include "xe_ggtt.h"
#include "xe_mmio.h"
-#include "i915_reg.h"
-#include "intel_atomic_plane.h"
+#include "i915_vma.h"
#include "intel_crtc.h"
#include "intel_display.h"
+#include "intel_display_core.h"
+#include "intel_display_regs.h"
#include "intel_display_types.h"
#include "intel_fb.h"
#include "intel_fb_pin.h"
#include "intel_frontbuffer.h"
+#include "intel_plane.h"
#include "intel_plane_initial.h"
#include "xe_bo.h"
+#include "xe_vram_types.h"
#include "xe_wa.h"
-#include <generated/xe_wa_oob.h>
+#include <generated/xe_device_wa_oob.h>
void intel_plane_initial_vblank_wait(struct intel_crtc *crtc)
{
@@ -87,12 +90,8 @@ initial_plane_bo(struct xe_device *xe,
base = round_down(plane_config->base, page_size);
if (IS_DGFX(xe)) {
- u64 __iomem *gte = tile0->mem.ggtt->gsm;
- u64 pte;
+ u64 pte = xe_ggtt_read_pte(tile0->mem.ggtt, base);
- gte += base / XE_PAGE_SIZE;
-
- pte = ioread64(gte);
if (!(pte & XE_GGTT_PTE_DM)) {
drm_err(&xe->drm,
"Initial plane programming missing DM bit\n");
@@ -106,7 +105,7 @@ initial_plane_bo(struct xe_device *xe,
* We don't currently expect this to ever be placed in the
* stolen portion.
*/
- if (phys_base >= tile0->mem.vram.usable_size) {
+ if (phys_base >= xe_vram_region_usable_size(tile0->mem.vram)) {
drm_err(&xe->drm,
"Initial plane programming using invalid range, phys_base=%pa\n",
&phys_base);
@@ -124,7 +123,7 @@ initial_plane_bo(struct xe_device *xe,
phys_base = base;
flags |= XE_BO_FLAG_STOLEN;
- if (XE_WA(xe_root_mmio_gt(xe), 22019338487_display))
+ if (XE_DEVICE_WA(xe, 22019338487_display))
return NULL;
/*
@@ -141,8 +140,8 @@ initial_plane_bo(struct xe_device *xe,
page_size);
size -= base;
- bo = xe_bo_create_pin_map_at(xe, tile0, NULL, size, phys_base,
- ttm_bo_type_kernel, flags);
+ bo = xe_bo_create_pin_map_at_novm(xe, tile0, size, phys_base,
+ ttm_bo_type_kernel, flags, 0, false);
if (IS_ERR(bo)) {
drm_dbg(&xe->drm,
"Failed to create bo phys_base=%pa size %u with flags %x: %li\n",
@@ -187,7 +186,7 @@ intel_alloc_initial_plane_obj(struct intel_crtc *crtc,
return false;
if (intel_framebuffer_init(to_intel_framebuffer(fb),
- &bo->ttm.base, &mode_cmd)) {
+ &bo->ttm.base, fb->format, &mode_cmd)) {
drm_dbg_kms(&xe->drm, "intel fb init failed\n");
goto err_bo;
}
@@ -237,6 +236,9 @@ intel_find_initial_plane_obj(struct intel_crtc *crtc,
goto nofb;
plane_state->ggtt_vma = vma;
+
+ plane_state->surf = i915_ggtt_offset(plane_state->ggtt_vma);
+
plane_state->uapi.src_x = 0;
plane_state->uapi.src_y = 0;
plane_state->uapi.src_w = fb->width << 16;
diff --git a/drivers/gpu/drm/xe/display/xe_stolen.c b/drivers/gpu/drm/xe/display/xe_stolen.c
new file mode 100644
index 000000000000..9f04ba36e930
--- /dev/null
+++ b/drivers/gpu/drm/xe/display/xe_stolen.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: MIT
+/* Copyright © 2025 Intel Corporation */
+
+#include "gem/i915_gem_stolen.h"
+#include "xe_res_cursor.h"
+#include "xe_ttm_stolen_mgr.h"
+#include "xe_validation.h"
+
+struct intel_stolen_node {
+ struct xe_device *xe;
+ struct xe_bo *bo;
+};
+
+int i915_gem_stolen_insert_node_in_range(struct intel_stolen_node *node, u64 size,
+ unsigned int align, u64 start, u64 end)
+{
+ struct xe_device *xe = node->xe;
+
+ struct xe_bo *bo;
+ int err = 0;
+ u32 flags = XE_BO_FLAG_PINNED | XE_BO_FLAG_STOLEN;
+
+ if (start < SZ_4K)
+ start = SZ_4K;
+
+ if (align) {
+ size = ALIGN(size, align);
+ start = ALIGN(start, align);
+ }
+
+ bo = xe_bo_create_pin_range_novm(xe, xe_device_get_root_tile(xe),
+ size, start, end, ttm_bo_type_kernel, flags);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ bo = NULL;
+ return err;
+ }
+
+ node->bo = bo;
+
+ return err;
+}
+
+int i915_gem_stolen_insert_node(struct intel_stolen_node *node, u64 size, unsigned int align)
+{
+ /* Not used on xe */
+ WARN_ON(1);
+
+ return -ENODEV;
+}
+
+void i915_gem_stolen_remove_node(struct intel_stolen_node *node)
+{
+ xe_bo_unpin_map_no_vm(node->bo);
+ node->bo = NULL;
+}
+
+bool i915_gem_stolen_initialized(struct drm_device *drm)
+{
+ struct xe_device *xe = to_xe_device(drm);
+
+ return ttm_manager_type(&xe->ttm, XE_PL_STOLEN);
+}
+
+bool i915_gem_stolen_node_allocated(const struct intel_stolen_node *node)
+{
+ return node->bo;
+}
+
+u32 i915_gem_stolen_node_offset(struct intel_stolen_node *node)
+{
+ struct xe_res_cursor res;
+
+ xe_res_first(node->bo->ttm.resource, 0, 4096, &res);
+ return res.start;
+}
+
+/* Used for < gen4. These are not supported by Xe */
+u64 i915_gem_stolen_area_address(struct drm_device *drm)
+{
+ WARN_ON(1);
+
+ return 0;
+}
+
+/* Used for gen9 specific WA. Gen9 is not supported by Xe */
+u64 i915_gem_stolen_area_size(struct drm_device *drm)
+{
+ WARN_ON(1);
+
+ return 0;
+}
+
+u64 i915_gem_stolen_node_address(struct intel_stolen_node *node)
+{
+ struct xe_device *xe = node->xe;
+
+ return xe_ttm_stolen_gpu_offset(xe) + i915_gem_stolen_node_offset(node);
+}
+
+u64 i915_gem_stolen_node_size(const struct intel_stolen_node *node)
+{
+ return node->bo->ttm.base.size;
+}
+
+struct intel_stolen_node *i915_gem_stolen_node_alloc(struct drm_device *drm)
+{
+ struct xe_device *xe = to_xe_device(drm);
+ struct intel_stolen_node *node;
+
+ node = kzalloc(sizeof(*node), GFP_KERNEL);
+ if (!node)
+ return NULL;
+
+ node->xe = xe;
+
+ return node;
+}
+
+void i915_gem_stolen_node_free(const struct intel_stolen_node *node)
+{
+ kfree(node);
+}
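The stolen-memory shim keeps allocations out of the first 4K and rounds both the size and the start of the search range up to the caller's alignment before creating a pinned BO inside [start, end). That range preparation in isolation; alloc_in_range() is a hypothetical stand-in for xe_bo_create_pin_range_novm():

#include <linux/align.h>
#include <linux/sizes.h>
#include <linux/types.h>

int alloc_in_range(u64 size, u64 start, u64 end);       /* hypothetical */

static int insert_in_range(u64 size, unsigned int align, u64 start, u64 end)
{
        if (start < SZ_4K)                      /* keep the first 4K reserved */
                start = SZ_4K;

        if (align) {
                size = ALIGN(size, align);
                start = ALIGN(start, align);
        }

        return alloc_in_range(size, start, end);
}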
diff --git a/drivers/gpu/drm/xe/display/xe_tdf.c b/drivers/gpu/drm/xe/display/xe_tdf.c
index 2a7fccbeb1d5..78bda4c47874 100644
--- a/drivers/gpu/drm/xe/display/xe_tdf.c
+++ b/drivers/gpu/drm/xe/display/xe_tdf.c
@@ -3,9 +3,9 @@
* Copyright © 2024 Intel Corporation
*/
-#include "xe_device.h"
-#include "intel_display_types.h"
+#include "intel_display_core.h"
#include "intel_tdf.h"
+#include "xe_device.h"
void intel_td_flush(struct intel_display *display)
{
diff --git a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
index 8cfcd3360896..5d41ca297447 100644
--- a/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_gpu_commands.h
@@ -31,6 +31,12 @@
#define XY_FAST_COPY_BLT_D1_DST_TILE4 REG_BIT(30)
#define XE2_XY_FAST_COPY_BLT_MOCS_INDEX_MASK GENMASK(23, 20)
+#define MEM_COPY_CMD (2 << 29 | 0x5a << 22 | 0x8)
+#define MEM_COPY_PAGE_COPY_MODE REG_BIT(19)
+#define MEM_COPY_MATRIX_COPY REG_BIT(17)
+#define MEM_COPY_SRC_MOCS_INDEX_MASK GENMASK(31, 28)
+#define MEM_COPY_DST_MOCS_INDEX_MASK GENMASK(6, 3)
+
#define PVC_MEM_SET_CMD (2 << 29 | 0x5b << 22)
#define PVC_MEM_SET_CMD_LEN_DW 7
#define PVC_MEM_SET_MATRIX REG_BIT(17)
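The new MEM_COPY_* definitions follow the driver's usual conventions: single-bit flags via REG_BIT() and multi-bit fields via REG_GENMASK(), which are then packed and extracted with REG_FIELD_PREP()-style helpers when command dwords are built. The same idea with the generic bitfield helpers; the mask shape mirrors MEM_COPY_DST_MOCS_INDEX_MASK, but which command dword it lives in is hardware-specific and not shown here:

#include <linux/bitfield.h>
#include <linux/bits.h>
#include <linux/types.h>

#define DST_MOCS_MASK   GENMASK(6, 3)   /* same shape as MEM_COPY_DST_MOCS_INDEX_MASK */

static u32 set_dst_mocs(u32 dw, u32 index)
{
        return (dw & ~DST_MOCS_MASK) | FIELD_PREP(DST_MOCS_MASK, index);
}

static u32 get_dst_mocs(u32 dw)
{
        return FIELD_GET(DST_MOCS_MASK, dw);
}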
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index e3f5e8bb3ebc..c47b290e0e9f 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -65,6 +65,7 @@
#define MI_LOAD_REGISTER_MEM (__MI_INSTR(0x29) | XE_INSTR_NUM_DW(4))
#define MI_LRM_USE_GGTT REG_BIT(22)
+#define MI_LRM_ASYNC REG_BIT(21)
#define MI_LOAD_REGISTER_REG (__MI_INSTR(0x2a) | XE_INSTR_NUM_DW(3))
#define MI_LRR_DST_CS_MMIO REG_BIT(19)
diff --git a/drivers/gpu/drm/xe/regs/xe_bars.h b/drivers/gpu/drm/xe/regs/xe_bars.h
index ce05b6ae832f..880140d6ccdc 100644
--- a/drivers/gpu/drm/xe/regs/xe_bars.h
+++ b/drivers/gpu/drm/xe/regs/xe_bars.h
@@ -7,5 +7,6 @@
#define GTTMMADR_BAR 0 /* MMIO + GTT */
#define LMEM_BAR 2 /* VRAM */
+#define VF_LMEM_BAR 9 /* VF VRAM */
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index 7ade41e2b7b3..68172b0248a6 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -111,6 +111,9 @@
#define PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS REG_BIT(14)
#define CS_PRIORITY_MEM_READ REG_BIT(7)
+#define CS_DEBUG_MODE2(base) XE_REG((base) + 0xd8, XE_REG_OPTION_MASKED)
+#define INSTRUCTION_STATE_CACHE_INVALIDATE REG_BIT(6)
+
#define FF_SLICE_CS_CHICKEN1(base) XE_REG((base) + 0xe0, XE_REG_OPTION_MASKED)
#define FFSC_PERCTX_PREEMPT_CTRL REG_BIT(14)
@@ -138,6 +141,8 @@
#define INHIBIT_SWITCH_UNTIL_PREEMPTED REG_BIT(31)
#define IDLE_DELAY REG_GENMASK(20, 0)
+#define RING_CURRENT_LRCA(base) XE_REG((base) + 0x240)
+
#define RING_CONTEXT_CONTROL(base) XE_REG((base) + 0x244, XE_REG_OPTION_MASKED)
#define CTX_CTRL_PXP_ENABLE REG_BIT(10)
#define CTX_CTRL_OAC_CONTEXT_ENABLE REG_BIT(8)
@@ -150,6 +155,8 @@
#define GFX_DISABLE_LEGACY_MODE REG_BIT(3)
#define GFX_MSIX_INTERRUPT_ENABLE REG_BIT(13)
+#define RING_CSMQDEBUG(base) XE_REG((base) + 0x2b0)
+
#define RING_TIMESTAMP(base) XE_REG((base) + 0x358)
#define RING_TIMESTAMP_UDW(base) XE_REG((base) + 0x358 + 4)
diff --git a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
index 7702364b65f1..180be82672ab 100644
--- a/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gsc_regs.h
@@ -13,9 +13,15 @@
/* Definitions of GSC H/W registers, bits, etc */
+#define BMG_GSC_HECI1_BASE 0x373000
+
#define MTL_GSC_HECI1_BASE 0x00116000
#define MTL_GSC_HECI2_BASE 0x00117000
+#define DG1_GSC_HECI2_BASE 0x00259000
+#define PVC_GSC_HECI2_BASE 0x00285000
+#define DG2_GSC_HECI2_BASE 0x00374000
+
#define HECI_H_CSR(base) XE_REG((base) + 0x4)
#define HECI_H_CSR_IE REG_BIT(0)
#define HECI_H_CSR_IS REG_BIT(1)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index 5cd5ab8529c5..917a088c28f2 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -37,12 +37,18 @@
#define GMD_ID XE_REG(0xd8c)
#define GMD_ID_ARCH_MASK REG_GENMASK(31, 22)
#define GMD_ID_RELEASE_MASK REG_GENMASK(21, 14)
+/*
+ * Spec defines these bits as "Reserved", but then make them assume some
+ * meaning that depends on the ARCH. To avoid any confusion, call them
+ * SUBIP_FLAG_MASK.
+ */
+#define GMD_ID_SUBIP_FLAG_MASK REG_GENMASK(13, 6)
#define GMD_ID_REVID REG_GENMASK(5, 0)
#define FORCEWAKE_ACK_GSC XE_REG(0xdf8)
#define FORCEWAKE_ACK_GT_MTL XE_REG(0xdfc)
-#define MCFG_MCR_SELECTOR XE_REG(0xfd0)
+#define STEER_SEMAPHORE XE_REG(0xfd0)
#define MTL_MCR_SELECTOR XE_REG(0xfd4)
#define SF_MCR_SELECTOR XE_REG(0xfd8)
#define MCR_SELECTOR XE_REG(0xfdc)
@@ -95,7 +101,6 @@
#define XE2_LMEM_CFG XE_REG(0x48b0)
-#define XEHP_TILE_ADDR_RANGE(_idx) XE_REG_MCR(0x4900 + (_idx) * 4)
#define XEHP_FLAT_CCS_BASE_ADDR XE_REG_MCR(0x4910)
#define XEHP_FLAT_CCS_PTR REG_GENMASK(31, 8)
@@ -168,6 +173,7 @@
#define XEHP_SLICE_COMMON_ECO_CHICKEN1 XE_REG_MCR(0x731c, XE_REG_OPTION_MASKED)
#define MSC_MSAA_REODER_BUF_BYPASS_DISABLE REG_BIT(14)
+#define FAST_CLEAR_VALIGN_FIX REG_BIT(13)
#define XE2LPM_CCCHKNREG1 XE_REG(0x82a8)
@@ -239,6 +245,9 @@
#define XE2_GT_GEOMETRY_DSS_1 XE_REG(0x9150)
#define XE2_GT_GEOMETRY_DSS_2 XE_REG(0x9154)
+#define SERVICE_COPY_ENABLE XE_REG(0x9170)
+#define FUSE_SERVICE_COPY_ENABLE_MASK REG_GENMASK(7, 0)
+
#define GDRST XE_REG(0x941c)
#define GRDOM_GUC REG_BIT(3)
#define GRDOM_FULL REG_BIT(0)
@@ -342,13 +351,10 @@
#define POWERGATE_ENABLE XE_REG(0xa210)
#define RENDER_POWERGATE_ENABLE REG_BIT(0)
#define MEDIA_POWERGATE_ENABLE REG_BIT(1)
+#define MEDIA_SAMPLERS_POWERGATE_ENABLE REG_BIT(2)
#define VDN_HCP_POWERGATE_ENABLE(n) REG_BIT(3 + 2 * (n))
#define VDN_MFXVDENC_POWERGATE_ENABLE(n) REG_BIT(4 + 2 * (n))
-#define CTC_MODE XE_REG(0xa26c)
-#define CTC_SHIFT_PARAMETER_MASK REG_GENMASK(2, 1)
-#define CTC_SOURCE_DIVIDE_LOGIC REG_BIT(0)
-
#define FORCEWAKE_RENDER XE_REG(0xa278)
#define POWERGATE_DOMAIN_STATUS XE_REG(0xa2a0)
@@ -522,6 +528,7 @@
#define TDL_CHICKEN XE_REG_MCR(0xe5f4, XE_REG_OPTION_MASKED)
#define QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE REG_BIT(12)
+#define EUSTALL_PERF_SAMPLING_DISABLE REG_BIT(5)
#define LSC_CHICKEN_BIT_0 XE_REG_MCR(0xe7c8)
#define DISABLE_D8_D16_COASLESCE REG_BIT(30)
@@ -543,6 +550,9 @@
#define SARB_CHICKEN1 XE_REG_MCR(0xe90c)
#define COMP_CKN_IN REG_GENMASK(30, 29)
+#define MAIN_GAMCTRL_MODE XE_REG(0xef00)
+#define MAIN_GAMCTRL_QUEUE_SELECT REG_BIT(0)
+
#define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED)
#define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
#define RCU_MODE_CCS_ENABLE REG_BIT(0)
@@ -579,6 +589,7 @@
#define GT_GFX_RC6 XE_REG(0x138108)
#define GT0_PERF_LIMIT_REASONS XE_REG(0x1381a8)
+/* Common performance limit reason bits - available on all platforms */
#define GT0_PERF_LIMIT_REASONS_MASK 0xde3
#define PROCHOT_MASK REG_BIT(0)
#define THERMAL_LIMIT_MASK REG_BIT(1)
@@ -588,6 +599,18 @@
#define POWER_LIMIT_4_MASK REG_BIT(8)
#define POWER_LIMIT_1_MASK REG_BIT(10)
#define POWER_LIMIT_2_MASK REG_BIT(11)
+/* Platform-specific performance limit reason bits - for Crescent Island */
+#define CRI_PERF_LIMIT_REASONS_MASK 0xfdff
+#define SOC_THERMAL_LIMIT_MASK REG_BIT(1)
+#define MEM_THERMAL_MASK REG_BIT(2)
+#define VR_THERMAL_MASK REG_BIT(3)
+#define ICCMAX_MASK REG_BIT(4)
+#define SOC_AVG_THERMAL_MASK REG_BIT(6)
+#define FASTVMODE_MASK REG_BIT(7)
+#define PSYS_PL1_MASK REG_BIT(12)
+#define PSYS_PL2_MASK REG_BIT(13)
+#define P0_FREQ_MASK REG_BIT(14)
+#define PSYS_CRIT_MASK REG_BIT(15)
#define GT_PERF_STATUS XE_REG(0x1381b4)
#define VOLTAGE_MASK REG_GENMASK(10, 0)
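CRI_PERF_LIMIT_REASONS_MASK and its per-reason bits sit alongside the pre-existing common bits, so a platform selects its mask and then tests individual reasons. A rough sketch of how such a register is typically decoded; read_reg() and the table contents are illustrative only, with the bit positions taken from the common definitions above:

#include <linux/bits.h>
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/types.h>

u32 read_reg(void);                     /* hypothetical MMIO read */

static const struct {
        u32 bit;
        const char *name;
} reasons[] = {
        { BIT(0),  "PROCHOT" },         /* PROCHOT_MASK */
        { BIT(1),  "thermal" },         /* THERMAL_LIMIT_MASK */
        { BIT(10), "PL1" },             /* POWER_LIMIT_1_MASK */
        { BIT(11), "PL2" },             /* POWER_LIMIT_2_MASK */
};

static void log_limit_reasons(u32 platform_mask)
{
        u32 val = read_reg() & platform_mask;
        unsigned int i;

        for (i = 0; i < ARRAY_SIZE(reasons); i++)
                if (val & reasons[i].bit)
                        pr_info("throttled: %s\n", reasons[i].name);
}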
diff --git a/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h
new file mode 100644
index 000000000000..c146b9ef44eb
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_hw_error_regs.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_HW_ERROR_REGS_H_
+#define _XE_HW_ERROR_REGS_H_
+
+#define HEC_UNCORR_ERR_STATUS(base) XE_REG((base) + 0x118)
+#define UNCORR_FW_REPORTED_ERR BIT(6)
+
+#define HEC_UNCORR_FW_ERR_DW0(base) XE_REG((base) + 0x124)
+
+#define DEV_ERR_STAT_NONFATAL 0x100178
+#define DEV_ERR_STAT_CORRECTABLE 0x10017c
+#define DEV_ERR_STAT_REG(x) XE_REG(_PICK_EVEN((x), \
+ DEV_ERR_STAT_CORRECTABLE, \
+ DEV_ERR_STAT_NONFATAL))
+#define XE_CSC_ERROR BIT(17)
+#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_i2c_regs.h b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h
new file mode 100644
index 000000000000..f2e455e2bfe4
--- /dev/null
+++ b/drivers/gpu/drm/xe/regs/xe_i2c_regs.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef _XE_I2C_REGS_H_
+#define _XE_I2C_REGS_H_
+
+#include <linux/pci_regs.h>
+
+#include "xe_reg_defs.h"
+#include "xe_regs.h"
+
+#define I2C_BRIDGE_OFFSET (SOC_BASE + 0xd9000)
+#define I2C_CONFIG_SPACE_OFFSET (SOC_BASE + 0xf6000)
+#define I2C_MEM_SPACE_OFFSET (SOC_BASE + 0xf7400)
+
+#define REG_SG_REMAP_ADDR_PREFIX XE_REG(SOC_BASE + 0x0164)
+#define REG_SG_REMAP_ADDR_POSTFIX XE_REG(SOC_BASE + 0x0168)
+
+#define I2C_BRIDGE_PCICFGCTL XE_REG(I2C_BRIDGE_OFFSET + 0x200)
+#define ACPI_INTR_EN REG_BIT(1)
+
+#define I2C_CONFIG_CMD XE_REG(I2C_CONFIG_SPACE_OFFSET + PCI_COMMAND)
+#define I2C_CONFIG_PMCSR XE_REG(I2C_CONFIG_SPACE_OFFSET + 0x84)
+
+#endif /* _XE_I2C_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_irq_regs.h b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
index f0ecfcac4003..2f97662d958d 100644
--- a/drivers/gpu/drm/xe/regs/xe_irq_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_irq_regs.h
@@ -18,7 +18,9 @@
#define GFX_MSTR_IRQ XE_REG(0x190010, XE_REG_OPTION_VF)
#define MASTER_IRQ REG_BIT(31)
#define GU_MISC_IRQ REG_BIT(29)
+#define ERROR_IRQ(x) REG_BIT(26 + (x))
#define DISPLAY_IRQ REG_BIT(16)
+#define I2C_IRQ REG_BIT(12)
#define GT_DW_IRQ(x) REG_BIT(x)
/*
@@ -63,7 +65,10 @@
#define BCS_RSVD_INTR_MASK XE_REG(0x1900a0, XE_REG_OPTION_VF)
#define VCS0_VCS1_INTR_MASK XE_REG(0x1900a8, XE_REG_OPTION_VF)
#define VCS2_VCS3_INTR_MASK XE_REG(0x1900ac, XE_REG_OPTION_VF)
+#define VCS4_VCS5_INTR_MASK XE_REG(0x1900b0, XE_REG_OPTION_VF)
+#define VCS6_VCS7_INTR_MASK XE_REG(0x1900b4, XE_REG_OPTION_VF)
#define VECS0_VECS1_INTR_MASK XE_REG(0x1900d0, XE_REG_OPTION_VF)
+#define VECS2_VECS3_INTR_MASK XE_REG(0x1900d4, XE_REG_OPTION_VF)
#define HECI2_RSVD_INTR_MASK XE_REG(0x1900e4)
#define GUC_SG_INTR_MASK XE_REG(0x1900e8, XE_REG_OPTION_VF)
#define GPM_WGBOXPERF_INTR_MASK XE_REG(0x1900ec, XE_REG_OPTION_VF)
@@ -78,9 +83,10 @@
#define GT_WAIT_SEMAPHORE_INTERRUPT REG_BIT(11)
#define GT_CONTEXT_SWITCH_INTERRUPT REG_BIT(8)
#define GSC_ER_COMPLETE REG_BIT(5)
-#define GT_RENDER_PIPECTL_NOTIFY_INTERRUPT REG_BIT(4)
+#define GT_FLUSH_COMPLETE_INTERRUPT REG_BIT(4)
#define GT_CS_MASTER_ERROR_INTERRUPT REG_BIT(3)
-#define GT_RENDER_USER_INTERRUPT REG_BIT(0)
+#define GT_COMPUTE_WALKER_INTERRUPT REG_BIT(2)
+#define GT_MI_USER_INTERRUPT REG_BIT(0)
/* irqs for OTHER_KCR_INSTANCE */
#define KCR_PXP_STATE_TERMINATED_INTERRUPT REG_BIT(1)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 994af591a2e8..b5eff383902c 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -12,9 +12,13 @@
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
#define CTX_BB_PER_CTX_PTR (0x12 + 1)
+#define CTX_CS_INDIRECT_CTX (0x14 + 1)
+#define CTX_CS_INDIRECT_CTX_OFFSET (0x16 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
+#define CTX_ACC_CTR_THOLD (0x2a + 1)
+#define CTX_ASID (0x2e + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
diff --git a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
index f5e5234857c1..ef2bf984723f 100644
--- a/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_mchbar_regs.h
@@ -38,10 +38,11 @@
#define TEMP_MASK REG_GENMASK(7, 0)
#define PCU_CR_PACKAGE_RAPL_LIMIT XE_REG(MCHBAR_MIRROR_BASE_SNB + 0x59a0)
-#define PKG_PWR_LIM_1 REG_GENMASK(14, 0)
-#define PKG_PWR_LIM_1_EN REG_BIT(15)
-#define PKG_PWR_LIM_1_TIME REG_GENMASK(23, 17)
-#define PKG_PWR_LIM_1_TIME_X REG_GENMASK(23, 22)
-#define PKG_PWR_LIM_1_TIME_Y REG_GENMASK(21, 17)
+#define PWR_LIM_VAL REG_GENMASK(14, 0)
+#define PWR_LIM_EN REG_BIT(15)
+#define PWR_LIM REG_GENMASK(15, 0)
+#define PWR_LIM_TIME REG_GENMASK(23, 17)
+#define PWR_LIM_TIME_X REG_GENMASK(23, 22)
+#define PWR_LIM_TIME_Y REG_GENMASK(21, 17)
#endif /* _XE_MCHBAR_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_oa_regs.h b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
index a79ad2da070c..e693a50706f8 100644
--- a/drivers/gpu/drm/xe/regs/xe_oa_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_oa_regs.h
@@ -97,4 +97,7 @@
#define OAM_STATUS(base) XE_REG((base) + OAM_STATUS_OFFSET)
#define OAM_MMIO_TRG(base) XE_REG((base) + OAM_MMIO_TRG_OFFSET)
+#define OAM_COMPRESSION_T3_CONTROL XE_REG(0x1c2e00)
+#define OAM_LAT_MEASURE_ENABLE REG_BIT(4)
+
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
index c7d5d782e3f9..fb097607b86c 100644
--- a/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_pcode_regs.h
@@ -18,16 +18,10 @@
#define PVC_GT0_PLATFORM_ENERGY_STATUS XE_REG(0x28106c)
#define PVC_GT0_PACKAGE_POWER_SKU XE_REG(0x281080)
-#define BMG_PACKAGE_POWER_SKU XE_REG(0x138098)
-#define BMG_PACKAGE_POWER_SKU_UNIT XE_REG(0x1380dc)
-#define BMG_PACKAGE_ENERGY_STATUS XE_REG(0x138120)
#define BMG_FAN_1_SPEED XE_REG(0x138140)
#define BMG_FAN_2_SPEED XE_REG(0x138170)
#define BMG_FAN_3_SPEED XE_REG(0x1381a0)
#define BMG_VRAM_TEMPERATURE XE_REG(0x1382c0)
#define BMG_PACKAGE_TEMPERATURE XE_REG(0x138434)
-#define BMG_PACKAGE_RAPL_LIMIT XE_REG(0x138440)
-#define BMG_PLATFORM_ENERGY_STATUS XE_REG(0x138458)
-#define BMG_PLATFORM_POWER_LIMIT XE_REG(0x138460)
#endif /* _XE_PCODE_REGS_H_ */
diff --git a/drivers/gpu/drm/xe/regs/xe_pmt.h b/drivers/gpu/drm/xe/regs/xe_pmt.h
index f45abcd96ba8..0f79c0714454 100644
--- a/drivers/gpu/drm/xe/regs/xe_pmt.h
+++ b/drivers/gpu/drm/xe/regs/xe_pmt.h
@@ -5,15 +5,31 @@
#ifndef _XE_PMT_H_
#define _XE_PMT_H_
-#define SOC_BASE 0x280000
+#include "xe_regs.h"
#define BMG_PMT_BASE_OFFSET 0xDB000
#define BMG_DISCOVERY_OFFSET (SOC_BASE + BMG_PMT_BASE_OFFSET)
+#define PUNIT_TELEMETRY_GUID XE_REG(BMG_DISCOVERY_OFFSET + 0x4)
+#define BMG_ENERGY_STATUS_PMT_OFFSET (0x30)
+#define ENERGY_PKG REG_GENMASK64(31, 0)
+#define ENERGY_CARD REG_GENMASK64(63, 32)
+
#define BMG_TELEMETRY_BASE_OFFSET 0xE0000
#define BMG_TELEMETRY_OFFSET (SOC_BASE + BMG_TELEMETRY_BASE_OFFSET)
#define SG_REMAP_INDEX1 XE_REG(SOC_BASE + 0x08)
#define SG_REMAP_BITS REG_GENMASK(31, 24)
+#define BMG_MODS_RESIDENCY_OFFSET (0x4D0)
+#define BMG_G2_RESIDENCY_OFFSET (0x530)
+#define BMG_G6_RESIDENCY_OFFSET (0x538)
+#define BMG_G7_RESIDENCY_OFFSET (0x4B0)
+#define BMG_G8_RESIDENCY_OFFSET (0x540)
+#define BMG_G10_RESIDENCY_OFFSET (0x548)
+
+#define BMG_PCIE_LINK_L0_RESIDENCY_OFFSET (0x570)
+#define BMG_PCIE_LINK_L1_RESIDENCY_OFFSET (0x578)
+#define BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET (0x580)
+
#endif
diff --git a/drivers/gpu/drm/xe/regs/xe_regs.h b/drivers/gpu/drm/xe/regs/xe_regs.h
index 3abb17d2ca33..ad93c57edd17 100644
--- a/drivers/gpu/drm/xe/regs/xe_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_regs.h
@@ -7,6 +7,8 @@
#include "regs/xe_reg_defs.h"
+#define SOC_BASE 0x280000
+
#define GU_CNTL_PROTECTED XE_REG(0x10100C)
#define DRIVERINT_FLR_DIS REG_BIT(31)
@@ -38,6 +40,8 @@
#define STOLEN_RESERVED XE_REG(0x1082c0)
#define WOPCM_SIZE_MASK REG_GENMASK64(9, 7)
+#define SG_TILE_ADDR_RANGE(_idx) XE_REG(0x1083a0 + (_idx) * 4)
+
#define MTL_RP_STATE_CAP XE_REG(0x138000)
#define MTL_GT_RPA_FREQUENCY XE_REG(0x138008)
diff --git a/drivers/gpu/drm/xe/tests/xe_bo.c b/drivers/gpu/drm/xe/tests/xe_bo.c
index 378dcd0fb414..2294cf89f3e1 100644
--- a/drivers/gpu/drm/xe/tests/xe_bo.c
+++ b/drivers/gpu/drm/xe/tests/xe_bo.c
@@ -23,7 +23,7 @@
static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
bool clear, u64 get_val, u64 assign_val,
- struct kunit *test)
+ struct kunit *test, struct drm_exec *exec)
{
struct dma_fence *fence;
struct ttm_tt *ttm;
@@ -35,7 +35,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
u32 offset;
/* Move bo to VRAM if not already there. */
- ret = xe_bo_validate(bo, NULL, false);
+ ret = xe_bo_validate(bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate bo.\n");
return ret;
@@ -60,7 +60,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
}
/* Evict to system. CCS data should be copied. */
- ret = xe_bo_evict(bo);
+ ret = xe_bo_evict(bo, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to evict bo.\n");
return ret;
@@ -106,7 +106,7 @@ static int ccs_test_migrate(struct xe_tile *tile, struct xe_bo *bo,
}
/* Check last CCS value, or at least last value in page. */
- offset = xe_device_ccs_bytes(tile_to_xe(tile), bo->size);
+ offset = xe_device_ccs_bytes(tile_to_xe(tile), xe_bo_size(bo));
offset = min_t(u32, offset, PAGE_SIZE) / sizeof(u64) - 1;
if (cpu_map[offset] != get_val) {
KUNIT_FAIL(test,
@@ -132,14 +132,15 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
/* TODO: Sanity check */
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
if (IS_DGFX(xe))
kunit_info(test, "Testing vram id %u\n", tile->id);
else
kunit_info(test, "Testing system memory\n");
- bo = xe_bo_create_user(xe, NULL, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo = xe_bo_create_user(xe, NULL, SZ_1M, DRM_XE_GEM_CPU_CACHING_WC,
+ bo_flags, exec);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "Failed to create bo.\n");
return;
@@ -149,18 +150,18 @@ static void ccs_test_run_tile(struct xe_device *xe, struct xe_tile *tile,
kunit_info(test, "Verifying that CCS data is cleared on creation.\n");
ret = ccs_test_migrate(tile, bo, false, 0ULL, 0xdeadbeefdeadbeefULL,
- test);
+ test, exec);
if (ret)
goto out_unlock;
kunit_info(test, "Verifying that CCS data survives migration.\n");
ret = ccs_test_migrate(tile, bo, false, 0xdeadbeefdeadbeefULL,
- 0xdeadbeefdeadbeefULL, test);
+ 0xdeadbeefdeadbeefULL, test, exec);
if (ret)
goto out_unlock;
kunit_info(test, "Verifying that CCS data can be properly cleared.\n");
- ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test);
+ ret = ccs_test_migrate(tile, bo, true, 0ULL, 0ULL, test, exec);
out_unlock:
xe_bo_unlock(bo);
@@ -210,6 +211,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
struct xe_bo *bo, *external;
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
struct xe_vm *vm = xe_migrate_get_vm(xe_device_get_root_tile(xe)->migrate);
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
struct xe_gt *__gt;
int err, i, id;
@@ -218,25 +220,25 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
for (i = 0; i < 2; ++i) {
xe_vm_lock(vm, false);
- bo = xe_bo_create_user(xe, NULL, vm, 0x10000,
+ bo = xe_bo_create_user(xe, vm, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo_flags, exec);
xe_vm_unlock(vm);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "bo create err=%pe\n", bo);
break;
}
- external = xe_bo_create_user(xe, NULL, NULL, 0x10000,
+ external = xe_bo_create_user(xe, NULL, 0x10000,
DRM_XE_GEM_CPU_CACHING_WC,
- bo_flags);
+ bo_flags, NULL);
if (IS_ERR(external)) {
KUNIT_FAIL(test, "external bo create err=%pe\n", external);
goto cleanup_bo;
}
xe_bo_lock(external, false);
- err = xe_bo_pin_external(external);
+ err = xe_bo_pin_external(external, false, exec);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo pin err=%pe\n",
@@ -294,7 +296,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
if (i) {
down_read(&vm->lock);
xe_vm_lock(vm, false);
- err = xe_bo_validate(bo, bo->vm, false);
+ err = xe_bo_validate(bo, bo->vm, false, exec);
xe_vm_unlock(vm);
up_read(&vm->lock);
if (err) {
@@ -303,7 +305,7 @@ static int evict_test_run_tile(struct xe_device *xe, struct xe_tile *tile, struc
goto cleanup_all;
}
xe_bo_lock(external, false);
- err = xe_bo_validate(external, NULL, false);
+ err = xe_bo_validate(external, NULL, false, exec);
xe_bo_unlock(external);
if (err) {
KUNIT_FAIL(test, "external bo valid err=%pe\n",
@@ -495,9 +497,9 @@ static int shrink_test_run_device(struct xe_device *xe)
INIT_LIST_HEAD(&link->link);
/* We can create bos using WC caching here. But it is slower. */
- bo = xe_bo_create_user(xe, NULL, NULL, XE_BO_SHRINK_SIZE,
+ bo = xe_bo_create_user(xe, NULL, XE_BO_SHRINK_SIZE,
DRM_XE_GEM_CPU_CACHING_WB,
- XE_BO_FLAG_SYSTEM);
+ XE_BO_FLAG_SYSTEM, NULL);
if (IS_ERR(bo)) {
if (bo != ERR_PTR(-ENOMEM) && bo != ERR_PTR(-ENOSPC) &&
bo != ERR_PTR(-EINTR) && bo != ERR_PTR(-ERESTARTSYS))
@@ -514,9 +516,9 @@ static int shrink_test_run_device(struct xe_device *xe)
* other way around, they may not be subject to swapping...
*/
if (alloced < purgeable) {
- xe_ttm_tt_account_subtract(&xe_tt->ttm);
+ xe_ttm_tt_account_subtract(xe, &xe_tt->ttm);
xe_tt->purgeable = true;
- xe_ttm_tt_account_add(&xe_tt->ttm);
+ xe_ttm_tt_account_add(xe, &xe_tt->ttm);
bo->ttm.priority = 0;
spin_lock(&bo->ttm.bdev->lru_lock);
ttm_bo_move_to_lru_tail(&bo->ttm);
diff --git a/drivers/gpu/drm/xe/tests/xe_dma_buf.c b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
index c53f67ce4b0a..5df98de5ba3c 100644
--- a/drivers/gpu/drm/xe/tests/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/tests/xe_dma_buf.c
@@ -27,9 +27,11 @@ static bool is_dynamic(struct dma_buf_test_params *params)
}
static void check_residency(struct kunit *test, struct xe_bo *exported,
- struct xe_bo *imported, struct dma_buf *dmabuf)
+ struct xe_bo *imported, struct dma_buf *dmabuf,
+ struct drm_exec *exec)
{
struct dma_buf_test_params *params = to_dma_buf_test_params(test->priv);
+ struct dma_buf_attachment *attach;
u32 mem_type;
int ret;
@@ -45,7 +47,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
mem_type = XE_PL_TT;
else if (params->force_different_devices && !is_dynamic(params) &&
(params->mem_mask & XE_BO_FLAG_SYSTEM))
- /* Pin migrated to TT */
+ /* Pin migrated to TT on non-dynamic attachments. */
mem_type = XE_PL_TT;
if (!xe_bo_is_mem_type(exported, mem_type)) {
@@ -57,16 +59,12 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
return;
/*
- * Evict exporter. Note that the gem object dma_buf member isn't
- * set from xe_gem_prime_export(), and it's needed for the move_notify()
- * functionality, so hack that up here. Evicting the exported bo will
+ * Evict exporter. Evicting the exported bo will
* evict also the imported bo through the move_notify() functionality if
* importer is on a different device. If they're on the same device,
* the exporter and the importer should be the same bo.
*/
- swap(exported->ttm.base.dma_buf, dmabuf);
- ret = xe_bo_evict(exported);
- swap(exported->ttm.base.dma_buf, dmabuf);
+ ret = xe_bo_evict(exported, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Evicting exporter failed with err=%d.\n",
@@ -81,7 +79,7 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
}
/* Re-validate the importer. This should move also exporter in. */
- ret = xe_bo_validate(imported, NULL, false);
+ ret = xe_bo_validate(imported, NULL, false, exec);
if (ret) {
if (ret != -EINTR && ret != -ERESTARTSYS)
KUNIT_FAIL(test, "Validating importer failed with err=%d.\n",
@@ -89,15 +87,19 @@ static void check_residency(struct kunit *test, struct xe_bo *exported,
return;
}
- /*
- * If on different devices, the exporter is kept in system if
- * possible, saving a migration step as the transfer is just
- * likely as fast from system memory.
- */
- if (params->mem_mask & XE_BO_FLAG_SYSTEM)
- KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, XE_PL_TT));
- else
- KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+
+ /* Check that we can pin without migrating. */
+ attach = list_first_entry_or_null(&dmabuf->attachments, typeof(*attach), node);
+ if (attach) {
+ int err = dma_buf_pin(attach);
+
+ if (!err) {
+ KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(exported, mem_type));
+ dma_buf_unpin(attach);
+ }
+ KUNIT_EXPECT_EQ(test, err, 0);
+ }
if (params->force_different_devices)
KUNIT_EXPECT_TRUE(test, xe_bo_is_mem_type(imported, XE_PL_TT));
@@ -125,8 +127,8 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
size = SZ_64K;
kunit_info(test, "running %s\n", __func__);
- bo = xe_bo_create_user(xe, NULL, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
- params->mem_mask);
+ bo = xe_bo_create_user(xe, NULL, size, DRM_XE_GEM_CPU_CACHING_WC,
+ params->mem_mask, NULL);
if (IS_ERR(bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(bo));
@@ -139,6 +141,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
PTR_ERR(dmabuf));
goto out;
}
+ bo->ttm.base.dma_buf = dmabuf;
import = xe_gem_prime_import(&xe->drm, dmabuf);
if (!IS_ERR(import)) {
@@ -153,13 +156,14 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
KUNIT_FAIL(test,
"xe_gem_prime_import() succeeded when it shouldn't have\n");
} else {
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
int err;
/* Is everything where we expect it to be? */
xe_bo_lock(import_bo, false);
- err = xe_bo_validate(import_bo, NULL, false);
+ err = xe_bo_validate(import_bo, NULL, false, exec);
- /* Pinning in VRAM is not allowed. */
+ /* Pinning in VRAM is not allowed for non-dynamic attachments */
if (!is_dynamic(params) &&
params->force_different_devices &&
!(params->mem_mask & XE_BO_FLAG_SYSTEM))
@@ -170,7 +174,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
err == -ERESTARTSYS);
if (!err)
- check_residency(test, bo, import_bo, dmabuf);
+ check_residency(test, bo, import_bo, dmabuf, exec);
xe_bo_unlock(import_bo);
}
drm_gem_object_put(import);
@@ -186,6 +190,7 @@ static void xe_test_dmabuf_import_same_driver(struct xe_device *xe)
KUNIT_FAIL(test, "dynamic p2p attachment failed with err=%ld\n",
PTR_ERR(import));
}
+ bo->ttm.base.dma_buf = NULL;
dma_buf_put(dmabuf);
out:
drm_gem_object_put(&bo->ttm.base);
@@ -206,7 +211,7 @@ static const struct dma_buf_attach_ops nop2p_attach_ops = {
static const struct dma_buf_test_params test_params[] = {
{.mem_mask = XE_BO_FLAG_VRAM0,
.attach_ops = &xe_dma_buf_attach_ops},
- {.mem_mask = XE_BO_FLAG_VRAM0,
+ {.mem_mask = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_NEEDS_CPU_ACCESS,
.attach_ops = &xe_dma_buf_attach_ops,
.force_different_devices = true},
@@ -238,7 +243,8 @@ static const struct dma_buf_test_params test_params[] = {
{.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
.attach_ops = &xe_dma_buf_attach_ops},
- {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0,
+ {.mem_mask = XE_BO_FLAG_SYSTEM | XE_BO_FLAG_VRAM0 |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS,
.attach_ops = &xe_dma_buf_attach_ops,
.force_different_devices = true},
diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c
new file mode 100644
index 000000000000..42bfc4bcfbcf
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_config_kunit.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <kunit/static_stub.h>
+#include <kunit/test.h>
+#include <kunit/test-bug.h>
+
+#include "xe_kunit_helpers.h"
+#include "xe_pci_test.h"
+
+#define TEST_MAX_VFS 63
+
+static void pf_set_admin_mode(struct xe_device *xe, bool enable)
+{
+ /* should match logic of xe_sriov_pf_admin_only() */
+ xe->info.probe_display = !enable;
+ KUNIT_EXPECT_EQ(kunit_get_current_test(), enable, xe_sriov_pf_admin_only(xe));
+}
+
+static const void *num_vfs_gen_param(struct kunit *test, const void *prev, char *desc)
+{
+ unsigned long next = 1 + (unsigned long)prev;
+
+ if (next > TEST_MAX_VFS)
+ return NULL;
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%lu VF%s",
+ next, str_plural(next));
+ return (void *)next;
+}
+
+static int pf_gt_config_test_init(struct kunit *test)
+{
+ struct xe_pci_fake_data fake = {
+ .sriov_mode = XE_SRIOV_MODE_PF,
+ .platform = XE_TIGERLAKE, /* any random platform with SR-IOV */
+ .subplatform = XE_SUBPLATFORM_NONE,
+ };
+ struct xe_device *xe;
+ struct xe_gt *gt;
+
+ test->priv = &fake;
+ xe_kunit_helper_xe_device_test_init(test);
+
+ xe = test->priv;
+ KUNIT_ASSERT_TRUE(test, IS_SRIOV_PF(xe));
+
+ gt = xe_root_mmio_gt(xe);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt);
+ test->priv = gt;
+
+ /* pretend it can support up to 63 VFs */
+ xe->sriov.pf.device_total_vfs = TEST_MAX_VFS;
+ xe->sriov.pf.driver_max_vfs = TEST_MAX_VFS;
+ KUNIT_ASSERT_EQ(test, xe_sriov_pf_get_totalvfs(xe), TEST_MAX_VFS);
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
+
+ /* more sanity checks */
+ KUNIT_EXPECT_EQ(test, GUC_ID_MAX + 1, SZ_64K);
+ KUNIT_EXPECT_EQ(test, GUC_NUM_DOORBELLS, SZ_256);
+
+ return 0;
+}
+
+static void fair_contexts_1vf(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe));
+ KUNIT_EXPECT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, 1));
+
+ pf_set_admin_mode(xe, true);
+ KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe));
+ KUNIT_EXPECT_EQ(test, SZ_64K - SZ_1K, pf_profile_fair_ctxs(gt, 1));
+}
+
+static void fair_contexts(struct kunit *test)
+{
+ unsigned int num_vfs = (unsigned long)test->param_value;
+ struct xe_gt *gt = test->priv;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe));
+
+ KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_ctxs(gt, num_vfs)));
+ KUNIT_EXPECT_GT(test, GUC_ID_MAX, num_vfs * pf_profile_fair_ctxs(gt, num_vfs));
+
+ if (num_vfs > 31)
+ KUNIT_ASSERT_EQ(test, SZ_1K, pf_profile_fair_ctxs(gt, num_vfs));
+ else if (num_vfs > 15)
+ KUNIT_ASSERT_EQ(test, SZ_2K, pf_profile_fair_ctxs(gt, num_vfs));
+ else if (num_vfs > 7)
+ KUNIT_ASSERT_EQ(test, SZ_4K, pf_profile_fair_ctxs(gt, num_vfs));
+ else if (num_vfs > 3)
+ KUNIT_ASSERT_EQ(test, SZ_8K, pf_profile_fair_ctxs(gt, num_vfs));
+ else if (num_vfs > 1)
+ KUNIT_ASSERT_EQ(test, SZ_16K, pf_profile_fair_ctxs(gt, num_vfs));
+ else
+ KUNIT_ASSERT_EQ(test, SZ_32K, pf_profile_fair_ctxs(gt, num_vfs));
+}
+
+static void fair_doorbells_1vf(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe));
+ KUNIT_EXPECT_EQ(test, 128, pf_profile_fair_dbs(gt, 1));
+
+ pf_set_admin_mode(xe, true);
+ KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe));
+ KUNIT_EXPECT_EQ(test, 240, pf_profile_fair_dbs(gt, 1));
+}
+
+static void fair_doorbells(struct kunit *test)
+{
+ unsigned int num_vfs = (unsigned long)test->param_value;
+ struct xe_gt *gt = test->priv;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe));
+
+ KUNIT_EXPECT_TRUE(test, is_power_of_2(pf_profile_fair_dbs(gt, num_vfs)));
+ KUNIT_EXPECT_GE(test, GUC_NUM_DOORBELLS, (num_vfs + 1) * pf_profile_fair_dbs(gt, num_vfs));
+
+ if (num_vfs > 31)
+ KUNIT_ASSERT_EQ(test, SZ_4, pf_profile_fair_dbs(gt, num_vfs));
+ else if (num_vfs > 15)
+ KUNIT_ASSERT_EQ(test, SZ_8, pf_profile_fair_dbs(gt, num_vfs));
+ else if (num_vfs > 7)
+ KUNIT_ASSERT_EQ(test, SZ_16, pf_profile_fair_dbs(gt, num_vfs));
+ else if (num_vfs > 3)
+ KUNIT_ASSERT_EQ(test, SZ_32, pf_profile_fair_dbs(gt, num_vfs));
+ else if (num_vfs > 1)
+ KUNIT_ASSERT_EQ(test, SZ_64, pf_profile_fair_dbs(gt, num_vfs));
+ else
+ KUNIT_ASSERT_EQ(test, SZ_128, pf_profile_fair_dbs(gt, num_vfs));
+}
+
+static void fair_ggtt_1vf(struct kunit *test)
+{
+ struct xe_gt *gt = test->priv;
+ struct xe_device *xe = gt_to_xe(gt);
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe));
+ KUNIT_EXPECT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, 1));
+
+ pf_set_admin_mode(xe, true);
+ KUNIT_ASSERT_TRUE(test, xe_sriov_pf_admin_only(xe));
+ KUNIT_EXPECT_EQ(test, SZ_2G + SZ_1G + SZ_512M, pf_profile_fair_ggtt(gt, 1));
+}
+
+static void fair_ggtt(struct kunit *test)
+{
+ unsigned int num_vfs = (unsigned long)test->param_value;
+ struct xe_gt *gt = test->priv;
+ struct xe_device *xe = gt_to_xe(gt);
+ u64 alignment = pf_get_ggtt_alignment(gt);
+ u64 shareable = SZ_2G + SZ_1G + SZ_512M;
+
+ pf_set_admin_mode(xe, false);
+ KUNIT_ASSERT_FALSE(test, xe_sriov_pf_admin_only(xe));
+
+ KUNIT_EXPECT_TRUE(test, IS_ALIGNED(pf_profile_fair_ggtt(gt, num_vfs), alignment));
+ KUNIT_EXPECT_GE(test, shareable, num_vfs * pf_profile_fair_ggtt(gt, num_vfs));
+
+ if (num_vfs > 56)
+ KUNIT_ASSERT_EQ(test, SZ_64M - SZ_8M, pf_profile_fair_ggtt(gt, num_vfs));
+ else if (num_vfs > 28)
+ KUNIT_ASSERT_EQ(test, SZ_64M, pf_profile_fair_ggtt(gt, num_vfs));
+ else if (num_vfs > 14)
+ KUNIT_ASSERT_EQ(test, SZ_128M, pf_profile_fair_ggtt(gt, num_vfs));
+ else if (num_vfs > 7)
+ KUNIT_ASSERT_EQ(test, SZ_256M, pf_profile_fair_ggtt(gt, num_vfs));
+ else if (num_vfs > 3)
+ KUNIT_ASSERT_EQ(test, SZ_512M, pf_profile_fair_ggtt(gt, num_vfs));
+ else if (num_vfs > 1)
+ KUNIT_ASSERT_EQ(test, SZ_1G, pf_profile_fair_ggtt(gt, num_vfs));
+ else
+ KUNIT_ASSERT_EQ(test, SZ_2G, pf_profile_fair_ggtt(gt, num_vfs));
+}
+
+static struct kunit_case pf_gt_config_test_cases[] = {
+ KUNIT_CASE(fair_contexts_1vf),
+ KUNIT_CASE(fair_doorbells_1vf),
+ KUNIT_CASE(fair_ggtt_1vf),
+ KUNIT_CASE_PARAM(fair_contexts, num_vfs_gen_param),
+ KUNIT_CASE_PARAM(fair_doorbells, num_vfs_gen_param),
+ KUNIT_CASE_PARAM(fair_ggtt, num_vfs_gen_param),
+ {}
+};
+
+static struct kunit_suite pf_gt_config_suite = {
+ .name = "pf_gt_config",
+ .test_cases = pf_gt_config_test_cases,
+ .init = pf_gt_config_test_init,
+};
+
+kunit_test_suite(pf_gt_config_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c b/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c
deleted file mode 100644
index b683585db852..000000000000
--- a/drivers/gpu/drm/xe/tests/xe_gt_sriov_pf_service_test.c
+++ /dev/null
@@ -1,232 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0 AND MIT
-/*
- * Copyright © 2024 Intel Corporation
- */
-
-#include <kunit/test.h>
-
-#include "xe_device.h"
-#include "xe_kunit_helpers.h"
-#include "xe_pci_test.h"
-
-static int pf_service_test_init(struct kunit *test)
-{
- struct xe_pci_fake_data fake = {
- .sriov_mode = XE_SRIOV_MODE_PF,
- .platform = XE_TIGERLAKE, /* some random platform */
- .subplatform = XE_SUBPLATFORM_NONE,
- };
- struct xe_device *xe;
- struct xe_gt *gt;
-
- test->priv = &fake;
- xe_kunit_helper_xe_device_test_init(test);
-
- xe = test->priv;
- KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
-
- gt = xe_device_get_gt(xe, 0);
- pf_init_versions(gt);
-
- /*
- * sanity check:
- * - all supported platforms VF/PF ABI versions must be defined
- * - base version can't be newer than latest
- */
- KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.base.major);
- KUNIT_ASSERT_NE(test, 0, gt->sriov.pf.service.version.latest.major);
- KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.major,
- gt->sriov.pf.service.version.latest.major);
- if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
- KUNIT_ASSERT_LE(test, gt->sriov.pf.service.version.base.minor,
- gt->sriov.pf.service.version.latest.minor);
-
- test->priv = gt;
- return 0;
-}
-
-static void pf_negotiate_any(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt, VF2PF_HANDSHAKE_MAJOR_ANY,
- VF2PF_HANDSHAKE_MINOR_ANY,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
- KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_base_match(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.base.major,
- gt->sriov.pf.service.version.base.minor,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major);
- KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.base.minor);
-}
-
-static void pf_negotiate_base_newer(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.base.major,
- gt->sriov.pf.service.version.base.minor + 1,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.base.major);
- KUNIT_ASSERT_GE(test, minor, gt->sriov.pf.service.version.base.minor);
- if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
- KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor);
- else
- KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
-}
-
-static void pf_negotiate_base_next(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.base.major + 1, 0,
- &major, &minor));
- KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major);
- KUNIT_ASSERT_LE(test, major, gt->sriov.pf.service.version.latest.major);
- if (major == gt->sriov.pf.service.version.latest.major)
- KUNIT_ASSERT_LE(test, minor, gt->sriov.pf.service.version.latest.minor);
- else
- KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
-}
-
-static void pf_negotiate_base_older(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- if (!gt->sriov.pf.service.version.base.minor)
- kunit_skip(test, "no older minor\n");
-
- KUNIT_ASSERT_NE(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.base.major,
- gt->sriov.pf.service.version.base.minor - 1,
- &major, &minor));
-}
-
-static void pf_negotiate_base_prev(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_NE(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.base.major - 1, 1,
- &major, &minor));
-}
-
-static void pf_negotiate_latest_match(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.latest.major,
- gt->sriov.pf.service.version.latest.minor,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
- KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_latest_newer(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.latest.major,
- gt->sriov.pf.service.version.latest.minor + 1,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
- KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_latest_next(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.latest.major + 1, 0,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
- KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor);
-}
-
-static void pf_negotiate_latest_older(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- if (!gt->sriov.pf.service.version.latest.minor)
- kunit_skip(test, "no older minor\n");
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.latest.major,
- gt->sriov.pf.service.version.latest.minor - 1,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major);
- KUNIT_ASSERT_EQ(test, minor, gt->sriov.pf.service.version.latest.minor - 1);
-}
-
-static void pf_negotiate_latest_prev(struct kunit *test)
-{
- struct xe_gt *gt = test->priv;
- u32 major, minor;
-
- if (gt->sriov.pf.service.version.base.major == gt->sriov.pf.service.version.latest.major)
- kunit_skip(test, "no prev major");
-
- KUNIT_ASSERT_EQ(test, 0,
- pf_negotiate_version(gt,
- gt->sriov.pf.service.version.latest.major - 1,
- gt->sriov.pf.service.version.base.minor + 1,
- &major, &minor));
- KUNIT_ASSERT_EQ(test, major, gt->sriov.pf.service.version.latest.major - 1);
- KUNIT_ASSERT_GE(test, major, gt->sriov.pf.service.version.base.major);
-}
-
-static struct kunit_case pf_service_test_cases[] = {
- KUNIT_CASE(pf_negotiate_any),
- KUNIT_CASE(pf_negotiate_base_match),
- KUNIT_CASE(pf_negotiate_base_newer),
- KUNIT_CASE(pf_negotiate_base_next),
- KUNIT_CASE(pf_negotiate_base_older),
- KUNIT_CASE(pf_negotiate_base_prev),
- KUNIT_CASE(pf_negotiate_latest_match),
- KUNIT_CASE(pf_negotiate_latest_newer),
- KUNIT_CASE(pf_negotiate_latest_next),
- KUNIT_CASE(pf_negotiate_latest_older),
- KUNIT_CASE(pf_negotiate_latest_prev),
- {}
-};
-
-static struct kunit_suite pf_service_suite = {
- .name = "pf_service",
- .test_cases = pf_service_test_cases,
- .init = pf_service_test_init,
-};
-
-kunit_test_suite(pf_service_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c
index 6faffcd74869..d266882adc0e 100644
--- a/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c
+++ b/drivers/gpu/drm/xe/tests/xe_guc_buf_kunit.c
@@ -32,7 +32,7 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device *
bo->tile = tile;
bo->ttm.bdev = &xe->ttm;
- bo->size = size;
+ bo->ttm.base.size = size;
iosys_map_set_vaddr(&bo->vmap, buf);
if (flags & XE_BO_FLAG_GGTT) {
@@ -42,10 +42,8 @@ static struct xe_bo *replacement_xe_managed_bo_create_pin_map(struct xe_device *
KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo->ggtt_node[tile->id]);
KUNIT_ASSERT_EQ(test, 0,
- drm_mm_insert_node_in_range(&ggtt->mm,
- &bo->ggtt_node[tile->id]->base,
- bo->size, SZ_4K,
- 0, 0, U64_MAX, 0));
+ xe_ggtt_node_insert(bo->ggtt_node[tile->id],
+ xe_bo_size(bo), SZ_4K));
}
return bo;
@@ -67,8 +65,9 @@ static int guc_buf_test_init(struct kunit *test)
ggtt = xe_device_get_root_tile(test->priv)->mem.ggtt;
guc = &xe_device_get_gt(test->priv, 0)->uc.guc;
- drm_mm_init(&ggtt->mm, DUT_GGTT_START, DUT_GGTT_SIZE);
- mutex_init(&ggtt->lock);
+ KUNIT_ASSERT_EQ(test, 0,
+ xe_ggtt_init_kunit(ggtt, DUT_GGTT_START,
+ DUT_GGTT_START + DUT_GGTT_SIZE));
kunit_activate_static_stub(test, xe_managed_bo_create_pin_map,
replacement_xe_managed_bo_create_pin_map);
diff --git a/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
new file mode 100644
index 000000000000..3b213fcae916
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_guc_g2g_test.c
@@ -0,0 +1,776 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/delay.h>
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include "tests/xe_kunit_helpers.h"
+#include "tests/xe_pci_test.h"
+#include "tests/xe_test.h"
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_pm.h"
+
+/*
+ * There are different ways to allocate the G2G buffers. The plan for this test
+ * is to make sure that all the possible options work. The particular option
+ * chosen by the driver may vary from one platform to another, it may also change
+ * with time. So to ensure consistency of testing, the relevant driver code is
+ * replicated here to guarantee it won't change without the test being updated
+ * to keep testing the other options.
+ *
+ * In order to test the actual code being used by the driver, there is also the
+ * 'default' scheme. That will use the official driver routines to test whatever
+ * method the driver is using on the current platform at the current time.
+ */
+enum {
+ /* Driver defined allocation scheme */
+ G2G_CTB_TYPE_DEFAULT,
+ /* Single buffer in host memory */
+ G2G_CTB_TYPE_HOST,
+ /* Single buffer in a specific tile, loops across all tiles */
+ G2G_CTB_TYPE_TILE,
+};
+
+/*
+ * Payload is opaque to GuC. So KMD can define any structure or size it wants.
+ */
+struct g2g_test_payload {
+ u32 tx_dev;
+ u32 tx_tile;
+ u32 rx_dev;
+ u32 rx_tile;
+ u32 seqno;
+};
+
+static void g2g_test_send(struct kunit *test, struct xe_guc *guc,
+ u32 far_tile, u32 far_dev,
+ struct g2g_test_payload *payload)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 *action, total;
+ size_t payload_len;
+ int ret;
+
+ static_assert(IS_ALIGNED(sizeof(*payload), sizeof(u32)));
+ payload_len = sizeof(*payload) / sizeof(u32);
+
+ total = 4 + payload_len;
+ action = kunit_kmalloc_array(test, total, sizeof(*action), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, action);
+
+ action[0] = XE_GUC_ACTION_TEST_G2G_SEND;
+ action[1] = far_tile;
+ action[2] = far_dev;
+ action[3] = payload_len;
+ memcpy(action + 4, payload, payload_len * sizeof(u32));
+
+ atomic_inc(&xe->g2g_test_count);
+
+ /*
+ * Ideally the expected response notification would be specified here. The
+ * problem is that the response will come from a different GuC. By the end it
+ * should all add up, as long as an equal number of messages is sent from and
+ * to each GuC. However, transient errors (e.g. negative reservation space)
+ * can occur in the middle. Rather than add intrusive changes to the CT layer,
+ * it is simpler to just not count it at all. The system should be idle when
+ * running the selftest, and the selftest's total notification size is well
+ * within the G2H allocation size, so there should be no issues with needing
+ * to block for space, which is all the tracking code is really for.
+ */
+ ret = xe_guc_ct_send(&guc->ct, action, total, 0, 0);
+ kunit_kfree(test, action);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G send failed: %d [%d:%d -> %d:%d]\n", ret,
+ gt_to_tile(gt)->id, G2G_DEV(gt), far_tile, far_dev);
+}
+
+/*
+ * NB: Can't use KUNIT_ASSERT and friends in here as this is called asynchronously
+ * from the G2H notification handler. Need that to actually complete rather than
+ * thread-abort in order to keep the rest of the driver alive!
+ */
+int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *rx_gt = guc_to_gt(guc), *test_gt, *tx_gt = NULL;
+ u32 tx_tile, tx_dev, rx_tile, rx_dev, idx, got_len;
+ struct g2g_test_payload *payload;
+ size_t payload_len;
+ int ret = 0, i;
+
+ payload_len = sizeof(*payload) / sizeof(u32);
+
+ if (unlikely(len != (G2H_LEN_DW_G2G_NOTIFY_MIN + payload_len))) {
+ xe_gt_err(rx_gt, "G2G test notification invalid length %u", len);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ tx_tile = msg[0];
+ tx_dev = msg[1];
+ got_len = msg[2];
+ payload = (struct g2g_test_payload *)(msg + 3);
+
+ rx_tile = gt_to_tile(rx_gt)->id;
+ rx_dev = G2G_DEV(rx_gt);
+
+ if (got_len != payload_len) {
+ xe_gt_err(rx_gt, "G2G: Invalid payload length: %u vs %zu\n", got_len, payload_len);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ if (payload->tx_dev != tx_dev || payload->tx_tile != tx_tile ||
+ payload->rx_dev != rx_dev || payload->rx_tile != rx_tile) {
+ xe_gt_err(rx_gt, "G2G: Invalid payload: %d:%d -> %d:%d vs %d:%d -> %d:%d! [%d]\n",
+ payload->tx_tile, payload->tx_dev, payload->rx_tile, payload->rx_dev,
+ tx_tile, tx_dev, rx_tile, rx_dev, payload->seqno);
+ ret = -EPROTO;
+ goto done;
+ }
+
+ if (!xe->g2g_test_array) {
+ xe_gt_err(rx_gt, "G2G: Missing test array!\n");
+ ret = -ENOMEM;
+ goto done;
+ }
+
+ for_each_gt(test_gt, xe, i) {
+ if (gt_to_tile(test_gt)->id != tx_tile)
+ continue;
+
+ if (G2G_DEV(test_gt) != tx_dev)
+ continue;
+
+ if (tx_gt) {
+ xe_gt_err(rx_gt, "G2G: Got duplicate TX GTs: %d vs %d for %d:%d!\n",
+ tx_gt->info.id, test_gt->info.id, tx_tile, tx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ tx_gt = test_gt;
+ }
+ if (!tx_gt) {
+ xe_gt_err(rx_gt, "G2G: Failed to find a TX GT for %d:%d!\n", tx_tile, tx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ idx = (tx_gt->info.id * xe->info.gt_count) + rx_gt->info.id;
+
+ if (xe->g2g_test_array[idx] != payload->seqno - 1) {
+ xe_gt_err(rx_gt, "G2G: Seqno mismatch %d vs %d for %d:%d -> %d:%d!\n",
+ xe->g2g_test_array[idx], payload->seqno - 1,
+ tx_tile, tx_dev, rx_tile, rx_dev);
+ ret = -EINVAL;
+ goto done;
+ }
+
+ xe->g2g_test_array[idx] = payload->seqno;
+
+done:
+ atomic_dec(&xe->g2g_test_count);
+ return ret;
+}
+
+/*
+ * Send the given seqno from all GuCs to all other GuCs in tile/GT order
+ */
+static void g2g_test_in_order(struct kunit *test, struct xe_device *xe, u32 seqno)
+{
+ struct xe_gt *near_gt, *far_gt;
+ int i, j;
+
+ for_each_gt(near_gt, xe, i) {
+ u32 near_tile = gt_to_tile(near_gt)->id;
+ u32 near_dev = G2G_DEV(near_gt);
+
+ for_each_gt(far_gt, xe, j) {
+ u32 far_tile = gt_to_tile(far_gt)->id;
+ u32 far_dev = G2G_DEV(far_gt);
+ struct g2g_test_payload payload;
+
+ if (far_gt->info.id == near_gt->info.id)
+ continue;
+
+ payload.tx_dev = near_dev;
+ payload.tx_tile = near_tile;
+ payload.rx_dev = far_dev;
+ payload.rx_tile = far_tile;
+ payload.seqno = seqno;
+ g2g_test_send(test, &near_gt->uc.guc, far_tile, far_dev, &payload);
+ }
+ }
+}
+
+#define WAIT_TIME_MS 100
+#define WAIT_COUNT (1000 / WAIT_TIME_MS)
+
+static void g2g_wait_for_complete(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+ struct kunit *test = kunit_get_current_test();
+ int wait = 0;
+
+ /* Wait for all G2H messages to be received */
+ while (atomic_read(&xe->g2g_test_count)) {
+ if (++wait > WAIT_COUNT)
+ break;
+
+ msleep(WAIT_TIME_MS);
+ }
+
+ KUNIT_ASSERT_EQ_MSG(test, 0, atomic_read(&xe->g2g_test_count),
+ "Timed out waiting for notifications\n");
+ kunit_info(test, "Got all notifications back\n");
+}
+
+#undef WAIT_TIME_MS
+#undef WAIT_COUNT
+
+static void g2g_clean_array(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+
+ xe->g2g_test_array = NULL;
+}
+
+#define NUM_LOOPS 16
+
+static void g2g_run_test(struct kunit *test, struct xe_device *xe)
+{
+ u32 seqno, max_array;
+ int ret, i, j;
+
+ max_array = xe->info.gt_count * xe->info.gt_count;
+ xe->g2g_test_array = kunit_kcalloc(test, max_array, sizeof(u32), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, xe->g2g_test_array);
+
+ ret = kunit_add_action_or_reset(test, g2g_clean_array, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
+
+ /*
+ * Send incrementing seqnos from all GuCs to all other GuCs in tile/GT order.
+ * Tile/GT order doesn't really mean anything to the hardware but it is going
+ * to be a fixed sequence every time.
+ *
+ * Verify that each one comes back having taken the correct route.
+ */
+ ret = kunit_add_action(test, g2g_wait_for_complete, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register clean up action\n");
+ for (seqno = 1; seqno < NUM_LOOPS; seqno++)
+ g2g_test_in_order(test, xe, seqno);
+ seqno--;
+
+ kunit_release_action(test, &g2g_wait_for_complete, xe);
+
+ /* Check for the final seqno in each slot */
+ for (i = 0; i < xe->info.gt_count; i++) {
+ for (j = 0; j < xe->info.gt_count; j++) {
+ u32 idx = (j * xe->info.gt_count) + i;
+
+ if (i == j)
+ KUNIT_ASSERT_EQ_MSG(test, 0, xe->g2g_test_array[idx],
+ "identity seqno modified: %d for %dx%d!\n",
+ xe->g2g_test_array[idx], i, j);
+ else
+ KUNIT_ASSERT_EQ_MSG(test, seqno, xe->g2g_test_array[idx],
+ "invalid seqno: %d vs %d for %dx%d!\n",
+ xe->g2g_test_array[idx], seqno, i, j);
+ }
+ }
+
+ kunit_kfree(test, xe->g2g_test_array);
+ kunit_release_action(test, &g2g_clean_array, xe);
+
+ kunit_info(test, "Test passed\n");
+}
+
+#undef NUM_LOOPS
+
+static void g2g_ct_stop(struct xe_guc *guc)
+{
+ struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ int i, t;
+
+ for_each_gt(remote_gt, xe, i) {
+ u32 tile, dev;
+
+ if (remote_gt->info.id == gt->info.id)
+ continue;
+
+ tile = gt_to_tile(remote_gt)->id;
+ dev = G2G_DEV(remote_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++)
+ guc_g2g_deregister(guc, tile, dev, t);
+ }
+}
+
+/* Size of a single allocation that contains all G2G CTBs across all GTs */
+static u32 g2g_ctb_size(struct kunit *test, struct xe_device *xe)
+{
+ unsigned int count = xe->info.gt_count;
+ u32 num_channels = (count * (count - 1)) / 2;
+
+ kunit_info(test, "Size: (%d * %d / 2) * %d * 0x%08X + 0x%08X => 0x%08X [%d]\n",
+ count, count - 1, XE_G2G_TYPE_LIMIT, G2G_BUFFER_SIZE, G2G_DESC_AREA_SIZE,
+ num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE,
+ num_channels * XE_G2G_TYPE_LIMIT);
+
+ return num_channels * XE_G2G_TYPE_LIMIT * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+}
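+
+/*
+ * Worked example (illustrative only): with 4 GTs there are (4 * 3) / 2 = 6
+ * bi-directional channel pairs, each needing XE_G2G_TYPE_LIMIT CTBs of
+ * G2G_BUFFER_SIZE bytes, plus the shared G2G_DESC_AREA_SIZE descriptor area
+ * at the start of the allocation. The exact byte values depend on the sizes
+ * defined by the driver and are not restated here.
+ */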
+
+/*
+ * Use the driver's regular CTB allocation scheme.
+ */
+static void g2g_alloc_default(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int i;
+
+ kunit_info(test, "Default [tiles = %d, GTs = %d]\n",
+ xe->info.tile_count, xe->info.gt_count);
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+ int ret;
+
+ ret = guc_g2g_alloc(guc);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G alloc failed: %pe", ERR_PTR(ret));
+ }
+}
+
+static void g2g_distribute(struct kunit *test, struct xe_device *xe, struct xe_bo *bo)
+{
+ struct xe_gt *root_gt, *gt;
+ int i;
+
+ root_gt = xe_device_get_gt(xe, 0);
+ root_gt->uc.guc.g2g.bo = bo;
+ root_gt->uc.guc.g2g.owned = true;
+ kunit_info(test, "[%d.%d] Assigned 0x%p\n", gt_to_tile(root_gt)->id, root_gt->info.id, bo);
+
+ for_each_gt(gt, xe, i) {
+ if (gt->info.id != 0) {
+ gt->uc.guc.g2g.owned = false;
+ gt->uc.guc.g2g.bo = xe_bo_get(bo);
+ kunit_info(test, "[%d.%d] Pinned 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, gt->uc.guc.g2g.bo);
+ }
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, gt->uc.guc.g2g.bo);
+ }
+}
+
+/*
+ * Allocate a single blob on the host and split between all G2G CTBs.
+ */
+static void g2g_alloc_host(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ kunit_info(test, "Host [tiles = %d, GTs = %d]\n", xe->info.tile_count, xe->info.gt_count);
+
+ g2g_size = g2g_ctb_size(test, xe);
+ bo = xe_managed_bo_create_pin_map(xe, xe_device_get_root_tile(xe), g2g_size,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
+ kunit_info(test, "[HST] G2G buffer create: 0x%p\n", bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+
+ g2g_distribute(test, xe, bo);
+}
+
+/*
+ * Allocate a single blob on the given tile and split between all G2G CTBs.
+ */
+static void g2g_alloc_tile(struct kunit *test, struct xe_device *xe, struct xe_tile *tile)
+{
+ struct xe_bo *bo;
+ u32 g2g_size;
+
+ KUNIT_ASSERT_TRUE(test, IS_DGFX(xe));
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tile);
+
+ kunit_info(test, "Tile %d [tiles = %d, GTs = %d]\n",
+ tile->id, xe->info.tile_count, xe->info.gt_count);
+
+ g2g_size = g2g_ctb_size(test, xe);
+ bo = xe_managed_bo_create_pin_map(xe, tile, g2g_size,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_ALL |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bo);
+ kunit_info(test, "[%d.*] G2G buffer create: 0x%p\n", tile->id, bo);
+
+ xe_map_memset(xe, &bo->vmap, 0, 0, g2g_size);
+
+ g2g_distribute(test, xe, bo);
+}
+
+static void g2g_free(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ struct xe_bo *bo;
+ int i;
+
+ for_each_gt(gt, xe, i) {
+ bo = gt->uc.guc.g2g.bo;
+ if (!bo)
+ continue;
+
+ if (gt->uc.guc.g2g.owned) {
+ xe_managed_bo_unpin_map_no_vm(bo);
+ kunit_info(test, "[%d.%d] Unmapped 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, bo);
+ } else {
+ xe_bo_put(bo);
+ kunit_info(test, "[%d.%d] Unpinned 0x%p\n",
+ gt_to_tile(gt)->id, gt->info.id, bo);
+ }
+
+ gt->uc.guc.g2g.bo = NULL;
+ }
+}
+
+static void g2g_stop(struct kunit *test, struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ int i;
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (!guc->g2g.bo)
+ continue;
+
+ g2g_ct_stop(guc);
+ }
+
+ g2g_free(test, xe);
+}
+
+/*
+ * Generate a unique id for each bi-directional CTB for each pair of
+ * near and far tiles/devices. The id can then be used as an index into
+ * a single allocation that is sub-divided into multiple CTBs.
+ *
+ * For example, with two devices per tile and two tiles, the table should
+ * look like:
+ * Far <tile>.<dev>
+ * 0.0 0.1 1.0 1.1
+ * N 0.0 --/-- 00/01 02/03 04/05
+ * e 0.1 01/00 --/-- 06/07 08/09
+ * a 1.0 03/02 07/06 --/-- 10/11
+ * r 1.1 05/04 09/08 11/10 --/--
+ *
+ * Where each entry is Rx/Tx channel id.
+ *
+ * So GuC #3 (tile 1, dev 1) talking to GuC #2 (tile 1, dev 0) would
+ * be reading from channel #11 and writing to channel #10. Whereas,
+ * GuC #2 talking to GuC #3 would be read on #10 and write to #11.
+ */
+static int g2g_slot_flat(u32 near_tile, u32 near_dev, u32 far_tile, u32 far_dev,
+ u32 type, u32 max_inst, bool have_dev)
+{
+ u32 near = near_tile, far = far_tile;
+ u32 idx = 0, x, y, direction;
+ int i;
+
+ if (have_dev) {
+ near = (near << 1) | near_dev;
+ far = (far << 1) | far_dev;
+ }
+
+ /* No need to send to oneself */
+ if (far == near)
+ return -1;
+
+ if (far > near) {
+ /* Top right table half */
+ x = far;
+ y = near;
+
+ /* T/R is 'forwards' direction */
+ direction = type;
+ } else {
+ /* Bottom left table half */
+ x = near;
+ y = far;
+
+ /* B/L is 'backwards' direction */
+ direction = (1 - type);
+ }
+
+ /* Count the rows prior to the target */
+ for (i = y; i > 0; i--)
+ idx += max_inst - i;
+
+ /* Count this row up to the target */
+ idx += (x - 1 - y);
+
+ /* Slots are in Rx/Tx pairs */
+ idx *= 2;
+
+ /* Pick Rx/Tx direction */
+ idx += direction;
+
+ return idx;
+}
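+
+/*
+ * Worked example for the table above (illustrative only; the helper below is
+ * hypothetical and not wired into the test suite): with two tiles and two GTs
+ * per tile (max_inst = 4, have_dev = true), GuC 1.1 talking to GuC 1.0 gives
+ * near = 3 and far = 2. Since far < near, the bottom-left half is used with
+ * x = 3, y = 2: the rows above contribute (4 - 1) + (4 - 2) = 5 pairs, the
+ * row offset is 3 - 1 - 2 = 0, so the pair index is 5 and the Rx/Tx slots are
+ * 11/10, matching the table entry.
+ */
+static void __maybe_unused g2g_slot_flat_example(struct kunit *test)
+{
+ /* Near 0.0 -> Far 0.1: first pair in the top-right half */
+ KUNIT_EXPECT_EQ(test, 0, g2g_slot_flat(0, 0, 0, 1, 0, 4, true));
+ KUNIT_EXPECT_EQ(test, 1, g2g_slot_flat(0, 0, 0, 1, 1, 4, true));
+
+ /* Near 1.1 -> Far 1.0: bottom-left half, so the Rx/Tx direction flips */
+ KUNIT_EXPECT_EQ(test, 11, g2g_slot_flat(1, 1, 1, 0, 0, 4, true));
+ KUNIT_EXPECT_EQ(test, 10, g2g_slot_flat(1, 1, 1, 0, 1, 4, true));
+}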
+
+static int g2g_register_flat(struct xe_guc *guc, u32 far_tile, u32 far_dev, u32 type, bool have_dev)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ u32 near_tile = gt_to_tile(gt)->id;
+ u32 near_dev = G2G_DEV(gt);
+ u32 max = xe->info.gt_count;
+ int idx;
+ u32 base, desc, buf;
+
+ if (!guc->g2g.bo)
+ return -ENODEV;
+
+ idx = g2g_slot_flat(near_tile, near_dev, far_tile, far_dev, type, max, have_dev);
+ xe_assert(xe, idx >= 0);
+
+ base = guc_bo_ggtt_addr(guc, guc->g2g.bo);
+ desc = base + idx * G2G_DESC_SIZE;
+ buf = base + idx * G2G_BUFFER_SIZE + G2G_DESC_AREA_SIZE;
+
+ xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
+ xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(guc->g2g.bo));
+
+ return guc_action_register_g2g_buffer(guc, type, far_tile, far_dev,
+ desc, buf, G2G_BUFFER_SIZE);
+}
+
+static void g2g_start(struct kunit *test, struct xe_guc *guc)
+{
+ struct xe_gt *remote_gt, *gt = guc_to_gt(guc);
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int i;
+ int t, ret;
+ bool have_dev;
+
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, guc->g2g.bo);
+
+ /* GuC interface will need extending if more GT device types are ever created. */
+ KUNIT_ASSERT_TRUE(test,
+ (gt->info.type == XE_GT_TYPE_MAIN) ||
+ (gt->info.type == XE_GT_TYPE_MEDIA));
+
+ /* Channel numbering depends on whether there are multiple GTs per tile */
+ have_dev = xe->info.gt_count > xe->info.tile_count;
+
+ for_each_gt(remote_gt, xe, i) {
+ u32 tile, dev;
+
+ if (remote_gt->info.id == gt->info.id)
+ continue;
+
+ tile = gt_to_tile(remote_gt)->id;
+ dev = G2G_DEV(remote_gt);
+
+ for (t = 0; t < XE_G2G_TYPE_LIMIT; t++) {
+ ret = g2g_register_flat(guc, tile, dev, t, have_dev);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "G2G register failed: %pe", ERR_PTR(ret));
+ }
+ }
+}
+
+static void g2g_reinit(struct kunit *test, struct xe_device *xe, int ctb_type, struct xe_tile *tile)
+{
+ struct xe_gt *gt;
+ int i, found = 0;
+
+ g2g_stop(test, xe);
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ KUNIT_ASSERT_NULL(test, guc->g2g.bo);
+ }
+
+ switch (ctb_type) {
+ case G2G_CTB_TYPE_DEFAULT:
+ g2g_alloc_default(test, xe);
+ break;
+
+ case G2G_CTB_TYPE_HOST:
+ g2g_alloc_host(test, xe);
+ break;
+
+ case G2G_CTB_TYPE_TILE:
+ g2g_alloc_tile(test, xe, tile);
+ break;
+
+ default:
+ KUNIT_ASSERT_TRUE(test, false);
+ }
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (!guc->g2g.bo)
+ continue;
+
+ if (ctb_type == G2G_CTB_TYPE_DEFAULT)
+ guc_g2g_start(guc);
+ else
+ g2g_start(test, guc);
+ found++;
+ }
+
+ KUNIT_ASSERT_GT_MSG(test, found, 1, "insufficient G2G channels running: %d", found);
+
+ kunit_info(test, "Testing across %d GTs\n", found);
+}
+
+static void g2g_recreate_ctb(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+ struct kunit *test = kunit_get_current_test();
+
+ g2g_stop(test, xe);
+
+ if (xe_guc_g2g_wanted(xe))
+ g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
+}
+
+static void g2g_pm_runtime_put(void *_xe)
+{
+ struct xe_device *xe = (struct xe_device *)_xe;
+
+ xe_pm_runtime_put(xe);
+}
+
+static void g2g_pm_runtime_get(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = kunit_add_action_or_reset(test, g2g_pm_runtime_put, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register runtime PM action\n");
+}
+
+static void g2g_check_skip(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ struct xe_gt *gt;
+ int i;
+
+ if (IS_SRIOV_VF(xe))
+ kunit_skip(test, "not supported from a VF");
+
+ if (xe->info.gt_count <= 1)
+ kunit_skip(test, "not enough GTs");
+
+ for_each_gt(gt, xe, i) {
+ struct xe_guc *guc = &gt->uc.guc;
+
+ if (guc->fw.build_type == CSS_UKERNEL_INFO_BUILDTYPE_PROD)
+ kunit_skip(test,
+ "G2G test interface not available in production firmware builds\n");
+ }
+}
+
+/*
+ * Simple test that does not try to recreate the CTBs.
+ * Requires that the platform already enables G2G comms
+ * but has no risk of leaving the system in a broken state
+ * afterwards.
+ */
+static void xe_live_guc_g2g_kunit_default(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+
+ if (!xe_guc_g2g_wanted(xe))
+ kunit_skip(test, "G2G not enabled");
+
+ g2g_check_skip(test);
+
+ g2g_pm_runtime_get(test);
+
+ kunit_info(test, "Testing default CTBs\n");
+ g2g_run_test(test, xe);
+
+ kunit_release_action(test, &g2g_pm_runtime_put, xe);
+}
+
+/*
+ * More complex test that re-creates the CTBs in various locations to
+ * test access to each location from each GuC. Can be run even on
+ * systems that do not enable G2G by default. On the other hand,
+ * because it recreates the CTBs, if something goes wrong it could
+ * leave the system with broken G2G comms.
+ */
+static void xe_live_guc_g2g_kunit_allmem(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ int ret;
+
+ g2g_check_skip(test);
+
+ g2g_pm_runtime_get(test);
+
+ /* Make sure to leave the system as we found it */
+ ret = kunit_add_action_or_reset(test, g2g_recreate_ctb, xe);
+ KUNIT_ASSERT_EQ_MSG(test, 0, ret, "Failed to register CTB re-creation action\n");
+
+ kunit_info(test, "Testing CTB type 'default'...\n");
+ g2g_reinit(test, xe, G2G_CTB_TYPE_DEFAULT, NULL);
+ g2g_run_test(test, xe);
+
+ kunit_info(test, "Testing CTB type 'host'...\n");
+ g2g_reinit(test, xe, G2G_CTB_TYPE_HOST, NULL);
+ g2g_run_test(test, xe);
+
+ if (IS_DGFX(xe)) {
+ struct xe_tile *tile;
+ int id;
+
+ for_each_tile(tile, xe, id) {
+ kunit_info(test, "Testing CTB type 'tile: #%d'...\n", id);
+
+ g2g_reinit(test, xe, G2G_CTB_TYPE_TILE, tile);
+ g2g_run_test(test, xe);
+ }
+ } else {
+ kunit_info(test, "Skipping local memory on integrated platform\n");
+ }
+
+ kunit_release_action(test, g2g_recreate_ctb, xe);
+ kunit_release_action(test, g2g_pm_runtime_put, xe);
+}
+
+static struct kunit_case xe_guc_g2g_tests[] = {
+ KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_default, xe_pci_live_device_gen_param),
+ KUNIT_CASE_PARAM(xe_live_guc_g2g_kunit_allmem, xe_pci_live_device_gen_param),
+ {}
+};
+
+VISIBLE_IF_KUNIT
+struct kunit_suite xe_guc_g2g_test_suite = {
+ .name = "xe_guc_g2g",
+ .test_cases = xe_guc_g2g_tests,
+ .init = xe_kunit_helper_xe_device_live_test_init,
+};
+EXPORT_SYMBOL_IF_KUNIT(xe_guc_g2g_test_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
index 81277c77016d..c55e46f1ae92 100644
--- a/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
+++ b/drivers/gpu/drm/xe/tests/xe_live_test_mod.c
@@ -10,12 +10,14 @@ extern struct kunit_suite xe_bo_shrink_test_suite;
extern struct kunit_suite xe_dma_buf_test_suite;
extern struct kunit_suite xe_migrate_test_suite;
extern struct kunit_suite xe_mocs_test_suite;
+extern struct kunit_suite xe_guc_g2g_test_suite;
kunit_test_suite(xe_bo_test_suite);
kunit_test_suite(xe_bo_shrink_test_suite);
kunit_test_suite(xe_dma_buf_test_suite);
kunit_test_suite(xe_migrate_test_suite);
kunit_test_suite(xe_mocs_test_suite);
+kunit_test_suite(xe_guc_g2g_test_suite);
MODULE_AUTHOR("Intel Corporation");
MODULE_LICENSE("GPL");
diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c b/drivers/gpu/drm/xe/tests/xe_migrate.c
index 4a65e3103f77..5904d658d1f2 100644
--- a/drivers/gpu/drm/xe/tests/xe_migrate.c
+++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
@@ -70,28 +70,29 @@ static int run_sanity_job(struct xe_migrate *m, struct xe_device *xe,
} } while (0)
static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test, u32 region)
+ struct kunit *test, u32 region, struct drm_exec *exec)
{
struct xe_device *xe = tile_to_xe(m->tile);
u64 retval, expected = 0;
- bool big = bo->size >= SZ_2M;
+ bool big = xe_bo_size(bo) >= SZ_2M;
struct dma_fence *fence;
const char *str = big ? "Copying big bo" : "Copying small bo";
int err;
struct xe_bo *remote = xe_bo_create_locked(xe, m->tile, NULL,
- bo->size,
+ xe_bo_size(bo),
ttm_bo_type_kernel,
region |
XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED,
+ exec);
if (IS_ERR(remote)) {
KUNIT_FAIL(test, "Failed to allocate remote bo for %s: %pe\n",
str, remote);
return;
}
- err = xe_bo_validate(remote, NULL, false);
+ err = xe_bo_validate(remote, NULL, false, exec);
if (err) {
KUNIT_FAIL(test, "Failed to validate system bo for %s: %i\n",
str, err);
@@ -105,7 +106,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
goto out_unlock;
}
- xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
+ xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote));
fence = xe_migrate_clear(m, remote, remote->ttm.resource,
XE_MIGRATE_CLEAR_FLAG_FULL);
if (!sanity_fence_failed(xe, fence, big ? "Clearing remote big bo" :
@@ -113,15 +114,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
retval = xe_map_rd(xe, &remote->vmap, 0, u64);
check(retval, expected, "remote first offset should be cleared",
test);
- retval = xe_map_rd(xe, &remote->vmap, remote->size - 8, u64);
+ retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(remote) - 8, u64);
check(retval, expected, "remote last offset should be cleared",
test);
}
dma_fence_put(fence);
/* Try to copy 0xc0 from remote to vram with 2MB or 64KiB/4KiB pages */
- xe_map_memset(xe, &remote->vmap, 0, 0xc0, remote->size);
- xe_map_memset(xe, &bo->vmap, 0, 0xd0, bo->size);
+ xe_map_memset(xe, &remote->vmap, 0, 0xc0, xe_bo_size(remote));
+ xe_map_memset(xe, &bo->vmap, 0, 0xd0, xe_bo_size(bo));
expected = 0xc0c0c0c0c0c0c0c0;
fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
@@ -131,15 +132,15 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
retval = xe_map_rd(xe, &bo->vmap, 0, u64);
check(retval, expected,
"remote -> vram bo first offset should be copied", test);
- retval = xe_map_rd(xe, &bo->vmap, bo->size - 8, u64);
+ retval = xe_map_rd(xe, &bo->vmap, xe_bo_size(bo) - 8, u64);
check(retval, expected,
"remote -> vram bo offset should be copied", test);
}
dma_fence_put(fence);
/* And other way around.. slightly hacky.. */
- xe_map_memset(xe, &remote->vmap, 0, 0xd0, remote->size);
- xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
+ xe_map_memset(xe, &remote->vmap, 0, 0xd0, xe_bo_size(remote));
+ xe_map_memset(xe, &bo->vmap, 0, 0xc0, xe_bo_size(bo));
fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
remote->ttm.resource, false);
@@ -148,7 +149,7 @@ static void test_copy(struct xe_migrate *m, struct xe_bo *bo,
retval = xe_map_rd(xe, &remote->vmap, 0, u64);
check(retval, expected,
"vram -> remote bo first offset should be copied", test);
- retval = xe_map_rd(xe, &remote->vmap, bo->size - 8, u64);
+ retval = xe_map_rd(xe, &remote->vmap, xe_bo_size(bo) - 8, u64);
check(retval, expected,
"vram -> remote bo last offset should be copied", test);
}
@@ -161,13 +162,13 @@ out_unlock:
}
static void test_copy_sysmem(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
- test_copy(m, bo, test, XE_BO_FLAG_SYSTEM);
+ test_copy(m, bo, test, XE_BO_FLAG_SYSTEM, exec);
}
static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
u32 region;
@@ -178,10 +179,11 @@ static void test_copy_vram(struct xe_migrate *m, struct xe_bo *bo,
region = XE_BO_FLAG_VRAM1;
else
region = XE_BO_FLAG_VRAM0;
- test_copy(m, bo, test, region);
+ test_copy(m, bo, test, region, exec);
}
-static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
+static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test,
+ struct drm_exec *exec)
{
struct xe_tile *tile = m->tile;
struct xe_device *xe = tile_to_xe(tile);
@@ -202,7 +204,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
big = xe_bo_create_pin_map(xe, tile, m->q->vm, SZ_4M,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(big)) {
KUNIT_FAIL(test, "Failed to allocate bo: %li\n", PTR_ERR(big));
goto vunmap;
@@ -210,7 +213,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
pt = xe_bo_create_pin_map(xe, tile, m->q->vm, XE_PAGE_SIZE,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(pt)) {
KUNIT_FAIL(test, "Failed to allocate fake pt: %li\n",
PTR_ERR(pt));
@@ -220,7 +224,8 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
tiny = xe_bo_create_pin_map(xe, tile, m->q->vm,
2 * SZ_4K,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile));
+ XE_BO_FLAG_VRAM_IF_DGFX(tile),
+ exec);
if (IS_ERR(tiny)) {
KUNIT_FAIL(test, "Failed to allocate tiny fake pt: %li\n",
PTR_ERR(tiny));
@@ -245,9 +250,9 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
if (m->q->vm->flags & XE_VM_FLAG_64K)
expected |= XE_PTE_PS64;
if (xe_bo_is_vram(pt))
- xe_res_first(pt->ttm.resource, 0, pt->size, &src_it);
+ xe_res_first(pt->ttm.resource, 0, xe_bo_size(pt), &src_it);
else
- xe_res_first_sg(xe_bo_sg(pt), 0, pt->size, &src_it);
+ xe_res_first_sg(xe_bo_sg(pt), 0, xe_bo_size(pt), &src_it);
emit_pte(m, bb, NUM_KERNEL_PDE - 1, xe_bo_is_vram(pt), false,
&src_it, XE_PAGE_SIZE, pt->ttm.resource);
@@ -276,7 +281,7 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
/* Clear a small bo */
kunit_info(test, "Clearing small buffer object\n");
- xe_map_memset(xe, &tiny->vmap, 0, 0x22, tiny->size);
+ xe_map_memset(xe, &tiny->vmap, 0, 0x22, xe_bo_size(tiny));
expected = 0;
fence = xe_migrate_clear(m, tiny, tiny->ttm.resource,
XE_MIGRATE_CLEAR_FLAG_FULL);
@@ -286,19 +291,19 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
dma_fence_put(fence);
retval = xe_map_rd(xe, &tiny->vmap, 0, u32);
check(retval, expected, "Command clear small first value", test);
- retval = xe_map_rd(xe, &tiny->vmap, tiny->size - 4, u32);
+ retval = xe_map_rd(xe, &tiny->vmap, xe_bo_size(tiny) - 4, u32);
check(retval, expected, "Command clear small last value", test);
kunit_info(test, "Copying small buffer object to system\n");
- test_copy_sysmem(m, tiny, test);
+ test_copy_sysmem(m, tiny, exec, test);
if (xe->info.tile_count > 1) {
kunit_info(test, "Copying small buffer object to other vram\n");
- test_copy_vram(m, tiny, test);
+ test_copy_vram(m, tiny, exec, test);
}
/* Clear a big bo */
kunit_info(test, "Clearing big buffer object\n");
- xe_map_memset(xe, &big->vmap, 0, 0x11, big->size);
+ xe_map_memset(xe, &big->vmap, 0, 0x11, xe_bo_size(big));
expected = 0;
fence = xe_migrate_clear(m, big, big->ttm.resource,
XE_MIGRATE_CLEAR_FLAG_FULL);
@@ -308,14 +313,14 @@ static void xe_migrate_sanity_test(struct xe_migrate *m, struct kunit *test)
dma_fence_put(fence);
retval = xe_map_rd(xe, &big->vmap, 0, u32);
check(retval, expected, "Command clear big first value", test);
- retval = xe_map_rd(xe, &big->vmap, big->size - 4, u32);
+ retval = xe_map_rd(xe, &big->vmap, xe_bo_size(big) - 4, u32);
check(retval, expected, "Command clear big last value", test);
kunit_info(test, "Copying big buffer object to system\n");
- test_copy_sysmem(m, big, test);
+ test_copy_sysmem(m, big, exec, test);
if (xe->info.tile_count > 1) {
kunit_info(test, "Copying big buffer object to other vram\n");
- test_copy_vram(m, big, test);
+ test_copy_vram(m, big, exec, test);
}
out:
@@ -343,10 +348,11 @@ static int migrate_test_run_device(struct xe_device *xe)
for_each_tile(tile, xe, id) {
struct xe_migrate *m = tile->migrate;
+ struct drm_exec *exec = XE_VALIDATION_OPT_OUT;
kunit_info(test, "Testing tile id %d.\n", id);
xe_vm_lock(m->q->vm, false);
- xe_migrate_sanity_test(m, test);
+ xe_migrate_sanity_test(m, test, exec);
xe_vm_unlock(m->q->vm);
}
@@ -370,7 +376,7 @@ static struct dma_fence *blt_copy(struct xe_tile *tile,
struct xe_migrate *m = tile->migrate;
struct xe_device *xe = gt_to_xe(gt);
struct dma_fence *fence = NULL;
- u64 size = src_bo->size;
+ u64 size = xe_bo_size(src_bo);
struct xe_res_cursor src_it, dst_it;
struct ttm_resource *src = src_bo->ttm.resource, *dst = dst_bo->ttm.resource;
u64 src_L0_ofs, dst_L0_ofs;
@@ -490,7 +496,7 @@ err_sync:
static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
struct xe_bo *sys_bo, struct xe_bo *vram_bo, struct xe_bo *ccs_bo,
- struct kunit *test)
+ struct drm_exec *exec, struct kunit *test)
{
struct dma_fence *fence;
u64 expected, retval;
@@ -498,7 +504,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
long ret;
expected = 0xd0d0d0d0d0d0d0d0;
- xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+ xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo));
fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
@@ -509,7 +515,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
dma_fence_put(fence);
kunit_info(test, "Evict vram buffer object\n");
- ret = xe_bo_evict(vram_bo);
+ ret = xe_bo_evict(vram_bo, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to evict bo.\n");
return;
@@ -523,7 +529,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
check(retval, expected, "Clear evicted vram data first value", test);
- retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64);
check(retval, expected, "Clear evicted vram data last value", test);
fence = blt_copy(tile, vram_bo, ccs_bo,
@@ -532,13 +538,13 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
check(retval, 0, "Clear ccs data first value", test);
- retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64);
check(retval, 0, "Clear ccs data last value", test);
}
dma_fence_put(fence);
kunit_info(test, "Restore vram buffer object\n");
- ret = xe_bo_validate(vram_bo, NULL, false);
+ ret = xe_bo_validate(vram_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
return;
@@ -562,7 +568,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
retval = xe_map_rd(xe, &vram_bo->vmap, 0, u64);
check(retval, expected, "Restored value must be equal to initial value", test);
- retval = xe_map_rd(xe, &vram_bo->vmap, vram_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &vram_bo->vmap, xe_bo_size(vram_bo) - 8, u64);
check(retval, expected, "Restored value must be equal to initial value", test);
fence = blt_copy(tile, vram_bo, ccs_bo,
@@ -570,7 +576,7 @@ static void test_migrate(struct xe_device *xe, struct xe_tile *tile,
if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
retval = xe_map_rd(xe, &ccs_bo->vmap, 0, u64);
check(retval, 0, "Clear ccs data first value", test);
- retval = xe_map_rd(xe, &ccs_bo->vmap, ccs_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &ccs_bo->vmap, xe_bo_size(ccs_bo) - 8, u64);
check(retval, 0, "Clear ccs data last value", test);
}
dma_fence_put(fence);
@@ -583,7 +589,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile,
u64 expected, retval;
expected = 0xd0d0d0d0d0d0d0d0;
- xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, sys_bo->size);
+ xe_map_memset(xe, &sys_bo->vmap, 0, 0xd0, xe_bo_size(sys_bo));
fence = blt_copy(tile, sys_bo, vram_bo, false, "Blit copy from sysmem to vram", test);
if (!sanity_fence_failed(xe, fence, "Blit copy from sysmem to vram", test)) {
@@ -597,7 +603,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile,
if (!sanity_fence_failed(xe, fence, "Blit copy from vram to sysmem", test)) {
retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
check(retval, expected, "Decompressed value must be equal to initial value", test);
- retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64);
check(retval, expected, "Decompressed value must be equal to initial value", test);
}
dma_fence_put(fence);
@@ -615,7 +621,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile,
if (!sanity_fence_failed(xe, fence, "Clear main buffer data", test)) {
retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
check(retval, expected, "Clear main buffer first value", test);
- retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64);
check(retval, expected, "Clear main buffer last value", test);
}
dma_fence_put(fence);
@@ -625,7 +631,7 @@ static void test_clear(struct xe_device *xe, struct xe_tile *tile,
if (!sanity_fence_failed(xe, fence, "Clear ccs buffer data", test)) {
retval = xe_map_rd(xe, &sys_bo->vmap, 0, u64);
check(retval, expected, "Clear ccs data first value", test);
- retval = xe_map_rd(xe, &sys_bo->vmap, sys_bo->size - 8, u64);
+ retval = xe_map_rd(xe, &sys_bo->vmap, xe_bo_size(sys_bo) - 8, u64);
check(retval, expected, "Clear ccs data last value", test);
}
dma_fence_put(fence);
@@ -636,13 +642,14 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
{
struct xe_bo *sys_bo, *vram_bo = NULL, *ccs_bo = NULL;
unsigned int bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile);
+ struct drm_exec *exec;
long ret;
- sys_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ sys_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_SYSTEM |
XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(sys_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -650,8 +657,9 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
return;
}
+ exec = XE_VALIDATION_OPT_OUT;
xe_bo_lock(sys_bo, false);
- ret = xe_bo_validate(sys_bo, NULL, false);
+ ret = xe_bo_validate(sys_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
goto free_sysbo;
@@ -664,10 +672,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(sys_bo);
- ccs_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ ccs_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(ccs_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
@@ -676,7 +684,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_lock(ccs_bo, false);
- ret = xe_bo_validate(ccs_bo, NULL, false);
+ ret = xe_bo_validate(ccs_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate system bo for: %li\n", ret);
goto free_ccsbo;
@@ -689,10 +697,10 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_unlock(ccs_bo);
- vram_bo = xe_bo_create_user(xe, NULL, NULL, SZ_4M,
+ vram_bo = xe_bo_create_user(xe, NULL, SZ_4M,
DRM_XE_GEM_CPU_CACHING_WC,
bo_flags | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
+ XE_BO_FLAG_PINNED, NULL);
if (IS_ERR(vram_bo)) {
KUNIT_FAIL(test, "xe_bo_create() failed with err=%ld\n",
PTR_ERR(vram_bo));
@@ -700,7 +708,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
xe_bo_lock(vram_bo, false);
- ret = xe_bo_validate(vram_bo, NULL, false);
+ ret = xe_bo_validate(vram_bo, NULL, false, exec);
if (ret) {
KUNIT_FAIL(test, "Failed to validate vram bo for: %li\n", ret);
goto free_vrambo;
@@ -713,7 +721,7 @@ static void validate_ccs_test_run_tile(struct xe_device *xe, struct xe_tile *til
}
test_clear(xe, tile, sys_bo, vram_bo, test);
- test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, test);
+ test_migrate(xe, tile, sys_bo, vram_bo, ccs_bo, exec, test);
xe_bo_unlock(vram_bo);
xe_bo_lock(vram_bo, false);
diff --git a/drivers/gpu/drm/xe/tests/xe_mocs.c b/drivers/gpu/drm/xe/tests/xe_mocs.c
index 0e502feaca81..6bb278167aaf 100644
--- a/drivers/gpu/drm/xe/tests/xe_mocs.c
+++ b/drivers/gpu/drm/xe/tests/xe_mocs.c
@@ -49,7 +49,7 @@ static void read_l3cc_table(struct xe_gt *gt,
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
xe_force_wake_put(gt_to_fw(gt), fw_ref);
- KUNIT_ASSERT_TRUE_MSG(test, true, "Forcewake Failed.\n");
+ KUNIT_FAIL_AND_ABORT(test, "Forcewake Failed.\n");
}
for (i = 0; i < info->num_mocs_regs; i++) {
diff --git a/drivers/gpu/drm/xe/tests/xe_pci.c b/drivers/gpu/drm/xe/tests/xe_pci.c
index 1d3e2e50c355..f3179b31f13e 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci.c
@@ -12,63 +12,325 @@
#include <kunit/test-bug.h>
#include <kunit/visibility.h>
+#define PLATFORM_CASE(platform__, graphics_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_NONE, \
+ .step = { .graphics = STEP_ ## graphics_step__ } \
+ }
+
+#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \
+ .step = { .graphics = STEP_ ## graphics_step__ } \
+ }
+
+#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \
+ media_verx100__, media_step__) \
+ { \
+ .platform = XE_ ## platform__, \
+ .subplatform = XE_SUBPLATFORM_NONE, \
+ .graphics_verx100 = graphics_verx100__, \
+ .media_verx100 = media_verx100__, \
+ .step = { .graphics = STEP_ ## graphics_step__, \
+ .media = STEP_ ## media_step__ } \
+ }
+
+static const struct xe_pci_fake_data cases[] = {
+ PLATFORM_CASE(TIGERLAKE, B0),
+ PLATFORM_CASE(DG1, A0),
+ PLATFORM_CASE(DG1, B0),
+ PLATFORM_CASE(ALDERLAKE_S, A0),
+ PLATFORM_CASE(ALDERLAKE_S, B0),
+ PLATFORM_CASE(ALDERLAKE_S, C0),
+ PLATFORM_CASE(ALDERLAKE_S, D0),
+ PLATFORM_CASE(ALDERLAKE_P, A0),
+ PLATFORM_CASE(ALDERLAKE_P, B0),
+ PLATFORM_CASE(ALDERLAKE_P, C0),
+ SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
+ SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
+ SUBPLATFORM_CASE(DG2, G10, C0),
+ SUBPLATFORM_CASE(DG2, G11, B1),
+ SUBPLATFORM_CASE(DG2, G12, A1),
+ GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
+ GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
+ GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
+ GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
+ GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
+ GMDID_CASE(PANTHERLAKE, 3000, A0, 3000, A0),
+};
+
+KUNIT_ARRAY_PARAM(platform, cases, xe_pci_fake_data_desc);
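For reference, one entry built by the macros above expands to an initializer along these lines (a sketch: the XE_*, XE_SUBPLATFORM_* and STEP_* enumerators are assumed to exist as spelled by the macros, and the name example_case is purely illustrative):

    /* GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0) expands to roughly: */
    static const struct xe_pci_fake_data example_case = {
    	.platform = XE_METEORLAKE,
    	.subplatform = XE_SUBPLATFORM_NONE,
    	.graphics_verx100 = 1270,
    	.media_verx100 = 1300,
    	.step = { .graphics = STEP_A0, .media = STEP_A0 },
    };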
+
/**
- * xe_call_for_each_graphics_ip - Iterate over all recognized graphics IPs
- * @xe_fn: Function to call for each device.
+ * xe_pci_fake_data_gen_params - Generate struct xe_pci_fake_data parameters
+ * @test: test context object
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares a struct xe_pci_fake_data parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
*
- * This function iterates over the descriptors for all graphics IPs recognized
- * by the driver and calls @xe_fn: for each one of them.
+ * Return: pointer to the next parameter or NULL if no more parameters
*/
-void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn)
+const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc)
{
- const struct xe_graphics_desc *desc, *last = NULL;
+ return platform_gen_params(test, prev, desc);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_gen_params);
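To show how the new generator is meant to be consumed, here is a minimal sketch of a parameterized test wired to it, mirroring the xe_wa_test.c conversion later in this patch; the test and array names are illustrative only:

    static void example_fake_data_test(struct kunit *test)
    {
    	const struct xe_pci_fake_data *param = test->param_value;

    	kunit_info(test, "platform: %d\n", param->platform);
    }

    static struct kunit_case example_cases[] = {
    	KUNIT_CASE_PARAM(example_fake_data_test, xe_pci_fake_data_gen_params),
    	{}
    };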
- for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) {
- desc = graphics_ips[i].desc;
- if (desc == last)
- continue;
+static const struct xe_device_desc *lookup_desc(enum xe_platform p)
+{
+ const struct xe_device_desc *desc;
+ const struct pci_device_id *ids;
- xe_fn(desc);
- last = desc;
+ for (ids = pciidlist; ids->driver_data; ids++) {
+ desc = (const void *)ids->driver_data;
+ if (desc->platform == p)
+ return desc;
}
+ return NULL;
+}
+
+static const struct xe_subplatform_desc *lookup_sub_desc(enum xe_platform p, enum xe_subplatform s)
+{
+ const struct xe_device_desc *desc = lookup_desc(p);
+ const struct xe_subplatform_desc *spd;
+
+ if (desc && desc->subplatforms)
+ for (spd = desc->subplatforms; spd->subplatform; spd++)
+ if (spd->subplatform == s)
+ return spd;
+ return NULL;
+}
+
+static const char *lookup_platform_name(enum xe_platform p)
+{
+ const struct xe_device_desc *desc = lookup_desc(p);
+
+ return desc ? desc->platform_name : "INVALID";
+}
+
+static const char *__lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s)
+{
+ const struct xe_subplatform_desc *desc = lookup_sub_desc(p, s);
+
+ return desc ? desc->name : "INVALID";
+}
+
+static const char *lookup_subplatform_name(enum xe_platform p, enum xe_subplatform s)
+{
+ return s == XE_SUBPLATFORM_NONE ? "" : __lookup_subplatform_name(p, s);
+}
+
+static const char *subplatform_prefix(enum xe_subplatform s)
+{
+ return s == XE_SUBPLATFORM_NONE ? "" : " ";
+}
+
+static const char *step_prefix(enum xe_step step)
+{
+ return step == STEP_NONE ? "" : " ";
+}
+
+static const char *step_name(enum xe_step step)
+{
+ return step == STEP_NONE ? "" : xe_step_name(step);
+}
+
+static const char *sriov_prefix(enum xe_sriov_mode mode)
+{
+ return mode <= XE_SRIOV_MODE_NONE ? "" : " ";
+}
+
+static const char *sriov_name(enum xe_sriov_mode mode)
+{
+ return mode <= XE_SRIOV_MODE_NONE ? "" : xe_sriov_mode_to_string(mode);
+}
+
+static const char *lookup_graphics_name(unsigned int verx100)
+{
+ const struct xe_ip *ip = find_graphics_ip(verx100);
+
+ return ip ? ip->name : "";
+}
+
+static const char *lookup_media_name(unsigned int verx100)
+{
+ const struct xe_ip *ip = find_media_ip(verx100);
+
+ return ip ? ip->name : "";
}
-EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_graphics_ip);
/**
- * xe_call_for_each_media_ip - Iterate over all recognized media IPs
- * @xe_fn: Function to call for each device.
+ * xe_pci_fake_data_desc - Describe struct xe_pci_fake_data parameter
+ * @param: the &struct xe_pci_fake_data parameter to describe
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
*
- * This function iterates over the descriptors for all media IPs recognized
- * by the driver and calls @xe_fn: for each one of them.
+ * This function prepares a description of the struct xe_pci_fake_data parameter.
+ *
+ * It is tailored for use in parameterized KUnit tests where the parameter
+ * generator is based on struct xe_pci_fake_data arrays.
*/
-void xe_call_for_each_media_ip(xe_media_fn xe_fn)
+void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc)
{
- const struct xe_media_desc *desc, *last = NULL;
+ if (param->graphics_verx100 || param->media_verx100)
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s %u.%02u(%s)%s%s %u.%02u(%s)%s%s%s%s",
+ lookup_platform_name(param->platform),
+ subplatform_prefix(param->subplatform),
+ lookup_subplatform_name(param->platform, param->subplatform),
+ param->graphics_verx100 / 100, param->graphics_verx100 % 100,
+ lookup_graphics_name(param->graphics_verx100),
+ step_prefix(param->step.graphics), step_name(param->step.graphics),
+ param->media_verx100 / 100, param->media_verx100 % 100,
+ lookup_media_name(param->media_verx100),
+ step_prefix(param->step.media), step_name(param->step.media),
+ sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode));
+ else
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%s%s%s%s%s%s%s",
+ lookup_platform_name(param->platform),
+ subplatform_prefix(param->subplatform),
+ lookup_subplatform_name(param->platform, param->subplatform),
+ step_prefix(param->step.graphics), step_name(param->step.graphics),
+ sriov_prefix(param->sriov_mode), sriov_name(param->sriov_mode));
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_data_desc);
- for (int i = 0; i < ARRAY_SIZE(media_ips); i++) {
- desc = media_ips[i].desc;
- if (desc == last)
- continue;
+static void xe_ip_kunit_desc(const struct xe_ip *param, char *desc)
+{
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "%u.%02u %s",
+ param->verx100 / 100, param->verx100 % 100, param->name);
+}
- xe_fn(desc);
- last = desc;
- }
+/*
+ * Pre-GMDID graphics and media IP definitions.
+ *
+ * Mimic the way GMDID IPs are declared so the same
+ * param generator can be used for both.
+ */
+static const struct xe_ip pre_gmdid_graphics_ips[] = {
+ { 1200, "Xe_LP", &graphics_xelp },
+ { 1210, "Xe_LP+", &graphics_xelp },
+ { 1255, "Xe_HPG", &graphics_xehpg },
+ { 1260, "Xe_HPC", &graphics_xehpc },
+};
+
+static const struct xe_ip pre_gmdid_media_ips[] = {
+ { 1200, "Xe_M", &media_xem },
+ { 1255, "Xe_HPM", &media_xem },
+};
+
+KUNIT_ARRAY_PARAM(pre_gmdid_graphics_ip, pre_gmdid_graphics_ips, xe_ip_kunit_desc);
+KUNIT_ARRAY_PARAM(pre_gmdid_media_ip, pre_gmdid_media_ips, xe_ip_kunit_desc);
+
+KUNIT_ARRAY_PARAM(graphics_ip, graphics_ips, xe_ip_kunit_desc);
+KUNIT_ARRAY_PARAM(media_ip, media_ips, xe_ip_kunit_desc);
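As a worked example of the description format produced by xe_ip_kunit_desc() above, the pre-GMDID entries yield strings like the following:

    /*
     * "%u.%02u %s" with verx100 == 1200 gives "12.00 Xe_LP";
     * with verx100 == 1255 it gives "12.55 Xe_HPG" (graphics) or "12.55 Xe_HPM" (media).
     */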
+
+static void xe_pci_id_kunit_desc(const struct pci_device_id *param, char *desc)
+{
+ const struct xe_device_desc *dev_desc =
+ (const struct xe_device_desc *)param->driver_data;
+
+ if (dev_desc)
+ snprintf(desc, KUNIT_PARAM_DESC_SIZE, "0x%X (%s)",
+ param->device, dev_desc->platform_name);
+}
+
+KUNIT_ARRAY_PARAM(pci_id, pciidlist, xe_pci_id_kunit_desc);
+
+/**
+ * xe_pci_graphics_ip_gen_param - Generate graphics struct xe_ip parameters
+ * @test: test context object
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares a struct xe_ip parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next parameter or NULL if no more parameters
+ */
+const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc)
+{
+ const void *next = pre_gmdid_graphics_ip_gen_params(test, prev, desc);
+
+ if (next)
+ return next;
+ if (is_insidevar(prev, pre_gmdid_graphics_ips))
+ prev = NULL;
+
+ return graphics_ip_gen_params(test, prev, desc);
}
-EXPORT_SYMBOL_IF_KUNIT(xe_call_for_each_media_ip);
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_graphics_ip_gen_param);
-static void fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
- u32 *ver, u32 *revid)
+/**
+ * xe_pci_media_ip_gen_param - Generate media struct xe_ip parameters
+ * @test: test context object
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares a struct xe_ip parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next parameter or NULL if no more parameters
+ */
+const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc)
+{
+ const void *next = pre_gmdid_media_ip_gen_params(test, prev, desc);
+
+ if (next)
+ return next;
+ if (is_insidevar(prev, pre_gmdid_media_ips))
+ prev = NULL;
+
+ return media_ip_gen_params(test, prev, desc);
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_media_ip_gen_param);
+
+/**
+ * xe_pci_id_gen_param - Generate struct pci_device_id parameters
+ * @test: test context object
+ * @prev: the pointer to the previous parameter to iterate from or NULL
+ * @desc: output buffer with minimum size of KUNIT_PARAM_DESC_SIZE
+ *
+ * This function prepares a struct pci_device_id parameter.
+ *
+ * To be used only as a parameter generator function in &KUNIT_CASE_PARAM.
+ *
+ * Return: pointer to the next parameter or NULL if no more parameters
+ */
+const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc)
+{
+ const struct pci_device_id *pci = pci_id_gen_params(test, prev, desc);
+
+ return pci->driver_data ? pci : NULL;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_pci_id_gen_param);
+
+static int fake_read_gmdid(struct xe_device *xe, enum xe_gmdid_type type,
+ u32 *ver, u32 *revid)
{
struct kunit *test = kunit_get_current_test();
struct xe_pci_fake_data *data = test->priv;
if (type == GMDID_MEDIA) {
*ver = data->media_verx100;
- *revid = xe_step_to_gmdid(data->media_step);
+ *revid = xe_step_to_gmdid(data->step.media);
} else {
*ver = data->graphics_verx100;
- *revid = xe_step_to_gmdid(data->graphics_step);
+ *revid = xe_step_to_gmdid(data->step.graphics);
}
+
+ return 0;
+}
+
+static void fake_xe_info_probe_tile_count(struct xe_device *xe)
+{
+ /* Nothing to do, just use the statically defined value. */
}
int xe_pci_fake_device_init(struct xe_device *xe)
@@ -108,6 +370,8 @@ done:
data->sriov_mode : XE_SRIOV_MODE_NONE;
kunit_activate_static_stub(test, read_gmdid, fake_read_gmdid);
+ kunit_activate_static_stub(test, xe_info_probe_tile_count,
+ fake_xe_info_probe_tile_count);
xe_info_init_early(xe, desc, subplatform_desc);
xe_info_init(xe, desc);
@@ -118,6 +382,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
/**
* xe_pci_live_device_gen_param - Helper to iterate Xe devices as KUnit parameters
+ * @test: test context object
* @prev: the previously returned value, or NULL for the first iteration
* @desc: the buffer for a parameter name
*
@@ -129,7 +394,7 @@ EXPORT_SYMBOL_IF_KUNIT(xe_pci_fake_device_init);
* Return: pointer to the next &struct xe_device ready to be used as a parameter
* or NULL if there are no more Xe devices on the system.
*/
-const void *xe_pci_live_device_gen_param(const void *prev, char *desc)
+const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc)
{
const struct xe_device *xe = prev;
struct device *dev = xe ? xe->drm.dev : NULL;
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.c b/drivers/gpu/drm/xe/tests/xe_pci_test.c
index 744a37583d2d..4d10a7e2b570 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.c
@@ -14,9 +14,10 @@
#include "xe_pci_test.h"
#include "xe_pci_types.h"
-static void check_graphics_ip(const struct xe_graphics_desc *graphics)
+static void check_graphics_ip(struct kunit *test)
{
- struct kunit *test = kunit_get_current_test();
+ const struct xe_ip *param = test->param_value;
+ const struct xe_graphics_desc *graphics = param->desc;
u64 mask = graphics->hw_engine_mask;
/* RCS, CCS, and BCS engines are allowed on the graphics IP */
@@ -28,9 +29,10 @@ static void check_graphics_ip(const struct xe_graphics_desc *graphics)
KUNIT_ASSERT_EQ(test, mask, 0);
}
-static void check_media_ip(const struct xe_media_desc *media)
+static void check_media_ip(struct kunit *test)
{
- struct kunit *test = kunit_get_current_test();
+ const struct xe_ip *param = test->param_value;
+ const struct xe_media_desc *media = param->desc;
u64 mask = media->hw_engine_mask;
/* VCS, VECS and GSCCS engines are allowed on the media IP */
@@ -42,19 +44,27 @@ static void check_media_ip(const struct xe_media_desc *media)
KUNIT_ASSERT_EQ(test, mask, 0);
}
-static void xe_gmdid_graphics_ip(struct kunit *test)
+static void check_platform_desc(struct kunit *test)
{
- xe_call_for_each_graphics_ip(check_graphics_ip);
-}
+ const struct pci_device_id *pci = test->param_value;
+ const struct xe_device_desc *desc =
+ (const struct xe_device_desc *)pci->driver_data;
-static void xe_gmdid_media_ip(struct kunit *test)
-{
- xe_call_for_each_media_ip(check_media_ip);
+ KUNIT_EXPECT_GT(test, desc->dma_mask_size, 0);
+
+ KUNIT_EXPECT_GT(test, (unsigned int)desc->max_gt_per_tile, 0);
+ KUNIT_EXPECT_LE(test, (unsigned int)desc->max_gt_per_tile, XE_MAX_GT_PER_TILE);
+
+ KUNIT_EXPECT_GT(test, desc->va_bits, 0);
+ KUNIT_EXPECT_LE(test, desc->va_bits, 64);
+
+ KUNIT_EXPECT_GT(test, desc->vm_max_level, 0);
}
static struct kunit_case xe_pci_tests[] = {
- KUNIT_CASE(xe_gmdid_graphics_ip),
- KUNIT_CASE(xe_gmdid_media_ip),
+ KUNIT_CASE_PARAM(check_graphics_ip, xe_pci_graphics_ip_gen_param),
+ KUNIT_CASE_PARAM(check_media_ip, xe_pci_media_ip_gen_param),
+ KUNIT_CASE_PARAM(check_platform_desc, xe_pci_id_gen_param),
{}
};
diff --git a/drivers/gpu/drm/xe/tests/xe_pci_test.h b/drivers/gpu/drm/xe/tests/xe_pci_test.h
index ede46800aff1..30505d1cbefc 100644
--- a/drivers/gpu/drm/xe/tests/xe_pci_test.h
+++ b/drivers/gpu/drm/xe/tests/xe_pci_test.h
@@ -7,33 +7,30 @@
#define _XE_PCI_TEST_H_
#include <linux/types.h>
+#include <kunit/test.h>
#include "xe_platform_types.h"
#include "xe_sriov_types.h"
+#include "xe_step_types.h"
struct xe_device;
-struct xe_graphics_desc;
-struct xe_media_desc;
-
-typedef int (*xe_device_fn)(struct xe_device *);
-typedef void (*xe_graphics_fn)(const struct xe_graphics_desc *);
-typedef void (*xe_media_fn)(const struct xe_media_desc *);
-
-void xe_call_for_each_graphics_ip(xe_graphics_fn xe_fn);
-void xe_call_for_each_media_ip(xe_media_fn xe_fn);
struct xe_pci_fake_data {
enum xe_sriov_mode sriov_mode;
enum xe_platform platform;
enum xe_subplatform subplatform;
+ struct xe_step_info step;
u32 graphics_verx100;
u32 media_verx100;
- u32 graphics_step;
- u32 media_step;
};
int xe_pci_fake_device_init(struct xe_device *xe);
+const void *xe_pci_fake_data_gen_params(struct kunit *test, const void *prev, char *desc);
+void xe_pci_fake_data_desc(const struct xe_pci_fake_data *param, char *desc);
-const void *xe_pci_live_device_gen_param(const void *prev, char *desc);
+const void *xe_pci_graphics_ip_gen_param(struct kunit *test, const void *prev, char *desc);
+const void *xe_pci_media_ip_gen_param(struct kunit *test, const void *prev, char *desc);
+const void *xe_pci_id_gen_param(struct kunit *test, const void *prev, char *desc);
+const void *xe_pci_live_device_gen_param(struct kunit *test, const void *prev, char *desc);
#endif
diff --git a/drivers/gpu/drm/xe/tests/xe_rtp_test.c b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
index b0254b014fe4..d2255a59e58f 100644
--- a/drivers/gpu/drm/xe/tests/xe_rtp_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_rtp_test.c
@@ -48,12 +48,14 @@ struct rtp_test_case {
const struct xe_rtp_entry *entries;
};
-static bool match_yes(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+static bool match_yes(const struct xe_device *xe, const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
{
return true;
}
-static bool match_no(const struct xe_gt *gt, const struct xe_hw_engine *hwe)
+static bool match_no(const struct xe_device *xe, const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
{
return false;
}
diff --git a/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c
new file mode 100644
index 000000000000..ba95e29b597d
--- /dev/null
+++ b/drivers/gpu/drm/xe/tests/xe_sriov_pf_service_kunit.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0 AND MIT
+/*
+ * Copyright © 2024-2025 Intel Corporation
+ */
+
+#include <kunit/test.h>
+
+#include "xe_device.h"
+#include "xe_kunit_helpers.h"
+#include "xe_pci_test.h"
+
+static int pf_service_test_init(struct kunit *test)
+{
+ struct xe_pci_fake_data fake = {
+ .sriov_mode = XE_SRIOV_MODE_PF,
+ .platform = XE_TIGERLAKE, /* some random platform */
+ .subplatform = XE_SUBPLATFORM_NONE,
+ };
+ struct xe_device *xe;
+
+ test->priv = &fake;
+ xe_kunit_helper_xe_device_test_init(test);
+
+ xe = test->priv;
+ KUNIT_ASSERT_EQ(test, xe_sriov_init(xe), 0);
+
+ xe_sriov_pf_service_init(xe);
+ /*
+ * sanity check:
+ * - all supported platforms' VF/PF ABI versions must be defined
+ * - the base version can't be newer than the latest
+ */
+ KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_NE(test, 0, xe->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.major,
+ xe->sriov.pf.service.version.latest.major);
+ if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major)
+ KUNIT_ASSERT_LE(test, xe->sriov.pf.service.version.base.minor,
+ xe->sriov.pf.service.version.latest.minor);
+ return 0;
+}
+
+static void pf_negotiate_any(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe, VF2PF_HANDSHAKE_MAJOR_ANY,
+ VF2PF_HANDSHAKE_MINOR_ANY,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_base_match(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.base.major,
+ xe->sriov.pf.service.version.base.minor,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.base.minor);
+}
+
+static void pf_negotiate_base_newer(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.base.major,
+ xe->sriov.pf.service.version.base.minor + 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_GE(test, minor, xe->sriov.pf.service.version.base.minor);
+ if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major)
+ KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor);
+ else
+ KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
+}
+
+static void pf_negotiate_base_next(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.base.major + 1, 0,
+ &major, &minor));
+ KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major);
+ KUNIT_ASSERT_LE(test, major, xe->sriov.pf.service.version.latest.major);
+ if (major == xe->sriov.pf.service.version.latest.major)
+ KUNIT_ASSERT_LE(test, minor, xe->sriov.pf.service.version.latest.minor);
+ else
+ KUNIT_FAIL(test, "FIXME: don't know how to test multi-version yet!\n");
+}
+
+static void pf_negotiate_base_older(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ if (!xe->sriov.pf.service.version.base.minor)
+ kunit_skip(test, "no older minor\n");
+
+ KUNIT_ASSERT_NE(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.base.major,
+ xe->sriov.pf.service.version.base.minor - 1,
+ &major, &minor));
+}
+
+static void pf_negotiate_base_prev(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_NE(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.base.major - 1, 1,
+ &major, &minor));
+}
+
+static void pf_negotiate_latest_match(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.latest.major,
+ xe->sriov.pf.service.version.latest.minor,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_newer(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.latest.major,
+ xe->sriov.pf.service.version.latest.minor + 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_next(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.latest.major + 1, 0,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor);
+}
+
+static void pf_negotiate_latest_older(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ if (!xe->sriov.pf.service.version.latest.minor)
+ kunit_skip(test, "no older minor\n");
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.latest.major,
+ xe->sriov.pf.service.version.latest.minor - 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major);
+ KUNIT_ASSERT_EQ(test, minor, xe->sriov.pf.service.version.latest.minor - 1);
+}
+
+static void pf_negotiate_latest_prev(struct kunit *test)
+{
+ struct xe_device *xe = test->priv;
+ u32 major, minor;
+
+ if (xe->sriov.pf.service.version.base.major == xe->sriov.pf.service.version.latest.major)
+ kunit_skip(test, "no prev major");
+
+ KUNIT_ASSERT_EQ(test, 0,
+ pf_negotiate_version(xe,
+ xe->sriov.pf.service.version.latest.major - 1,
+ xe->sriov.pf.service.version.base.minor + 1,
+ &major, &minor));
+ KUNIT_ASSERT_EQ(test, major, xe->sriov.pf.service.version.latest.major - 1);
+ KUNIT_ASSERT_GE(test, major, xe->sriov.pf.service.version.base.major);
+}
+
+static struct kunit_case pf_service_test_cases[] = {
+ KUNIT_CASE(pf_negotiate_any),
+ KUNIT_CASE(pf_negotiate_base_match),
+ KUNIT_CASE(pf_negotiate_base_newer),
+ KUNIT_CASE(pf_negotiate_base_next),
+ KUNIT_CASE(pf_negotiate_base_older),
+ KUNIT_CASE(pf_negotiate_base_prev),
+ KUNIT_CASE(pf_negotiate_latest_match),
+ KUNIT_CASE(pf_negotiate_latest_newer),
+ KUNIT_CASE(pf_negotiate_latest_next),
+ KUNIT_CASE(pf_negotiate_latest_older),
+ KUNIT_CASE(pf_negotiate_latest_prev),
+ {}
+};
+
+static struct kunit_suite pf_service_suite = {
+ .name = "pf_service",
+ .test_cases = pf_service_test_cases,
+ .init = pf_service_test_init,
+};
+
+kunit_test_suite(pf_service_suite);
diff --git a/drivers/gpu/drm/xe/tests/xe_wa_test.c b/drivers/gpu/drm/xe/tests/xe_wa_test.c
index c96d1fe34151..49d191043dfa 100644
--- a/drivers/gpu/drm/xe/tests/xe_wa_test.c
+++ b/drivers/gpu/drm/xe/tests/xe_wa_test.c
@@ -15,86 +15,10 @@
#include "xe_tuning.h"
#include "xe_wa.h"
-struct platform_test_case {
- const char *name;
- enum xe_platform platform;
- enum xe_subplatform subplatform;
- u32 graphics_verx100;
- u32 media_verx100;
- struct xe_step_info step;
-};
-
-#define PLATFORM_CASE(platform__, graphics_step__) \
- { \
- .name = #platform__ " (" #graphics_step__ ")", \
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_NONE, \
- .step = { .graphics = STEP_ ## graphics_step__ } \
- }
-
-
-#define SUBPLATFORM_CASE(platform__, subplatform__, graphics_step__) \
- { \
- .name = #platform__ "_" #subplatform__ " (" #graphics_step__ ")", \
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_ ## platform__ ## _ ## subplatform__, \
- .step = { .graphics = STEP_ ## graphics_step__ } \
- }
-
-#define GMDID_CASE(platform__, graphics_verx100__, graphics_step__, \
- media_verx100__, media_step__) \
- { \
- .name = #platform__ " (g:" #graphics_step__ ", m:" #media_step__ ")",\
- .platform = XE_ ## platform__, \
- .subplatform = XE_SUBPLATFORM_NONE, \
- .graphics_verx100 = graphics_verx100__, \
- .media_verx100 = media_verx100__, \
- .step = { .graphics = STEP_ ## graphics_step__, \
- .media = STEP_ ## media_step__ } \
- }
-
-static const struct platform_test_case cases[] = {
- PLATFORM_CASE(TIGERLAKE, B0),
- PLATFORM_CASE(DG1, A0),
- PLATFORM_CASE(DG1, B0),
- PLATFORM_CASE(ALDERLAKE_S, A0),
- PLATFORM_CASE(ALDERLAKE_S, B0),
- PLATFORM_CASE(ALDERLAKE_S, C0),
- PLATFORM_CASE(ALDERLAKE_S, D0),
- PLATFORM_CASE(ALDERLAKE_P, A0),
- PLATFORM_CASE(ALDERLAKE_P, B0),
- PLATFORM_CASE(ALDERLAKE_P, C0),
- SUBPLATFORM_CASE(ALDERLAKE_S, RPLS, D0),
- SUBPLATFORM_CASE(ALDERLAKE_P, RPLU, E0),
- SUBPLATFORM_CASE(DG2, G10, C0),
- SUBPLATFORM_CASE(DG2, G11, B1),
- SUBPLATFORM_CASE(DG2, G12, A1),
- GMDID_CASE(METEORLAKE, 1270, A0, 1300, A0),
- GMDID_CASE(METEORLAKE, 1271, A0, 1300, A0),
- GMDID_CASE(METEORLAKE, 1274, A0, 1300, A0),
- GMDID_CASE(LUNARLAKE, 2004, A0, 2000, A0),
- GMDID_CASE(LUNARLAKE, 2004, B0, 2000, A0),
- GMDID_CASE(BATTLEMAGE, 2001, A0, 1301, A1),
-};
-
-static void platform_desc(const struct platform_test_case *t, char *desc)
-{
- strscpy(desc, t->name, KUNIT_PARAM_DESC_SIZE);
-}
-
-KUNIT_ARRAY_PARAM(platform, cases, platform_desc);
-
static int xe_wa_test_init(struct kunit *test)
{
- const struct platform_test_case *param = test->param_value;
- struct xe_pci_fake_data data = {
- .platform = param->platform,
- .subplatform = param->subplatform,
- .graphics_verx100 = param->graphics_verx100,
- .media_verx100 = param->media_verx100,
- .graphics_step = param->step.graphics,
- .media_step = param->step.media,
- };
+ const struct xe_pci_fake_data *param = test->param_value;
+ struct xe_pci_fake_data data = *param;
struct xe_device *xe;
struct device *dev;
int ret;
@@ -119,13 +43,6 @@ static int xe_wa_test_init(struct kunit *test)
return 0;
}
-static void xe_wa_test_exit(struct kunit *test)
-{
- struct xe_device *xe = test->priv;
-
- drm_kunit_helper_free_device(test, xe->drm.dev);
-}
-
static void xe_wa_gt(struct kunit *test)
{
struct xe_device *xe = test->priv;
@@ -143,14 +60,13 @@ static void xe_wa_gt(struct kunit *test)
}
static struct kunit_case xe_wa_tests[] = {
- KUNIT_CASE_PARAM(xe_wa_gt, platform_gen_params),
+ KUNIT_CASE_PARAM(xe_wa_gt, xe_pci_fake_data_gen_params),
{}
};
static struct kunit_suite xe_rtp_test_suite = {
.name = "xe_wa",
.init = xe_wa_test_init,
- .exit = xe_wa_test_exit,
.test_cases = xe_wa_tests,
};
diff --git a/drivers/gpu/drm/xe/xe_assert.h b/drivers/gpu/drm/xe/xe_assert.h
index 68fe70ce2be3..a818eaa05b7d 100644
--- a/drivers/gpu/drm/xe/xe_assert.h
+++ b/drivers/gpu/drm/xe/xe_assert.h
@@ -12,6 +12,7 @@
#include "xe_gt_types.h"
#include "xe_step.h"
+#include "xe_vram.h"
/**
* DOC: Xe Asserts
@@ -145,7 +146,8 @@
const struct xe_tile *__tile = (tile); \
char __buf[10] __maybe_unused; \
xe_assert_msg(tile_to_xe(__tile), condition, "tile: %u VRAM %s\n" msg, \
- __tile->id, ({ string_get_size(__tile->mem.vram.actual_physical_size, 1, \
+ __tile->id, ({ string_get_size( \
+ xe_vram_region_actual_physical_size(__tile->mem.vram), 1, \
STRING_UNITS_2, __buf, sizeof(__buf)); __buf; }), ## arg); \
})
diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 9570672fce33..6d20229c11de 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -19,7 +19,7 @@ static int bb_prefetch(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- if (GRAPHICS_VERx100(xe) >= 1250 && !xe_gt_is_media_type(gt))
+ if (GRAPHICS_VERx100(xe) >= 1250 && xe_gt_is_main_type(gt))
/*
* RCS and CCS require 1K, although other engines would be
* okay with 512.
@@ -60,6 +60,41 @@ err:
return ERR_PTR(err);
}
+struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
+{
+ struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_sa_manager *bb_pool;
+ int err;
+
+ if (!bb)
+ return ERR_PTR(-ENOMEM);
+ /*
+ * We need to allocate space for the requested number of dwords plus
+ * one additional MI_BATCH_BUFFER_END dword. Since the whole SA is
+ * submitted to HW, we need to make sure that the last instruction is
+ * not overwritten when the last chunk of the SA is allocated for a BB.
+ * So, this extra DW acts as a guard here.
+ */
+
+ bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
+ bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
+
+ if (IS_ERR(bb->bo)) {
+ err = PTR_ERR(bb->bo);
+ goto err;
+ }
+
+ bb->cs = xe_sa_bo_cpu_addr(bb->bo);
+ bb->len = 0;
+
+ return bb;
+err:
+ kfree(bb);
+ return ERR_PTR(err);
+}
+
static struct xe_sched_job *
__xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64 *addr)
{
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index fafacd73dcc3..2a8adc9a6dee 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -13,8 +13,11 @@ struct dma_fence;
struct xe_gt;
struct xe_exec_queue;
struct xe_sched_job;
+enum xe_sriov_vf_ccs_rw_ctxs;
-struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 dwords, bool usm);
+struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id);
struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
struct xe_bb *bb);
struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue *q,
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index d99d91fe8aa9..bf4ee976b680 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -9,6 +9,7 @@
#include <linux/nospec.h>
#include <drm/drm_drv.h>
+#include <drm/drm_dumb_buffers.h>
#include <drm/drm_gem_ttm_helper.h>
#include <drm/drm_managed.h>
#include <drm/ttm/ttm_backup.h>
@@ -19,6 +20,8 @@
#include <kunit/static_stub.h>
+#include <trace/events/gpu_mem.h>
+
#include "xe_device.h"
#include "xe_dma_buf.h"
#include "xe_drm_client.h"
@@ -31,9 +34,12 @@
#include "xe_pxp.h"
#include "xe_res_cursor.h"
#include "xe_shrinker.h"
+#include "xe_sriov_vf_ccs.h"
+#include "xe_tile.h"
#include "xe_trace_bo.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_vm.h"
+#include "xe_vram_types.h"
const char *const xe_mem_type_to_name[TTM_NUM_MEM_TYPES] = {
[XE_PL_SYSTEM] = "system",
@@ -77,6 +83,10 @@ static struct ttm_placement tt_placement = {
.placement = tt_placement_flags,
};
+#define for_each_set_bo_vram_flag(bit__, bo_flags__) \
+ for (unsigned int __bit_tmp = BIT(0); __bit_tmp <= XE_BO_FLAG_VRAM_MASK; __bit_tmp <<= 1) \
+ for_each_if(((bit__) = __bit_tmp) & (bo_flags__) & XE_BO_FLAG_VRAM_MASK)
+
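A minimal sketch of how the iterator macro above behaves, using only flags already present in this file (the pr_info() call is purely illustrative):

    u32 flags = XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1 | XE_BO_FLAG_SYSTEM;
    u32 vram_flag;

    /* Visits XE_BO_FLAG_VRAM0, then XE_BO_FLAG_VRAM1; the SYSTEM bit is filtered out. */
    for_each_set_bo_vram_flag(vram_flag, flags)
    	pr_info("vram flag 0x%x\n", vram_flag);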
bool mem_type_is_vram(u32 mem_type)
{
return mem_type >= XE_PL_VRAM0 && mem_type != XE_PL_STOLEN;
@@ -184,6 +194,8 @@ static void try_add_system(struct xe_device *xe, struct xe_bo *bo,
bo->placements[*c] = (struct ttm_place) {
.mem_type = XE_PL_TT,
+ .flags = (bo_flags & XE_BO_FLAG_VRAM_MASK) ?
+ TTM_PL_FLAG_FALLBACK : 0,
};
*c += 1;
}
@@ -196,6 +208,8 @@ static bool force_contiguous(u32 bo_flags)
else if (bo_flags & XE_BO_FLAG_PINNED &&
!(bo_flags & XE_BO_FLAG_PINNED_LATE_RESTORE))
return true; /* needs vmap */
+ else if (bo_flags & XE_BO_FLAG_CPU_ADDR_MIRROR)
+ return true;
/*
* For eviction / restore on suspend / resume objects pinned in VRAM
@@ -205,6 +219,27 @@ static bool force_contiguous(u32 bo_flags)
bo_flags & XE_BO_FLAG_PINNED;
}
+static u8 vram_bo_flag_to_tile_id(struct xe_device *xe, u32 vram_bo_flag)
+{
+ xe_assert(xe, vram_bo_flag & XE_BO_FLAG_VRAM_MASK);
+ xe_assert(xe, (vram_bo_flag & (vram_bo_flag - 1)) == 0);
+
+ return __ffs(vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1)) - 1;
+}
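The bit arithmetic in vram_bo_flag_to_tile_id() above works out as follows (assuming only that the per-tile VRAM flags occupy consecutive bits starting at XE_BO_FLAG_VRAM0, which the shift already relies on):

    /*
     * If XE_BO_FLAG_VRAM0 == BIT(n), the flag for tile k is BIT(n + k), so
     *   vram_bo_flag >> (__ffs(XE_BO_FLAG_VRAM0) - 1) == BIT(n + k) >> (n - 1) == BIT(k + 1)
     * and __ffs() of that is k + 1; subtracting 1 returns the tile id k.
     */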
+
+static u32 bo_vram_flags_to_vram_placement(struct xe_device *xe, u32 bo_flags, u32 vram_flag,
+ enum ttm_bo_type type)
+{
+ u8 tile_id = vram_bo_flag_to_tile_id(xe, vram_flag);
+
+ xe_assert(xe, tile_id < xe->info.tile_count);
+
+ if (type == ttm_bo_type_kernel && !(bo_flags & XE_BO_FLAG_FORCE_USER_VRAM))
+ return xe->tiles[tile_id].mem.kernel_vram->placement;
+ else
+ return xe->tiles[tile_id].mem.vram->placement;
+}
+
static void add_vram(struct xe_device *xe, struct xe_bo *bo,
struct ttm_place *places, u32 bo_flags, u32 mem_type, u32 *c)
{
@@ -237,12 +272,15 @@ static void add_vram(struct xe_device *xe, struct xe_bo *bo,
}
static void try_add_vram(struct xe_device *xe, struct xe_bo *bo,
- u32 bo_flags, u32 *c)
+ u32 bo_flags, enum ttm_bo_type type, u32 *c)
{
- if (bo_flags & XE_BO_FLAG_VRAM0)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM0, c);
- if (bo_flags & XE_BO_FLAG_VRAM1)
- add_vram(xe, bo, bo->placements, bo_flags, XE_PL_VRAM1, c);
+ u32 vram_flag;
+
+ for_each_set_bo_vram_flag(vram_flag, bo_flags) {
+ u32 pl = bo_vram_flags_to_vram_placement(xe, bo_flags, vram_flag, type);
+
+ add_vram(xe, bo, bo->placements, bo_flags, pl, c);
+ }
}
static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
@@ -261,11 +299,11 @@ static void try_add_stolen(struct xe_device *xe, struct xe_bo *bo,
}
static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
- u32 bo_flags)
+ u32 bo_flags, enum ttm_bo_type type)
{
u32 c = 0;
- try_add_vram(xe, bo, bo_flags, &c);
+ try_add_vram(xe, bo, bo_flags, type, &c);
try_add_system(xe, bo, bo_flags, &c);
try_add_stolen(xe, bo, bo_flags, &c);
@@ -281,10 +319,10 @@ static int __xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
}
int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
- u32 bo_flags)
+ u32 bo_flags, enum ttm_bo_type type)
{
xe_bo_assert_held(bo);
- return __xe_bo_placement_for_flags(xe, bo, bo_flags);
+ return __xe_bo_placement_for_flags(xe, bo, bo_flags, type);
}
static void xe_evict_flags(struct ttm_buffer_object *tbo,
@@ -336,15 +374,13 @@ static void xe_evict_flags(struct ttm_buffer_object *tbo,
/* struct xe_ttm_tt - Subclassed ttm_tt for xe */
struct xe_ttm_tt {
struct ttm_tt ttm;
- /** @xe - The xe device */
- struct xe_device *xe;
struct sg_table sgt;
struct sg_table *sg;
/** @purgeable: Whether the content of the pages of @ttm is purgeable. */
bool purgeable;
};
-static int xe_tt_map_sg(struct ttm_tt *tt)
+static int xe_tt_map_sg(struct xe_device *xe, struct ttm_tt *tt)
{
struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
unsigned long num_pages = tt->num_pages;
@@ -359,13 +395,13 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
ret = sg_alloc_table_from_pages_segment(&xe_tt->sgt, tt->pages,
num_pages, 0,
(u64)num_pages << PAGE_SHIFT,
- xe_sg_segment_size(xe_tt->xe->drm.dev),
+ xe_sg_segment_size(xe->drm.dev),
GFP_KERNEL);
if (ret)
return ret;
xe_tt->sg = &xe_tt->sgt;
- ret = dma_map_sgtable(xe_tt->xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
+ ret = dma_map_sgtable(xe->drm.dev, xe_tt->sg, DMA_BIDIRECTIONAL,
DMA_ATTR_SKIP_CPU_SYNC);
if (ret) {
sg_free_table(xe_tt->sg);
@@ -376,12 +412,12 @@ static int xe_tt_map_sg(struct ttm_tt *tt)
return 0;
}
-static void xe_tt_unmap_sg(struct ttm_tt *tt)
+static void xe_tt_unmap_sg(struct xe_device *xe, struct ttm_tt *tt)
{
struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
if (xe_tt->sg) {
- dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
+ dma_unmap_sgtable(xe->drm.dev, xe_tt->sg,
DMA_BIDIRECTIONAL, 0);
sg_free_table(xe_tt->sg);
xe_tt->sg = NULL;
@@ -400,24 +436,37 @@ struct sg_table *xe_bo_sg(struct xe_bo *bo)
* Account ttm pages against the device shrinker's shrinkable and
* purgeable counts.
*/
-static void xe_ttm_tt_account_add(struct ttm_tt *tt)
+static void xe_ttm_tt_account_add(struct xe_device *xe, struct ttm_tt *tt)
{
struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
if (xe_tt->purgeable)
- xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, tt->num_pages);
+ xe_shrinker_mod_pages(xe->mem.shrinker, 0, tt->num_pages);
else
- xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, tt->num_pages, 0);
+ xe_shrinker_mod_pages(xe->mem.shrinker, tt->num_pages, 0);
}
-static void xe_ttm_tt_account_subtract(struct ttm_tt *tt)
+static void xe_ttm_tt_account_subtract(struct xe_device *xe, struct ttm_tt *tt)
{
struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
if (xe_tt->purgeable)
- xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, 0, -(long)tt->num_pages);
+ xe_shrinker_mod_pages(xe->mem.shrinker, 0, -(long)tt->num_pages);
else
- xe_shrinker_mod_pages(xe_tt->xe->mem.shrinker, -(long)tt->num_pages, 0);
+ xe_shrinker_mod_pages(xe->mem.shrinker, -(long)tt->num_pages, 0);
+}
+
+static void update_global_total_pages(struct ttm_device *ttm_dev,
+ long num_pages)
+{
+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
+ struct xe_device *xe = ttm_to_xe_device(ttm_dev);
+ u64 global_total_pages =
+ atomic64_add_return(num_pages, &xe->global_total_pages);
+
+ trace_gpu_mem_total(xe->drm.primary->index, 0,
+ global_total_pages << PAGE_SHIFT);
+#endif
}
static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
@@ -436,11 +485,10 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
return NULL;
tt = &xe_tt->ttm;
- xe_tt->xe = xe;
extra_pages = 0;
if (xe_bo_needs_ccs_pages(bo))
- extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size),
+ extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, xe_bo_size(bo)),
PAGE_SIZE);
/*
@@ -527,21 +575,25 @@ static int xe_ttm_tt_populate(struct ttm_device *ttm_dev, struct ttm_tt *tt,
return err;
xe_tt->purgeable = false;
- xe_ttm_tt_account_add(tt);
+ xe_ttm_tt_account_add(ttm_to_xe_device(ttm_dev), tt);
+ update_global_total_pages(ttm_dev, tt->num_pages);
return 0;
}
static void xe_ttm_tt_unpopulate(struct ttm_device *ttm_dev, struct ttm_tt *tt)
{
+ struct xe_device *xe = ttm_to_xe_device(ttm_dev);
+
if ((tt->page_flags & TTM_TT_FLAG_EXTERNAL) &&
!(tt->page_flags & TTM_TT_FLAG_EXTERNAL_MAPPABLE))
return;
- xe_tt_unmap_sg(tt);
+ xe_tt_unmap_sg(xe, tt);
ttm_pool_free(&ttm_dev->pool, tt);
- xe_ttm_tt_account_subtract(tt);
+ xe_ttm_tt_account_subtract(xe, tt);
+ update_global_total_pages(ttm_dev, -(long)tt->num_pages);
}
static void xe_ttm_tt_destroy(struct ttm_device *ttm_dev, struct ttm_tt *tt)
@@ -558,6 +610,23 @@ static bool xe_ttm_resource_visible(struct ttm_resource *mem)
return vres->used_visible_size == mem->size;
}
+/**
+ * xe_bo_is_visible_vram - check if BO is placed entirely in visible VRAM.
+ * @bo: The BO
+ *
+ * This function checks whether a given BO resides entirely in memory visible from the CPU.
+ *
+ * Returns: true if the BO is entirely visible, false otherwise.
+ */
+bool xe_bo_is_visible_vram(struct xe_bo *bo)
+{
+ if (drm_WARN_ON(bo->ttm.base.dev, !xe_bo_is_vram(bo)))
+ return false;
+
+ return xe_ttm_resource_visible(bo->ttm.resource);
+}
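A minimal sketch of how a caller might use the new helper; the two access helpers in the branches are hypothetical placeholders, not functions added by this patch:

    /* Only take a direct CPU access path when the whole BO is CPU-visible. */
    if (xe_bo_is_vram(bo) && xe_bo_is_visible_vram(bo))
    	access_via_cpu_mapping(bo);	/* hypothetical placeholder */
    else
    	access_via_gpu_copy(bo);	/* hypothetical placeholder */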
+
static int xe_ttm_io_mem_reserve(struct ttm_device *bdev,
struct ttm_resource *mem)
{
@@ -789,21 +858,21 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
/* Bo creation path, moving to system or TT. */
if ((!old_mem && ttm) && !handle_system_ccs) {
if (new_mem->mem_type == XE_PL_TT)
- ret = xe_tt_map_sg(ttm);
+ ret = xe_tt_map_sg(xe, ttm);
if (!ret)
ttm_bo_move_null(ttm_bo, new_mem);
goto out;
}
if (ttm_bo->type == ttm_bo_type_sg) {
- ret = xe_bo_move_notify(bo, ctx);
+ if (new_mem->mem_type == XE_PL_SYSTEM)
+ ret = xe_bo_move_notify(bo, ctx);
if (!ret)
ret = xe_bo_move_dmabuf(ttm_bo, new_mem);
return ret;
}
- tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
- (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
+ tt_has_data = ttm && (ttm_tt_is_populated(ttm) || ttm_tt_is_swapped(ttm));
move_lacks_source = !old_mem || (handle_system_ccs ? (!bo->ccs_cleared) :
(!mem_type_is_vram(old_mem_type) && !tt_has_data));
@@ -812,7 +881,7 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
(!ttm && ttm_bo->type == ttm_bo_type_device);
if (new_mem->mem_type == XE_PL_TT) {
- ret = xe_tt_map_sg(ttm);
+ ret = xe_tt_map_sg(xe, ttm);
if (ret)
goto out;
}
@@ -841,21 +910,6 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
goto out;
}
- /* Reject BO eviction if BO is bound to current VM. */
- if (evict && ctx->resv) {
- struct drm_gpuvm_bo *vm_bo;
-
- drm_gem_for_each_gpuvm_bo(vm_bo, &bo->ttm.base) {
- struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
-
- if (xe_vm_resv(vm) == ctx->resv &&
- xe_vm_in_preempt_fence_mode(vm)) {
- ret = -EBUSY;
- goto out;
- }
- }
- }
-
/*
* Failed multi-hop where the old_mem is still marked as
* TTM_PL_FLAG_TEMPORARY, should just be a dummy move.
@@ -963,6 +1017,20 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
dma_fence_put(fence);
xe_pm_runtime_put(xe);
+ /*
+ * CCS metadata is migrated from TT -> SMEM, so detach the BBs from the
+ * BO as they are no longer needed.
+ */
+ if (IS_VF_CCS_READY(xe) && old_mem_type == XE_PL_TT &&
+ new_mem->mem_type == XE_PL_SYSTEM)
+ xe_sriov_vf_ccs_detach_bo(bo);
+
+ if (IS_VF_CCS_READY(xe) &&
+ ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
+ (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT)) &&
+ handle_system_ccs)
+ ret = xe_sriov_vf_ccs_attach_bo(bo);
+
out:
if ((!ttm_bo->resource || ttm_bo->resource->mem_type == XE_PL_SYSTEM) &&
ttm_bo->ttm) {
@@ -973,7 +1041,10 @@ out:
if (timeout < 0)
ret = timeout;
- xe_tt_unmap_sg(ttm_bo->ttm);
+ if (IS_VF_CCS_READY(xe))
+ xe_sriov_vf_ccs_detach_bo(bo);
+
+ xe_tt_unmap_sg(xe, ttm_bo->ttm);
}
return ret;
@@ -983,6 +1054,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
struct ttm_buffer_object *bo,
unsigned long *scanned)
{
+ struct xe_device *xe = ttm_to_xe_device(bo->bdev);
long lret;
/* Fake move to system, without copying data. */
@@ -997,7 +1069,7 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
if (lret)
return lret;
- xe_tt_unmap_sg(bo->ttm);
+ xe_tt_unmap_sg(xe, bo->ttm);
ttm_bo_move_null(bo, new_resource);
}
@@ -1008,11 +1080,30 @@ static long xe_bo_shrink_purge(struct ttm_operation_ctx *ctx,
.allow_move = false});
if (lret > 0)
- xe_ttm_tt_account_subtract(bo->ttm);
+ xe_ttm_tt_account_subtract(xe, bo->ttm);
return lret;
}
+static bool
+xe_bo_eviction_valuable(struct ttm_buffer_object *bo, const struct ttm_place *place)
+{
+ struct drm_gpuvm_bo *vm_bo;
+
+ if (!ttm_bo_eviction_valuable(bo, place))
+ return false;
+
+ if (!xe_bo_is_xe_bo(bo))
+ return true;
+
+ drm_gem_for_each_gpuvm_bo(vm_bo, &bo->base) {
+ if (xe_vm_is_validating(gpuvm_to_vm(vm_bo->vm)))
+ return false;
+ }
+
+ return true;
+}
+
/**
* xe_bo_shrink() - Try to shrink an xe bo.
* @ctx: The struct ttm_operation_ctx used for shrinking.
@@ -1039,7 +1130,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
struct xe_ttm_tt *xe_tt = container_of(tt, struct xe_ttm_tt, ttm);
struct ttm_place place = {.mem_type = bo->resource->mem_type};
struct xe_bo *xe_bo = ttm_to_xe_bo(bo);
- struct xe_device *xe = xe_tt->xe;
+ struct xe_device *xe = ttm_to_xe_device(bo->bdev);
bool needs_rpm;
long lret = 0L;
@@ -1047,7 +1138,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
(flags.purge && !xe_tt->purgeable))
return -EBUSY;
- if (!ttm_bo_eviction_valuable(bo, &place))
+ if (!xe_bo_eviction_valuable(bo, &place))
return -EBUSY;
if (!xe_bo_is_xe_bo(bo) || !xe_bo_get_unless_zero(xe_bo))
@@ -1076,7 +1167,7 @@ long xe_bo_shrink(struct ttm_operation_ctx *ctx, struct ttm_buffer_object *bo,
xe_pm_runtime_put(xe);
if (lret > 0)
- xe_ttm_tt_account_subtract(tt);
+ xe_ttm_tt_account_subtract(xe, tt);
out_unref:
xe_bo_put(xe_bo);
@@ -1097,42 +1188,47 @@ out_unref:
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_bo *backup;
int ret = 0;
- xe_bo_lock(bo, false);
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ xe_assert(xe, !ret);
+ xe_assert(xe, !bo->backup_obj);
- xe_assert(xe, !bo->backup_obj);
+ /*
+ * Since this is called from the PM notifier we might have raced with
+ * someone unpinning this after we dropped the pinned list lock and before
+ * grabbing the above bo lock.
+ */
+ if (!xe_bo_is_pinned(bo))
+ break;
- /*
- * Since this is called from the PM notifier we might have raced with
- * someone unpinning this after we dropped the pinned list lock and
- * grabbing the above bo lock.
- */
- if (!xe_bo_is_pinned(bo))
- goto out_unlock_bo;
+ if (!xe_bo_is_vram(bo))
+ break;
- if (!xe_bo_is_vram(bo))
- goto out_unlock_bo;
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ break;
- if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
- goto out_unlock_bo;
+ backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, xe_bo_size(bo),
+ DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED, &exec);
+ if (IS_ERR(backup)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(backup);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
- DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
- if (IS_ERR(backup)) {
- ret = PTR_ERR(backup);
- goto out_unlock_bo;
+ backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
+ ttm_bo_pin(&backup->ttm);
+ bo->backup_obj = backup;
}
- backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
- ttm_bo_pin(&backup->ttm);
- bo->backup_obj = backup;
-
-out_unlock_bo:
- xe_bo_unlock(bo);
return ret;
}
@@ -1158,56 +1254,12 @@ int xe_bo_notifier_unprepare_pinned(struct xe_bo *bo)
return 0;
}
-/**
- * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
- * @bo: The buffer object to move.
- *
- * On successful completion, the object memory will be moved to system memory.
- *
- * This is needed to for special handling of pinned VRAM object during
- * suspend-resume.
- *
- * Return: 0 on success. Negative error code on failure.
- */
-int xe_bo_evict_pinned(struct xe_bo *bo)
+static int xe_bo_evict_pinned_copy(struct xe_bo *bo, struct xe_bo *backup)
{
- struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
- struct xe_bo *backup = bo->backup_obj;
- bool backup_created = false;
+ struct xe_device *xe = xe_bo_device(bo);
bool unmap = false;
int ret = 0;
- xe_bo_lock(bo, false);
-
- if (WARN_ON(!bo->ttm.resource)) {
- ret = -EINVAL;
- goto out_unlock_bo;
- }
-
- if (WARN_ON(!xe_bo_is_pinned(bo))) {
- ret = -EINVAL;
- goto out_unlock_bo;
- }
-
- if (!xe_bo_is_vram(bo))
- goto out_unlock_bo;
-
- if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
- goto out_unlock_bo;
-
- if (!backup) {
- backup = ___xe_bo_create_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL, bo->size,
- DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
- XE_BO_FLAG_PINNED);
- if (IS_ERR(backup)) {
- ret = PTR_ERR(backup);
- goto out_unlock_bo;
- }
- backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
- backup_created = true;
- }
-
if (xe_bo_is_user(bo) || (bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE)) {
struct xe_migrate *migrate;
struct dma_fence *fence;
@@ -1217,14 +1269,11 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
else
migrate = mem_type_to_migrate(xe, bo->ttm.resource->mem_type);
+ xe_assert(xe, bo->ttm.base.resv == backup->ttm.base.resv);
ret = dma_resv_reserve_fences(bo->ttm.base.resv, 1);
if (ret)
goto out_backup;
- ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
- if (ret)
- goto out_backup;
-
fence = xe_migrate_copy(migrate, bo, backup, bo->ttm.resource,
backup->ttm.resource, false);
if (IS_ERR(fence)) {
@@ -1234,8 +1283,6 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
- dma_resv_add_fence(backup->ttm.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
@@ -1245,25 +1292,88 @@ int xe_bo_evict_pinned(struct xe_bo *bo)
if (iosys_map_is_null(&bo->vmap)) {
ret = xe_bo_vmap(bo);
if (ret)
- goto out_backup;
+ goto out_vunmap;
unmap = true;
}
xe_map_memcpy_from(xe, backup->vmap.vaddr, &bo->vmap, 0,
- bo->size);
+ xe_bo_size(bo));
}
if (!bo->backup_obj)
bo->backup_obj = backup;
-
-out_backup:
+out_vunmap:
xe_bo_vunmap(backup);
- if (ret && backup_created)
- xe_bo_put(backup);
-out_unlock_bo:
+out_backup:
if (unmap)
xe_bo_vunmap(bo);
- xe_bo_unlock(bo);
+
+ return ret;
+}
+
+/**
+ * xe_bo_evict_pinned() - Evict a pinned VRAM object to system memory
+ * @bo: The buffer object to move.
+ *
+ * On successful completion, the object memory will be moved to system memory.
+ *
+ * This is needed for special handling of pinned VRAM objects during
+ * suspend-resume.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_bo_evict_pinned(struct xe_bo *bo)
+{
+ struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *backup = bo->backup_obj;
+ bool backup_created = false;
+ int ret = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.exclusive = true}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ xe_assert(xe, !ret);
+
+ if (WARN_ON(!bo->ttm.resource)) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (WARN_ON(!xe_bo_is_pinned(bo))) {
+ ret = -EINVAL;
+ break;
+ }
+
+ if (!xe_bo_is_vram(bo))
+ break;
+
+ if (bo->flags & XE_BO_FLAG_PINNED_NORESTORE)
+ break;
+
+ if (!backup) {
+ backup = xe_bo_init_locked(xe, NULL, NULL, bo->ttm.base.resv, NULL,
+ xe_bo_size(bo),
+ DRM_XE_GEM_CPU_CACHING_WB, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_NEEDS_CPU_ACCESS |
+ XE_BO_FLAG_PINNED, &exec);
+ if (IS_ERR(backup)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(backup);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
+ backup->parent_obj = xe_bo_get(bo); /* Released by bo_destroy */
+ backup_created = true;
+ }
+
+ ret = xe_bo_evict_pinned_copy(bo, backup);
+ }
+
+ if (ret && backup_created)
+ xe_bo_put(backup);
+
return ret;
}
@@ -1313,10 +1423,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
if (ret)
goto out_unlock_bo;
- ret = dma_resv_reserve_fences(backup->ttm.base.resv, 1);
- if (ret)
- goto out_unlock_bo;
-
fence = xe_migrate_copy(migrate, backup, bo,
backup->ttm.resource, bo->ttm.resource,
false);
@@ -1327,8 +1433,6 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
dma_resv_add_fence(bo->ttm.base.resv, fence,
DMA_RESV_USAGE_KERNEL);
- dma_resv_add_fence(backup->ttm.base.resv, fence,
- DMA_RESV_USAGE_KERNEL);
dma_fence_put(fence);
} else {
ret = xe_bo_vmap(backup);
@@ -1343,7 +1447,7 @@ int xe_bo_restore_pinned(struct xe_bo *bo)
}
xe_map_memcpy_to(xe, &bo->vmap, 0, backup->vmap.vaddr,
- bo->size);
+ xe_bo_size(bo));
}
bo->backup_obj = NULL;
@@ -1377,7 +1481,8 @@ int xe_bo_dma_unmap_pinned(struct xe_bo *bo)
ttm_bo->sg = NULL;
xe_tt->sg = NULL;
} else if (xe_tt->sg) {
- dma_unmap_sgtable(xe_tt->xe->drm.dev, xe_tt->sg,
+ dma_unmap_sgtable(ttm_to_xe_device(ttm_bo->bdev)->drm.dev,
+ xe_tt->sg,
DMA_BIDIRECTIONAL, 0);
sg_free_table(xe_tt->sg);
xe_tt->sg = NULL;
@@ -1422,7 +1527,7 @@ static bool xe_ttm_bo_lock_in_destructor(struct ttm_buffer_object *ttm_bo)
* always succeed here, as long as we hold the lru lock.
*/
spin_lock(&ttm_bo->bdev->lru_lock);
- locked = dma_resv_trylock(ttm_bo->base.resv);
+ locked = dma_resv_trylock(&ttm_bo->base._resv);
spin_unlock(&ttm_bo->bdev->lru_lock);
xe_assert(xe, locked);
@@ -1442,13 +1547,6 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
bo = ttm_to_xe_bo(ttm_bo);
xe_assert(xe_bo_device(bo), !(bo->created && kref_read(&ttm_bo->base.refcount)));
- /*
- * Corner case where TTM fails to allocate memory and this BOs resv
- * still points the VMs resv
- */
- if (ttm_bo->base.resv != &ttm_bo->base._resv)
- return;
-
if (!xe_ttm_bo_lock_in_destructor(ttm_bo))
return;
@@ -1458,14 +1556,14 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
* TODO: Don't do this for external bos once we scrub them after
* unbind.
*/
- dma_resv_for_each_fence(&cursor, ttm_bo->base.resv,
+ dma_resv_for_each_fence(&cursor, &ttm_bo->base._resv,
DMA_RESV_USAGE_BOOKKEEP, fence) {
if (xe_fence_is_xe_preempt(fence) &&
!dma_fence_is_signaled(fence)) {
if (!replacement)
replacement = dma_fence_get_stub();
- dma_resv_replace_fences(ttm_bo->base.resv,
+ dma_resv_replace_fences(&ttm_bo->base._resv,
fence->context,
replacement,
DMA_RESV_USAGE_BOOKKEEP);
@@ -1473,14 +1571,19 @@ static void xe_ttm_bo_release_notify(struct ttm_buffer_object *ttm_bo)
}
dma_fence_put(replacement);
- dma_resv_unlock(ttm_bo->base.resv);
+ dma_resv_unlock(&ttm_bo->base._resv);
}
static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object *ttm_bo)
{
+ struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+
if (!xe_bo_is_xe_bo(ttm_bo))
return;
+ if (IS_VF_CCS_READY(ttm_to_xe_device(ttm_bo->bdev)))
+ xe_sriov_vf_ccs_detach_bo(bo);
+
/*
* Object is idle and about to be destroyed. Release the
* dma-buf attachment.
@@ -1542,7 +1645,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
if (!mem_type_is_vram(ttm_bo->resource->mem_type))
return -EIO;
- if (!xe_ttm_resource_visible(ttm_bo->resource) || len >= SZ_16K) {
+ if (!xe_bo_is_visible_vram(bo) || len >= SZ_16K) {
struct xe_migrate *migrate =
mem_type_to_migrate(xe, ttm_bo->resource->mem_type);
@@ -1553,7 +1656,7 @@ static int xe_ttm_access_memory(struct ttm_buffer_object *ttm_bo,
vram = res_to_mem_region(ttm_bo->resource);
xe_res_first(ttm_bo->resource, offset & PAGE_MASK,
- bo->size - (offset & PAGE_MASK), &cursor);
+ xe_bo_size(bo) - (offset & PAGE_MASK), &cursor);
do {
unsigned long page_offset = (offset & ~PAGE_MASK);
@@ -1588,7 +1691,7 @@ const struct ttm_device_funcs xe_ttm_funcs = {
.io_mem_pfn = xe_ttm_io_mem_pfn,
.access_memory = xe_ttm_access_memory,
.release_notify = xe_ttm_bo_release_notify,
- .eviction_valuable = ttm_bo_eviction_valuable,
+ .eviction_valuable = xe_bo_eviction_valuable,
.delete_mem_notify = xe_ttm_bo_delete_mem_notify,
.swap_notify = xe_ttm_bo_swap_notify,
};
@@ -1645,7 +1748,7 @@ static void xe_gem_object_free(struct drm_gem_object *obj)
* refcount directly if needed.
*/
__xe_bo_vunmap(gem_to_xe_bo(obj));
- ttm_bo_put(container_of(obj, struct ttm_buffer_object, base));
+ ttm_bo_fini(container_of(obj, struct ttm_buffer_object, base));
}
static void xe_gem_object_close(struct drm_gem_object *obj,
@@ -1662,50 +1765,258 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
}
}
-static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
+static bool should_migrate_to_smem(struct xe_bo *bo)
+{
+ /*
+ * NOTE: The following atomic checks are platform-specific. For example,
+ * if a device supports CXL atomics, these may not be necessary or
+ * may behave differently.
+ */
+
+ return bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL ||
+ bo->attr.atomic_access == DRM_XE_ATOMIC_CPU;
+}
+
+static int xe_bo_wait_usage_kernel(struct xe_bo *bo, struct ttm_operation_ctx *ctx)
+{
+ long lerr;
+
+ if (ctx->no_wait_gpu)
+ return dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL) ?
+ 0 : -EBUSY;
+
+ lerr = dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ ctx->interruptible, MAX_SCHEDULE_TIMEOUT);
+ if (lerr < 0)
+ return lerr;
+ if (lerr == 0)
+ return -EBUSY;
+
+ return 0;
+}
+
+/* Populate the bo if swapped out, or migrate if the access mode requires that. */
+static int xe_bo_fault_migrate(struct xe_bo *bo, struct ttm_operation_ctx *ctx,
+ struct drm_exec *exec)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ int err = 0;
+
+ if (ttm_manager_type(tbo->bdev, tbo->resource->mem_type)->use_tt) {
+ err = xe_bo_wait_usage_kernel(bo, ctx);
+ if (!err)
+ err = ttm_bo_populate(&bo->ttm, ctx);
+ } else if (should_migrate_to_smem(bo)) {
+ xe_assert(xe_bo_device(bo), bo->flags & XE_BO_FLAG_SYSTEM);
+ err = xe_bo_migrate(bo, XE_PL_TT, ctx, exec);
+ }
+
+ return err;
+}
+
+/* Call into TTM to populate PTEs, and register bo for PTE removal on runtime suspend. */
+static vm_fault_t __xe_bo_cpu_fault(struct vm_fault *vmf, struct xe_device *xe, struct xe_bo *bo)
+{
+ vm_fault_t ret;
+
+ trace_xe_bo_cpu_fault(bo);
+
+ ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
+ /*
+ * By the time TTM is actually called to insert PTEs, all blocking conditions
+ * should have been resolved; otherwise TTM may drop locks and return
+ * VM_FAULT_RETRY.
+ */
+ xe_assert(xe, ret != VM_FAULT_RETRY);
+
+ if (ret == VM_FAULT_NOPAGE &&
+ mem_type_is_vram(bo->ttm.resource->mem_type)) {
+ mutex_lock(&xe->mem_access.vram_userfault.lock);
+ if (list_empty(&bo->vram_userfault_link))
+ list_add(&bo->vram_userfault_link,
+ &xe->mem_access.vram_userfault.list);
+ mutex_unlock(&xe->mem_access.vram_userfault.lock);
+ }
+
+ return ret;
+}
+
+static vm_fault_t xe_err_to_fault_t(int err)
+{
+ switch (err) {
+ case 0:
+ case -EINTR:
+ case -ERESTARTSYS:
+ case -EAGAIN:
+ return VM_FAULT_NOPAGE;
+ case -ENOMEM:
+ case -ENOSPC:
+ return VM_FAULT_OOM;
+ default:
+ break;
+ }
+ return VM_FAULT_SIGBUS;
+}
+
+static bool xe_ttm_bo_is_imported(struct ttm_buffer_object *tbo)
+{
+ dma_resv_assert_held(tbo->base.resv);
+
+ return tbo->ttm &&
+ (tbo->ttm->page_flags & (TTM_TT_FLAG_EXTERNAL | TTM_TT_FLAG_EXTERNAL_MAPPABLE)) ==
+ TTM_TT_FLAG_EXTERNAL;
+}
+
+static vm_fault_t xe_bo_cpu_fault_fastpath(struct vm_fault *vmf, struct xe_device *xe,
+ struct xe_bo *bo, bool needs_rpm)
+{
+ struct ttm_buffer_object *tbo = &bo->ttm;
+ vm_fault_t ret = VM_FAULT_RETRY;
+ struct xe_validation_ctx ctx;
+ struct ttm_operation_ctx tctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ .gfp_retry_mayfail = true,
+
+ };
+ int err;
+
+ if (needs_rpm && !xe_pm_runtime_get_if_active(xe))
+ return VM_FAULT_RETRY;
+
+ err = xe_validation_ctx_init(&ctx, &xe->val, NULL,
+ (struct xe_val_flags) {
+ .interruptible = true,
+ .no_block = true
+ });
+ if (err)
+ goto out_pm;
+
+ if (!dma_resv_trylock(tbo->base.resv))
+ goto out_validation;
+
+ if (xe_ttm_bo_is_imported(tbo)) {
+ ret = VM_FAULT_SIGBUS;
+ drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+ goto out_unlock;
+ }
+
+ err = xe_bo_fault_migrate(bo, &tctx, NULL);
+ if (err) {
+ /* Keep VM_FAULT_RETRY for -ENOMEM, -ENOSPC and -EBUSY; translate the rest. */
+ if (err != -ENOMEM && err != -ENOSPC && err != -EBUSY)
+ ret = xe_err_to_fault_t(err);
+ goto out_unlock;
+ }
+
+ if (dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL))
+ ret = __xe_bo_cpu_fault(vmf, xe, bo);
+
+out_unlock:
+ dma_resv_unlock(tbo->base.resv);
+out_validation:
+ xe_validation_ctx_fini(&ctx);
+out_pm:
+ if (needs_rpm)
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static vm_fault_t xe_bo_cpu_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
struct drm_device *ddev = tbo->base.dev;
struct xe_device *xe = to_xe_device(ddev);
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
+ bool retry_after_wait = false;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
vm_fault_t ret;
+ int err = 0;
int idx;
- if (needs_rpm)
- xe_pm_runtime_get(xe);
+ if (!drm_dev_enter(&xe->drm, &idx))
+ return ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
- ret = ttm_bo_vm_reserve(tbo, vmf);
- if (ret)
+ ret = xe_bo_cpu_fault_fastpath(vmf, xe, bo, needs_rpm);
+ if (ret != VM_FAULT_RETRY)
goto out;
- if (drm_dev_enter(ddev, &idx)) {
- trace_xe_bo_cpu_fault(bo);
-
- ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
- drm_dev_exit(idx);
+ if (fault_flag_allow_retry_first(vmf->flags)) {
+ if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
+ goto out;
+ retry_after_wait = true;
+ xe_bo_get(bo);
+ mmap_read_unlock(vmf->vma->vm_mm);
} else {
- ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
+ ret = VM_FAULT_NOPAGE;
}
- if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
- goto out;
/*
- * ttm_bo_vm_reserve() already has dma_resv_lock.
+ * The fastpath failed and we were not required to return and retry immediately.
+ * We're now running in one of two modes:
+ *
+ * 1) retry_after_wait == true: The mmap_read_lock() has been dropped, so here we
+ * only resolve blocking waits; the fault itself can't be completed without that
+ * lock. Once the fault is retried, the aim is that the fastpath succeeds, although
+ * it may still fail since the bo lock is dropped in between.
+ *
+ * 2) retry_after_wait == false: The fastpath failed, typically even after
+ * a retry. Do whatever's necessary to resolve the fault.
+ *
+ * This construct is recommended to avoid excessive waits under the mmap_lock.
*/
- if (ret == VM_FAULT_NOPAGE && mem_type_is_vram(tbo->resource->mem_type)) {
- mutex_lock(&xe->mem_access.vram_userfault.lock);
- if (list_empty(&bo->vram_userfault_link))
- list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault.list);
- mutex_unlock(&xe->mem_access.vram_userfault.lock);
+
+ if (needs_rpm)
+ xe_pm_runtime_get(xe);
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ struct ttm_operation_ctx tctx = {
+ .interruptible = true,
+ .no_wait_gpu = false,
+ .gfp_retry_mayfail = retry_after_wait,
+ };
+
+ err = drm_exec_lock_obj(&exec, &tbo->base);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+
+ if (xe_ttm_bo_is_imported(tbo)) {
+ err = -EFAULT;
+ drm_dbg(&xe->drm, "CPU trying to access an imported buffer object.\n");
+ break;
+ }
+
+ err = xe_bo_fault_migrate(bo, &tctx, &exec);
+ if (err) {
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+
+ err = xe_bo_wait_usage_kernel(bo, &tctx);
+ if (err)
+ break;
+
+ if (!retry_after_wait)
+ ret = __xe_bo_cpu_fault(vmf, xe, bo);
}
+ /* if retry_after_wait == true, we *must* return VM_FAULT_RETRY. */
+ if (err && !retry_after_wait)
+ ret = xe_err_to_fault_t(err);
- dma_resv_unlock(tbo->base.resv);
-out:
if (needs_rpm)
xe_pm_runtime_put(xe);
+ if (retry_after_wait)
+ xe_bo_put(bo);
+out:
+ drm_dev_exit(idx);
+
return ret;
}
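
For readers unfamiliar with the drop-and-retry protocol used above, a generic sketch follows (illustrative only; the real handler additionally holds a BO reference and runtime PM across the wait):

static vm_fault_t example_retry_protocol(struct vm_fault *vmf)
{
	if (fault_flag_allow_retry_first(vmf->flags) &&
	    !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
		/* Drop the mmap lock, perform the blocking waits, and let the
		 * core MM re-issue the fault afterwards.
		 */
		mmap_read_unlock(vmf->vma->vm_mm);
		/* ... blocking waits go here ... */
		return VM_FAULT_RETRY;
	}

	/* Otherwise everything must be resolved under the mmap lock. */
	return VM_FAULT_NOPAGE;
}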
@@ -1749,7 +2060,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size)
}
static const struct vm_operations_struct xe_gem_vm_ops = {
- .fault = xe_gem_fault,
+ .fault = xe_bo_cpu_fault,
.open = ttm_bo_vm_open,
.close = ttm_bo_vm_close,
.access = xe_bo_vm_access,
@@ -1797,11 +2108,32 @@ void xe_bo_free(struct xe_bo *bo)
kfree(bo);
}
-struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
- struct xe_tile *tile, struct dma_resv *resv,
- struct ttm_lru_bulk_move *bulk, size_t size,
- u16 cpu_caching, enum ttm_bo_type type,
- u32 flags)
+/**
+ * xe_bo_init_locked() - Initialize or create an xe_bo.
+ * @xe: The xe device.
+ * @bo: An already allocated buffer object or NULL
+ * if the function should allocate a new one.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @resv: Pointer to a locked shared reservation object to use for this bo,
+ * or NULL for the xe_bo to use its own.
+ * @bulk: The bulk move to use for LRU bumping, or NULL for external bos.
+ * @size: The storage size to use for the bo.
+ * @cpu_caching: The cpu caching used for system memory backing store.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Initialize or create an xe buffer object. On failure, any allocated buffer
+ * object passed in @bo will have been unreferenced.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags, struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = true,
@@ -1853,7 +2185,6 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
bo->ccs_cleared = false;
bo->tile = tile;
- bo->size = size;
bo->flags = flags;
bo->cpu_caching = cpu_caching;
bo->ttm.base.funcs = &xe_gem_object_funcs;
@@ -1871,8 +2202,9 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
ctx.resv = resv;
}
+ xe_validation_assert_exec(xe, exec, &bo->ttm.base);
if (!(flags & XE_BO_FLAG_FIXED_PLACEMENT)) {
- err = __xe_bo_placement_for_flags(xe, bo, bo->flags);
+ err = __xe_bo_placement_for_flags(xe, bo, bo->flags, type);
if (WARN_ON(err)) {
xe_ttm_bo_destroy(&bo->ttm);
return ERR_PTR(err);
@@ -1930,34 +2262,37 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
}
static int __xe_bo_fixed_placement(struct xe_device *xe,
- struct xe_bo *bo,
+ struct xe_bo *bo, enum ttm_bo_type type,
u32 flags,
u64 start, u64 end, u64 size)
{
struct ttm_place *place = bo->placements;
+ u32 vram_flag, vram_stolen_flags;
+
+ /*
+ * To allow fixed GGTT placement on a VF, post-migration fixups would have to
+ * include selecting a new fixed offset and shifting the page ranges for it.
+ */
+ xe_assert(xe, !IS_SRIOV_VF(xe) || !(bo->flags & XE_BO_FLAG_GGTT));
if (flags & (XE_BO_FLAG_USER | XE_BO_FLAG_SYSTEM))
return -EINVAL;
+ vram_flag = flags & XE_BO_FLAG_VRAM_MASK;
+ vram_stolen_flags = (flags & (XE_BO_FLAG_STOLEN)) | vram_flag;
+
+ /* check if more than one VRAM/STOLEN flag is set */
+ if (hweight32(vram_stolen_flags) > 1)
+ return -EINVAL;
+
place->flags = TTM_PL_FLAG_CONTIGUOUS;
place->fpfn = start >> PAGE_SHIFT;
place->lpfn = end >> PAGE_SHIFT;
- switch (flags & (XE_BO_FLAG_STOLEN | XE_BO_FLAG_VRAM_MASK)) {
- case XE_BO_FLAG_VRAM0:
- place->mem_type = XE_PL_VRAM0;
- break;
- case XE_BO_FLAG_VRAM1:
- place->mem_type = XE_PL_VRAM1;
- break;
- case XE_BO_FLAG_STOLEN:
+ if (flags & XE_BO_FLAG_STOLEN)
place->mem_type = XE_PL_STOLEN;
- break;
-
- default:
- /* 0 or multiple of the above set */
- return -EINVAL;
- }
+ else
+ place->mem_type = bo_vram_flags_to_vram_placement(xe, flags, vram_flag, type);
bo->placement = (struct ttm_placement) {
.num_placement = 1,
@@ -1972,7 +2307,7 @@ __xe_bo_create_locked(struct xe_device *xe,
struct xe_tile *tile, struct xe_vm *vm,
size_t size, u64 start, u64 end,
u16 cpu_caching, enum ttm_bo_type type, u32 flags,
- u64 alignment)
+ u64 alignment, struct drm_exec *exec)
{
struct xe_bo *bo = NULL;
int err;
@@ -1986,18 +2321,18 @@ __xe_bo_create_locked(struct xe_device *xe,
return bo;
flags |= XE_BO_FLAG_FIXED_PLACEMENT;
- err = __xe_bo_fixed_placement(xe, bo, flags, start, end, size);
+ err = __xe_bo_fixed_placement(xe, bo, type, flags, start, end, size);
if (err) {
xe_bo_free(bo);
return ERR_PTR(err);
}
}
- bo = ___xe_bo_create_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
- vm && !xe_vm_in_fault_mode(vm) &&
- flags & XE_BO_FLAG_USER ?
- &vm->lru_bulk_move : NULL, size,
- cpu_caching, type, flags);
+ bo = xe_bo_init_locked(xe, bo, tile, vm ? xe_vm_resv(vm) : NULL,
+ vm && !xe_vm_in_fault_mode(vm) &&
+ flags & XE_BO_FLAG_USER ?
+ &vm->lru_bulk_move : NULL, size,
+ cpu_caching, type, flags, exec);
if (IS_ERR(bo))
return bo;
@@ -2031,9 +2366,10 @@ __xe_bo_create_locked(struct xe_device *xe,
if (flags & XE_BO_FLAG_FIXED_PLACEMENT) {
err = xe_ggtt_insert_bo_at(t->mem.ggtt, bo,
- start + bo->size, U64_MAX);
+ start + xe_bo_size(bo), U64_MAX,
+ exec);
} else {
- err = xe_ggtt_insert_bo(t->mem.ggtt, bo);
+ err = xe_ggtt_insert_bo(t->mem.ggtt, bo, exec);
}
if (err)
goto err_unlock_put_bo;
@@ -2050,82 +2386,166 @@ err_unlock_put_bo:
return ERR_PTR(err);
}
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
- struct xe_tile *tile, struct xe_vm *vm,
- size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags, u64 alignment)
-{
- return __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
- flags, alignment);
-}
-
+/**
+ * xe_bo_create_locked() - Create a BO
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @vm: The local vm or NULL for external objects.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Create a locked xe BO with no range or alignment restrictions.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec)
{
return __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL, 0, type,
- flags, 0);
+ flags, 0, exec);
}
-struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- u16 cpu_caching,
- u32 flags)
+static struct xe_bo *xe_bo_create_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u16 cpu_caching,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment, bool intr)
{
- struct xe_bo *bo = __xe_bo_create_locked(xe, tile, vm, size, 0, ~0ULL,
- cpu_caching, ttm_bo_type_device,
- flags | XE_BO_FLAG_USER, 0);
- if (!IS_ERR(bo))
- xe_bo_unlock_vm_held(bo);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int ret = 0;
- return bo;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
+ ret) {
+ bo = __xe_bo_create_locked(xe, tile, NULL, size, 0, ~0ULL,
+ cpu_caching, type, flags, alignment, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ } else {
+ xe_bo_unlock(bo);
+ }
+ }
+
+ return ret ? ERR_PTR(ret) : bo;
}
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_user() - Create a user BO
+ * @xe: The xe device.
+ * @vm: The local vm or NULL for external objects.
+ * @size: The storage size to use for the bo.
+ * @cpu_caching: The caching mode to be used for system backing store.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction, or NULL
+ * if such a transaction should be initiated by the call.
+ *
+ * Create a bo on behalf of user-space.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_create_user(struct xe_device *xe,
+ struct xe_vm *vm, size_t size,
+ u16 cpu_caching,
+ u32 flags, struct drm_exec *exec)
{
- struct xe_bo *bo = xe_bo_create_locked(xe, tile, vm, size, type, flags);
+ struct xe_bo *bo;
- if (!IS_ERR(bo))
- xe_bo_unlock_vm_held(bo);
+ flags |= XE_BO_FLAG_USER;
+
+ if (vm || exec) {
+ xe_assert(xe, exec);
+ bo = __xe_bo_create_locked(xe, NULL, vm, size, 0, ~0ULL,
+ cpu_caching, ttm_bo_type_device,
+ flags, 0, exec);
+ if (!IS_ERR(bo))
+ xe_bo_unlock_vm_held(bo);
+ } else {
+ bo = xe_bo_create_novm(xe, NULL, size, cpu_caching,
+ ttm_bo_type_device, flags, 0, true);
+ }
return bo;
}
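
The two call styles described in the kerneldoc can be illustrated as follows (hedged sketch; xe, vm, exec and bo are assumed to be in scope, sizes and flags are arbitrary examples):

/* External BO, no VM: the helper opens its own validation transaction. */
bo = xe_bo_create_user(xe, NULL, SZ_64K, DRM_XE_GEM_CPU_CACHING_WB,
		       XE_BO_FLAG_SYSTEM, NULL);

/* VM-private BO: the vm's resv must already be locked through @exec. */
bo = xe_bo_create_user(xe, vm, SZ_64K, DRM_XE_GEM_CPU_CACHING_WB,
		       XE_BO_FLAG_SYSTEM, &exec);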
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_pin_range_novm() - Create and pin a BO with range options.
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @start: Start of fixed VRAM range or 0.
+ * @end: End of fixed VRAM range or ~0ULL.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ *
+ * Create and pin an xe BO with range options. If @start and @end indicate
+ * a fixed VRAM range, this must be a ttm_bo_type_kernel bo with VRAM-only
+ * placement.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ */
+struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags)
{
- return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, offset,
- type, flags, 0);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int err = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ bo = __xe_bo_create_locked(xe, tile, NULL, size, start, end,
+ 0, type, flags, 0, &exec);
+ if (IS_ERR(bo)) {
+ drm_exec_retry_on_contention(&exec);
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+
+ err = xe_bo_pin(bo, &exec);
+ xe_bo_unlock(bo);
+ if (err) {
+ xe_bo_put(bo);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
+
+ return err ? ERR_PTR(err) : bo;
}
-struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
- struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags,
- u64 alignment)
+static struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
+ struct xe_tile *tile,
+ struct xe_vm *vm,
+ size_t size, u64 offset,
+ enum ttm_bo_type type, u32 flags,
+ u64 alignment, struct drm_exec *exec)
{
struct xe_bo *bo;
int err;
u64 start = offset == ~0ull ? 0 : offset;
- u64 end = offset == ~0ull ? offset : start + size;
+ u64 end = offset == ~0ull ? ~0ull : start + size;
if (flags & XE_BO_FLAG_STOLEN &&
xe_ttm_stolen_cpu_access_needs_ggtt(xe))
flags |= XE_BO_FLAG_GGTT;
- bo = xe_bo_create_locked_range(xe, tile, vm, size, start, end, type,
- flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
- alignment);
+ bo = __xe_bo_create_locked(xe, tile, vm, size, start, end, 0, type,
+ flags | XE_BO_FLAG_NEEDS_CPU_ACCESS | XE_BO_FLAG_PINNED,
+ alignment, exec);
if (IS_ERR(bo))
return bo;
- err = xe_bo_pin(bo);
+ err = xe_bo_pin(bo, exec);
if (err)
goto err_put;
@@ -2145,26 +2565,100 @@ err_put:
return ERR_PTR(err);
}
+/**
+ * xe_bo_create_pin_map_at_novm() - Create pinned and mapped bo at optional VRAM offset
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @offset: Optional VRAM offset or %~0ull for don't care.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @alignment: GGTT alignment.
+ * @intr: Whether to execute any waits for backing store interruptible.
+ *
+ * Create a pinned and optionally mapped bo with VRAM offset and GGTT alignment
+ * options. The bo will be external and not associated with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
+ * to true on entry.
+ */
+struct xe_bo *
+xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 offset, enum ttm_bo_type type, u32 flags,
+ u64 alignment, bool intr)
+{
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int ret = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = intr},
+ ret) {
+ bo = xe_bo_create_pin_map_at_aligned(xe, tile, NULL, size, offset,
+ type, flags, alignment, &exec);
+ if (IS_ERR(bo)) {
+ drm_exec_retry_on_contention(&exec);
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ }
+ }
+
+ return ret ? ERR_PTR(ret) : bo;
+}
+
+/**
+ * xe_bo_create_pin_map() - Create pinned and mapped bo
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @vm: The vm to associate the buffer object with. The vm's resv must be locked
+ * with the transaction represented by @exec.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @exec: The drm_exec transaction to use for exhaustive eviction, and
+ * previously used for locking @vm's resv.
+ *
+ * Create a pinned and mapped bo. If @vm is NULL, the bo will be external and
+ * not associated with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @exec was
+ * configured for interruptible locking.
+ */
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags)
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec)
{
- return xe_bo_create_pin_map_at(xe, tile, vm, size, ~0ull, type, flags);
+ return xe_bo_create_pin_map_at_aligned(xe, tile, vm, size, ~0ull, type, flags,
+ 0, exec);
}
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
- const void *data, size_t size,
- enum ttm_bo_type type, u32 flags)
+/**
+ * xe_bo_create_pin_map_novm() - Create pinned and mapped bo
+ * @xe: The xe device.
+ * @tile: The tile to select for migration of this bo, and the tile used for
+ * GGTT binding if any. Only to be non-NULL for ttm_bo_type_kernel bos.
+ * @size: The storage size to use for the bo.
+ * @type: The TTM buffer object type.
+ * @flags: XE_BO_FLAG_ flags.
+ * @intr: Whether to execute any waits for backing store interruptible.
+ *
+ * Create a pinned and mapped bo. The bo will be external and not associated
+ * with a VM.
+ *
+ * Return: The buffer object on success. Negative error pointer on failure.
+ * In particular, the function may return ERR_PTR(%-EINTR) if @intr was set
+ * to true on entry.
+ */
+struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, enum ttm_bo_type type, u32 flags,
+ bool intr)
{
- struct xe_bo *bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- type, flags);
- if (IS_ERR(bo))
- return bo;
-
- xe_map_memcpy_to(xe, &bo->vmap, 0, data, size);
-
- return bo;
+ return xe_bo_create_pin_map_at_novm(xe, tile, size, ~0ull, type, flags, 0, intr);
}
static void __xe_bo_unpin_map_no_vm(void *arg)
@@ -2179,8 +2673,7 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
int ret;
KUNIT_STATIC_STUB_REDIRECT(xe_managed_bo_create_pin_map, xe, tile, size, flags);
-
- bo = xe_bo_create_pin_map(xe, tile, NULL, size, ttm_bo_type_kernel, flags);
+ bo = xe_bo_create_pin_map_novm(xe, tile, size, ttm_bo_type_kernel, flags, true);
if (IS_ERR(bo))
return bo;
@@ -2191,6 +2684,11 @@ struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile
return bo;
}
+void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo)
+{
+ devm_release_action(xe_bo_device(bo)->drm.dev, __xe_bo_unpin_map_no_vm, bo);
+}
+
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags)
{
@@ -2229,7 +2727,7 @@ int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, str
xe_assert(xe, !(*src)->vmap.is_iomem);
bo = xe_managed_bo_create_from_data(xe, tile, (*src)->vmap.vaddr,
- (*src)->size, dst_flags);
+ xe_bo_size(*src), dst_flags);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -2262,6 +2760,8 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
/**
* xe_bo_pin_external - pin an external BO
* @bo: buffer object to be pinned
+ * @in_place: Pin in current placement, don't attempt to migrate.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Pin an external (not tied to a VM, can be exported via dma-buf / prime FD)
* BO. Unique call compared to xe_bo_pin as this function has its own set of
@@ -2269,7 +2769,7 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res)
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_bo_pin_external(struct xe_bo *bo)
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec)
{
struct xe_device *xe = xe_bo_device(bo);
int err;
@@ -2278,9 +2778,11 @@ int xe_bo_pin_external(struct xe_bo *bo)
xe_assert(xe, xe_bo_is_user(bo));
if (!xe_bo_is_pinned(bo)) {
- err = xe_bo_validate(bo, NULL, false);
- if (err)
- return err;
+ if (!in_place) {
+ err = xe_bo_validate(bo, NULL, false, exec);
+ if (err)
+ return err;
+ }
spin_lock(&xe->pinned.lock);
list_add_tail(&bo->pinned_link, &xe->pinned.late.external);
@@ -2289,7 +2791,7 @@ int xe_bo_pin_external(struct xe_bo *bo)
ttm_bo_pin(&bo->ttm);
if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
- xe_ttm_tt_account_subtract(bo->ttm.ttm);
+ xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
/*
* FIXME: If we always use the reserve / unreserve functions for locking
@@ -2300,7 +2802,17 @@ int xe_bo_pin_external(struct xe_bo *bo)
return 0;
}
-int xe_bo_pin(struct xe_bo *bo)
+/**
+ * xe_bo_pin() - Pin a kernel bo after potentially migrating it
+ * @bo: The kernel bo to pin.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
+ *
+ * Attempts to migrate a bo to @bo->placement. If that succeeds,
+ * pins the bo.
+ *
+ * Return: %0 on success, negative error code on migration failure.
+ */
+int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec)
{
struct ttm_place *place = &bo->placements[0];
struct xe_device *xe = xe_bo_device(bo);
@@ -2322,7 +2834,7 @@ int xe_bo_pin(struct xe_bo *bo)
/* We only expect at most 1 pin */
xe_assert(xe, !xe_bo_is_pinned(bo));
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
@@ -2337,7 +2849,7 @@ int xe_bo_pin(struct xe_bo *bo)
ttm_bo_pin(&bo->ttm);
if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
- xe_ttm_tt_account_subtract(bo->ttm.ttm);
+ xe_ttm_tt_account_subtract(xe, bo->ttm.ttm);
/*
* FIXME: If we always use the reserve / unreserve functions for locking
@@ -2373,7 +2885,7 @@ void xe_bo_unpin_external(struct xe_bo *bo)
ttm_bo_unpin(&bo->ttm);
if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
- xe_ttm_tt_account_add(bo->ttm.ttm);
+ xe_ttm_tt_account_add(xe, bo->ttm.ttm);
/*
* FIXME: If we always use the reserve / unreserve functions for locking
@@ -2405,7 +2917,7 @@ void xe_bo_unpin(struct xe_bo *bo)
}
ttm_bo_unpin(&bo->ttm);
if (bo->ttm.ttm && ttm_tt_is_populated(bo->ttm.ttm))
- xe_ttm_tt_account_add(bo->ttm.ttm);
+ xe_ttm_tt_account_add(xe, bo->ttm.ttm);
}
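
Since xe_bo_pin() now takes a drm_exec transaction, a caller that does not already hold one is expected to open it via the validation guard, mirroring the pattern used elsewhere in this file. A minimal sketch (hypothetical helper name):

static int example_pin_kernel_bo(struct xe_device *xe, struct xe_bo *bo)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
		err = drm_exec_lock_obj(&exec, &bo->ttm.base);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		/* Migrates the bo to bo->placement and pins it on success. */
		err = xe_bo_pin(bo, &exec);
	}

	return err;
}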
/**
@@ -2415,6 +2927,7 @@ void xe_bo_unpin(struct xe_bo *bo)
* NULL. Used together with @allow_res_evict.
* @allow_res_evict: Whether it's allowed to evict bos sharing @vm's
* reservation object.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Make sure the bo is in allowed placement, migrating it if necessary. If
* needed, other bos will be evicted. If bos selected for eviction shares
@@ -2424,13 +2937,18 @@ void xe_bo_unpin(struct xe_bo *bo)
* Return: 0 on success, negative error code on failure. May return
* -EINTR or -ERESTARTSYS if internal waits are interrupted by a signal.
*/
-int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
+ struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = true,
.no_wait_gpu = false,
.gfp_retry_mayfail = true,
};
+ int ret;
+
+ if (xe_bo_is_pinned(bo))
+ return 0;
if (vm) {
lockdep_assert_held(&vm->lock);
@@ -2440,8 +2958,13 @@ int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict)
ctx.resv = xe_vm_resv(vm);
}
+ xe_vm_set_validating(vm, allow_res_evict);
trace_xe_bo_validate(bo);
- return ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+ xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
+ ret = ttm_bo_validate(&bo->ttm, &bo->placement, &ctx);
+ xe_vm_clear_validating(vm, allow_res_evict);
+
+ return ret;
}
bool xe_bo_is_xe_bo(struct ttm_buffer_object *bo)
@@ -2513,7 +3036,7 @@ int xe_bo_vmap(struct xe_bo *bo)
* TODO: Fix up ttm_bo_vmap to do that, or fix up ttm_bo_kmap
* to use struct iosys_map.
*/
- ret = ttm_bo_kmap(&bo->ttm, 0, bo->size >> PAGE_SHIFT, &bo->kmap);
+ ret = ttm_bo_kmap(&bo->ttm, 0, xe_bo_size(bo) >> PAGE_SHIFT, &bo->kmap);
if (ret)
return ret;
@@ -2557,7 +3080,7 @@ typedef int (*xe_gem_create_set_property_fn)(struct xe_device *xe,
u64 value);
static const xe_gem_create_set_property_fn gem_create_set_property_funcs[] = {
- [DRM_XE_GEM_CREATE_EXTENSION_SET_PROPERTY] = gem_create_set_pxp_type,
+ [DRM_XE_GEM_CREATE_SET_PROPERTY_PXP_TYPE] = gem_create_set_pxp_type,
};
static int gem_create_user_ext_set_property(struct xe_device *xe,
@@ -2633,8 +3156,9 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_gem_create *args = data;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_vm *vm = NULL;
- ktime_t end = 0;
struct xe_bo *bo;
unsigned int bo_flags;
u32 handle;
@@ -2708,25 +3232,26 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
}
-retry:
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- goto out_vm;
+ err = 0;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ if (vm) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+ }
+ bo = xe_bo_create_user(xe, vm, args->size, args->cpu_caching,
+ bo_flags, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
}
-
- bo = xe_bo_create_user(xe, NULL, vm, args->size, args->cpu_caching,
- bo_flags);
-
- if (vm)
- xe_vm_unlock(vm);
-
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- if (xe_vm_validate_should_retry(NULL, err, &end))
- goto retry;
+ if (err)
goto out_vm;
- }
if (args->extensions) {
err = gem_create_user_extensions(xe, bo, args->extensions, 0);
@@ -2875,6 +3400,9 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
* xe_bo_migrate - Migrate an object to the desired region id
* @bo: The buffer object to migrate.
* @mem_type: The TTM region type to migrate to.
+ * @tctx: A pointer to a struct ttm_operation_ctx or NULL if
+ * a default interruptible ctx is to be used.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Attempt to migrate the buffer object to the desired memory region. The
* buffer object may not be pinned, and must be locked.
@@ -2886,7 +3414,8 @@ static void xe_place_from_ttm_type(u32 mem_type, struct ttm_place *place)
* Return: 0 on success. Negative error code on failure. In particular may
* return -EINTR or -ERESTARTSYS if signal pending.
*/
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
+ struct drm_exec *exec)
{
struct xe_device *xe = ttm_to_xe_device(bo->ttm.bdev);
struct ttm_operation_ctx ctx = {
@@ -2898,6 +3427,7 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
struct ttm_place requested;
xe_bo_assert_held(bo);
+ tctx = tctx ? tctx : &ctx;
if (bo->ttm.resource->mem_type == mem_type)
return 0;
@@ -2924,19 +3454,22 @@ int xe_bo_migrate(struct xe_bo *bo, u32 mem_type)
add_vram(xe, bo, &requested, bo->flags, mem_type, &c);
}
- return ttm_bo_validate(&bo->ttm, &placement, &ctx);
+ if (!tctx->no_wait_gpu)
+ xe_validation_assert_exec(xe_bo_device(bo), exec, &bo->ttm.base);
+ return ttm_bo_validate(&bo->ttm, &placement, tctx);
}
/**
* xe_bo_evict - Evict an object to evict placement
* @bo: The buffer object to migrate.
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* On successful completion, the object memory will be moved to evict
* placement. This function blocks until the object has been fully moved.
*
* Return: 0 on success. Negative error code on failure.
*/
-int xe_bo_evict(struct xe_bo *bo)
+int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec)
{
struct ttm_operation_ctx ctx = {
.interruptible = false,
@@ -2982,6 +3515,14 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo)
if (IS_DGFX(xe) && (bo->flags & XE_BO_FLAG_SYSTEM))
return false;
+ /*
+ * Compression implies coh_none, so we know for sure that WB-cached
+ * memory can't currently use compression. WB is likely one of the
+ * common cases.
+ */
+ if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WB)
+ return false;
+
return true;
}
@@ -3057,7 +3598,7 @@ void xe_bo_put(struct xe_bo *bo)
#endif
for_each_tile(tile, xe_bo_device(bo), id)
if (bo->ggtt_node[id] && bo->ggtt_node[id]->ggtt)
- might_lock(&bo->ggtt_node[id]->ggtt->lock);
+ xe_ggtt_might_lock(bo->ggtt_node[id]->ggtt);
drm_gem_object_put(&bo->ttm.base);
}
}
@@ -3079,20 +3620,19 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
struct xe_device *xe = to_xe_device(dev);
struct xe_bo *bo;
uint32_t handle;
- int cpp = DIV_ROUND_UP(args->bpp, 8);
int err;
u32 page_size = max_t(u32, PAGE_SIZE,
xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K ? SZ_64K : SZ_4K);
- args->pitch = ALIGN(args->width * cpp, 64);
- args->size = ALIGN(mul_u32_u32(args->pitch, args->height),
- page_size);
+ err = drm_mode_size_dumb(dev, args, SZ_64, page_size);
+ if (err)
+ return err;
- bo = xe_bo_create_user(xe, NULL, NULL, args->size,
+ bo = xe_bo_create_user(xe, NULL, args->size,
DRM_XE_GEM_CPU_CACHING_WC,
XE_BO_FLAG_VRAM_IF_DGFX(xe_device_get_root_tile(xe)) |
XE_BO_FLAG_SCANOUT |
- XE_BO_FLAG_NEEDS_CPU_ACCESS);
+ XE_BO_FLAG_NEEDS_CPU_ACCESS, NULL);
if (IS_ERR(bo))
return PTR_ERR(bo);
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 02ada1fb8a23..911d5b90461a 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -10,8 +10,10 @@
#include "xe_bo_types.h"
#include "xe_macros.h"
+#include "xe_validation.h"
#include "xe_vm_types.h"
#include "xe_vm.h"
+#include "xe_vram_types.h"
#define XE_DEFAULT_GTT_SIZE_MB 3072ULL /* 3GB by default */
@@ -23,8 +25,9 @@
#define XE_BO_FLAG_VRAM_MASK (XE_BO_FLAG_VRAM0 | XE_BO_FLAG_VRAM1)
/* -- */
#define XE_BO_FLAG_STOLEN BIT(4)
+#define XE_BO_FLAG_VRAM(vram) (XE_BO_FLAG_VRAM0 << ((vram)->id))
#define XE_BO_FLAG_VRAM_IF_DGFX(tile) (IS_DGFX(tile_to_xe(tile)) ? \
- XE_BO_FLAG_VRAM0 << (tile)->id : \
+ XE_BO_FLAG_VRAM((tile)->mem.vram) : \
XE_BO_FLAG_SYSTEM)
#define XE_BO_FLAG_GGTT BIT(5)
#define XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE BIT(6)
@@ -46,6 +49,7 @@
#define XE_BO_FLAG_GGTT2 BIT(22)
#define XE_BO_FLAG_GGTT3 BIT(23)
#define XE_BO_FLAG_CPU_ADDR_MIRROR BIT(24)
+#define XE_BO_FLAG_FORCE_USER_VRAM BIT(25)
/* this one is trigger internally only */
#define XE_BO_FLAG_INTERNAL_TEST BIT(30)
@@ -86,49 +90,40 @@ struct sg_table;
struct xe_bo *xe_bo_alloc(void);
void xe_bo_free(struct xe_bo *bo);
-struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
- struct xe_tile *tile, struct dma_resv *resv,
- struct ttm_lru_bulk_move *bulk, size_t size,
- u16 cpu_caching, enum ttm_bo_type type,
- u32 flags);
-struct xe_bo *
-xe_bo_create_locked_range(struct xe_device *xe,
- struct xe_tile *tile, struct xe_vm *vm,
- size_t size, u64 start, u64 end,
- enum ttm_bo_type type, u32 flags, u64 alignment);
+struct xe_bo *xe_bo_init_locked(struct xe_device *xe, struct xe_bo *bo,
+ struct xe_tile *tile, struct dma_resv *resv,
+ struct ttm_lru_bulk_move *bulk, size_t size,
+ u16 cpu_caching, enum ttm_bo_type type,
+ u32 flags, struct drm_exec *exec);
struct xe_bo *xe_bo_create_locked(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size,
- u16 cpu_caching,
- u32 flags);
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec);
+struct xe_bo *xe_bo_create_user(struct xe_device *xe, struct xe_vm *vm, size_t size,
+ u16 cpu_caching, u32 flags, struct drm_exec *exec);
struct xe_bo *xe_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
struct xe_vm *vm, size_t size,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm, size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags);
-struct xe_bo *xe_bo_create_pin_map_at_aligned(struct xe_device *xe,
- struct xe_tile *tile,
- struct xe_vm *vm,
- size_t size, u64 offset,
- enum ttm_bo_type type, u32 flags,
- u64 alignment);
-struct xe_bo *xe_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
- const void *data, size_t size,
- enum ttm_bo_type type, u32 flags);
+ enum ttm_bo_type type, u32 flags,
+ struct drm_exec *exec);
+struct xe_bo *xe_bo_create_pin_map_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, enum ttm_bo_type type, u32 flags,
+ bool intr);
+struct xe_bo *xe_bo_create_pin_range_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 start, u64 end,
+ enum ttm_bo_type type, u32 flags);
+struct xe_bo *
+xe_bo_create_pin_map_at_novm(struct xe_device *xe, struct xe_tile *tile,
+ size_t size, u64 offset, enum ttm_bo_type type,
+ u32 flags, u64 alignment, bool intr);
struct xe_bo *xe_managed_bo_create_pin_map(struct xe_device *xe, struct xe_tile *tile,
size_t size, u32 flags);
+void xe_managed_bo_unpin_map_no_vm(struct xe_bo *bo);
struct xe_bo *xe_managed_bo_create_from_data(struct xe_device *xe, struct xe_tile *tile,
const void *data, size_t size, u32 flags);
int xe_managed_bo_reinit_in_vram(struct xe_device *xe, struct xe_tile *tile, struct xe_bo **src);
int xe_bo_placement_for_flags(struct xe_device *xe, struct xe_bo *bo,
- u32 bo_flags);
+ u32 bo_flags, enum ttm_bo_type type);
static inline struct xe_bo *ttm_to_xe_bo(const struct ttm_buffer_object *bo)
{
@@ -201,11 +196,12 @@ static inline void xe_bo_unlock_vm_held(struct xe_bo *bo)
}
}
-int xe_bo_pin_external(struct xe_bo *bo);
-int xe_bo_pin(struct xe_bo *bo);
+int xe_bo_pin_external(struct xe_bo *bo, bool in_place, struct drm_exec *exec);
+int xe_bo_pin(struct xe_bo *bo, struct drm_exec *exec);
void xe_bo_unpin_external(struct xe_bo *bo);
void xe_bo_unpin(struct xe_bo *bo);
-int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict);
+int xe_bo_validate(struct xe_bo *bo, struct xe_vm *vm, bool allow_res_evict,
+ struct drm_exec *exec);
static inline bool xe_bo_is_pinned(struct xe_bo *bo)
{
@@ -238,6 +234,19 @@ xe_bo_main_addr(struct xe_bo *bo, size_t page_size)
return xe_bo_addr(bo, 0, page_size);
}
+/**
+ * xe_bo_size() - Xe BO size
+ * @bo: The bo object.
+ *
+ * Simple helper to return Xe BO's size.
+ *
+ * Return: Xe BO's size
+ */
+static inline size_t xe_bo_size(struct xe_bo *bo)
+{
+ return bo->ttm.base.size;
+}
+
static inline u32
__xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
{
@@ -246,7 +255,7 @@ __xe_bo_ggtt_addr(struct xe_bo *bo, u8 tile_id)
if (XE_WARN_ON(!ggtt_node))
return 0;
- XE_WARN_ON(ggtt_node->base.size > bo->size);
+ XE_WARN_ON(ggtt_node->base.size > xe_bo_size(bo));
XE_WARN_ON(ggtt_node->base.start + ggtt_node->base.size > (1ull << 32));
return ggtt_node->base.start;
}
@@ -265,6 +274,7 @@ int xe_bo_read(struct xe_bo *bo, u64 offset, void *dst, int size);
bool mem_type_is_vram(u32 mem_type);
bool xe_bo_is_vram(struct xe_bo *bo);
+bool xe_bo_is_visible_vram(struct xe_bo *bo);
bool xe_bo_is_stolen(struct xe_bo *bo);
bool xe_bo_is_stolen_devmem(struct xe_bo *bo);
bool xe_bo_is_vm_bound(struct xe_bo *bo);
@@ -273,8 +283,9 @@ uint64_t vram_region_gpu_offset(struct ttm_resource *res);
bool xe_bo_can_migrate(struct xe_bo *bo, u32 mem_type);
-int xe_bo_migrate(struct xe_bo *bo, u32 mem_type);
-int xe_bo_evict(struct xe_bo *bo);
+int xe_bo_migrate(struct xe_bo *bo, u32 mem_type, struct ttm_operation_ctx *tctx,
+ struct drm_exec *exec);
+int xe_bo_evict(struct xe_bo *bo, struct drm_exec *exec);
int xe_bo_evict_pinned(struct xe_bo *bo);
int xe_bo_notifier_prepare_pinned(struct xe_bo *bo);
@@ -300,7 +311,22 @@ bool xe_bo_needs_ccs_pages(struct xe_bo *bo);
static inline size_t xe_bo_ccs_pages_start(struct xe_bo *bo)
{
- return PAGE_ALIGN(bo->ttm.base.size);
+ return PAGE_ALIGN(xe_bo_size(bo));
+}
+
+/**
+ * xe_bo_has_valid_ccs_bb - Check if the CCS's BBs were set up for the BO.
+ * @bo: the &xe_bo to check
+ *
+ * The CCS's BBs should only be set up by the VF driver, but it is also
+ * safe to call this function from a non-VF driver.
+ *
+ * Return: true iff the CCS's BBs are set up, false otherwise.
+ */
+static inline bool xe_bo_has_valid_ccs_bb(struct xe_bo *bo)
+{
+ return bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] &&
+ bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX];
}
static inline bool xe_bo_has_pages(struct xe_bo *bo)
diff --git a/drivers/gpu/drm/xe/xe_bo_doc.h b/drivers/gpu/drm/xe/xe_bo_doc.h
index 25a884c64bf1..401e7dd26ef3 100644
--- a/drivers/gpu/drm/xe/xe_bo_doc.h
+++ b/drivers/gpu/drm/xe/xe_bo_doc.h
@@ -12,7 +12,7 @@
* BO management
* =============
*
- * TTM manages (placement, eviction, etc...) all BOs in XE.
+ * TTM manages (placement, eviction, etc...) all BOs in Xe.
*
* BO creation
* ===========
@@ -29,7 +29,7 @@
* a kernel BO (e.g. engine state, memory for page tables, etc...). These BOs
* are typically mapped in the GGTT (any kernel BOs aside memory for page tables
* are in the GGTT), are pinned (can't move or be evicted at runtime), have a
- * vmap (XE can access the memory via xe_map layer) and have contiguous physical
+ * vmap (Xe can access the memory via xe_map layer) and have contiguous physical
* memory.
*
* More details of why kernel BOs are pinned and contiguous below.
@@ -40,7 +40,7 @@
* A user BO is created via the DRM_IOCTL_XE_GEM_CREATE IOCTL. Once it is
* created the BO can be mmap'd (via DRM_IOCTL_XE_GEM_MMAP_OFFSET) for user
* access and it can be bound for GPU access (via DRM_IOCTL_XE_VM_BIND). All
- * user BOs are evictable and user BOs are never pinned by XE. The allocation of
+ * user BOs are evictable and user BOs are never pinned by Xe. The allocation of
* the backing store can be deferred from creation time until first use which is
* either mmap, bind, or pagefault.
*
@@ -84,7 +84,7 @@
* ====================
*
* All eviction (or in other words, moving a BO from one memory location to
- * another) is routed through TTM with a callback into XE.
+ * another) is routed through TTM with a callback into Xe.
*
* Runtime eviction
* ----------------
diff --git a/drivers/gpu/drm/xe/xe_bo_evict.c b/drivers/gpu/drm/xe/xe_bo_evict.c
index ed3746d32b27..7661fca7f278 100644
--- a/drivers/gpu/drm/xe/xe_bo_evict.c
+++ b/drivers/gpu/drm/xe/xe_bo_evict.c
@@ -73,6 +73,11 @@ int xe_bo_notifier_prepare_all_pinned(struct xe_device *xe)
&xe->pinned.late.kernel_bo_present,
xe_bo_notifier_prepare_pinned);
+ if (!ret)
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_notifier_prepare_pinned);
+
return ret;
}
@@ -93,6 +98,10 @@ void xe_bo_notifier_unprepare_all_pinned(struct xe_device *xe)
(void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
&xe->pinned.late.kernel_bo_present,
xe_bo_notifier_unprepare_pinned);
+
+ (void)xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external,
+ xe_bo_notifier_unprepare_pinned);
}
/**
@@ -158,8 +167,8 @@ int xe_bo_evict_all(struct xe_device *xe)
if (ret)
return ret;
- ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
- &xe->pinned.late.evicted, xe_bo_evict_pinned);
+ ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.external,
+ &xe->pinned.late.external, xe_bo_evict_pinned);
if (!ret)
ret = xe_bo_apply_to_pinned(xe, &xe->pinned.late.kernel_bo_present,
@@ -182,7 +191,6 @@ int xe_bo_evict_all(struct xe_device *xe)
static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
{
- struct xe_device *xe = xe_bo_device(bo);
int ret;
ret = xe_bo_restore_pinned(bo);
@@ -197,19 +205,10 @@ static int xe_bo_restore_and_map_ggtt(struct xe_bo *bo)
if (tile != bo->tile && !(bo->flags & XE_BO_FLAG_GGTTx(tile)))
continue;
- mutex_lock(&tile->mem.ggtt->lock);
- xe_ggtt_map_bo(tile->mem.ggtt, bo);
- mutex_unlock(&tile->mem.ggtt->lock);
+ xe_ggtt_map_bo_unlocked(tile->mem.ggtt, bo);
}
}
- /*
- * We expect validate to trigger a move VRAM and our move code
- * should setup the iosys map.
- */
- xe_assert(xe, !(bo->flags & XE_BO_FLAG_PINNED_LATE_RESTORE) ||
- !iosys_map_is_null(&bo->vmap));
-
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index eb5e83c5f233..d4fe3c8dca5b 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -9,6 +9,7 @@
#include <linux/iosys-map.h>
#include <drm/drm_gpusvm.h>
+#include <drm/drm_pagemap.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_device.h>
#include <drm/ttm/ttm_placement.h>
@@ -24,7 +25,9 @@ struct xe_vm;
/* TODO: To be selected with VM_MADVISE */
#define XE_BO_PRIORITY_NORMAL 1
-/** @xe_bo: XE buffer object */
+/**
+ * struct xe_bo - Xe buffer object
+ */
struct xe_bo {
/** @ttm: TTM base buffer object */
struct ttm_buffer_object ttm;
@@ -32,8 +35,6 @@ struct xe_bo {
struct xe_bo *backup_obj;
/** @parent_obj: Ref to parent bo if this a backup_obj */
struct xe_bo *parent_obj;
- /** @size: Size of this buffer object */
- size_t size;
/** @flags: flags for this buffer object */
u32 flags;
/** @vm: VM this BO is attached to, for extobj this will be NULL */
@@ -48,7 +49,7 @@ struct xe_bo {
struct xe_ggtt_node *ggtt_node[XE_MAX_TILES_PER_DEVICE];
/** @vmap: iosys map of this buffer */
struct iosys_map vmap;
- /** @ttm_kmap: TTM bo kmap object for internal use only. Keep off. */
+ /** @kmap: TTM bo kmap object for internal use only. Keep off. */
struct ttm_bo_kmap_obj kmap;
/** @pinned_link: link to present / evicted list of pinned BO */
struct list_head pinned_link;
@@ -62,6 +63,14 @@ struct xe_bo {
*/
struct list_head client_link;
#endif
+ /** @attr: User-controlled attributes for the BO */
+ struct {
+ /**
+ * @atomic_access: type of atomic access the BO needs;
+ * protected by the BO's dma-resv lock
+ */
+ u32 atomic_access;
+ } attr;
/**
* @pxp_key_instance: PXP key instance this BO was created against. A
* 0 in this variable indicates that the BO does not use PXP encryption.
@@ -75,9 +84,12 @@ struct xe_bo {
/** @created: Whether the bo has passed initial creation */
bool created;
- /** @ccs_cleared */
+ /** @ccs_cleared: true means that the CCS region of the BO is already cleared */
bool ccs_cleared;
+ /** @bb_ccs: BB instructions of CCS read/write. Valid only for VF */
+ struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
+
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
* objects. Exceptions are system memory on DGFX, which is always
@@ -86,12 +98,13 @@ struct xe_bo {
u16 cpu_caching;
/** @devmem_allocation: SVM device memory allocation */
- struct drm_gpusvm_devmem devmem_allocation;
+ struct drm_pagemap_devmem devmem_allocation;
/** @vram_userfault_link: Link into @mem_access.vram_userfault.list */
- struct list_head vram_userfault_link;
+ struct list_head vram_userfault_link;
- /** @min_align: minimum alignment needed for this BO if different
+ /**
+ * @min_align: minimum alignment needed for this BO if different
* from default
*/
u64 min_align;
diff --git a/drivers/gpu/drm/xe/xe_configfs.c b/drivers/gpu/drm/xe/xe_configfs.c
index cb9f175c89a1..9f6251b1008b 100644
--- a/drivers/gpu/drm/xe/xe_configfs.c
+++ b/drivers/gpu/drm/xe/xe_configfs.c
@@ -3,38 +3,71 @@
* Copyright © 2025 Intel Corporation
*/
+#include <linux/bitops.h>
+#include <linux/ctype.h>
#include <linux/configfs.h>
+#include <linux/cleanup.h>
+#include <linux/find.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/pci.h>
+#include <linux/string.h>
+#include "instructions/xe_mi_commands.h"
#include "xe_configfs.h"
+#include "xe_gt_types.h"
+#include "xe_hw_engine_types.h"
#include "xe_module.h"
+#include "xe_pci_types.h"
+#include "xe_sriov_types.h"
/**
* DOC: Xe Configfs
*
* Overview
- * =========
+ * ========
*
- * Configfs is a filesystem-based manager of kernel objects. XE KMD registers a
- * configfs subsystem called ``'xe'`` that creates a directory in the mounted configfs directory
- * The user can create devices under this directory and configure them as necessary
- * See Documentation/filesystems/configfs.rst for more information about how configfs works.
+ * Configfs is a filesystem-based manager of kernel objects. Xe KMD registers a
+ * configfs subsystem called ``xe`` that creates a directory in the mounted
+ * configfs directory. The user can create devices under this directory and
+ * configure them as necessary. See Documentation/filesystems/configfs.rst for
+ * more information about how configfs works.
*
* Create devices
- * ===============
+ * ==============
+ *
+ * To create a device, the ``xe`` module should already be loaded, but some
+ * attributes can only be set before binding the device. This can be
+ * accomplished by disabling driver autoprobe::
*
- * In order to create a device, the user has to create a directory inside ``'xe'``::
+ * # echo 0 > /sys/bus/pci/drivers_autoprobe
+ * # modprobe xe
*
- * mkdir /sys/kernel/config/xe/0000:03:00.0/
+ * In order to create a device, the user has to create a directory inside ``xe``::
+ *
+ * # mkdir /sys/kernel/config/xe/0000:03:00.0/
*
* Every device created is populated by the driver with entries that can be
* used to configure it::
*
* /sys/kernel/config/xe/
- * .. 0000:03:00.0/
- * ... survivability_mode
+ * ├── 0000:00:02.0
+ * │   └── ...
+ * ├── 0000:00:02.1
+ * │   └── ...
+ * :
+ * └── 0000:03:00.0
+ * ├── survivability_mode
+ * ├── gt_types_allowed
+ * ├── engines_allowed
+ * └── enable_psmi
+ *
+ * After configuring the attributes as described in the next section, the
+ * device can be probed with::
+ *
+ * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind
+ * # # or
+ * # echo 0000:03:00.0 > /sys/bus/pci/drivers_probe
*
* Configure Attributes
* ====================
@@ -46,28 +79,280 @@
* effect when probing the device. Example to enable it::
*
* # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
- * # echo 0000:03:00.0 > /sys/bus/pci/drivers/xe/bind (Enters survivability mode if supported)
+ *
+ * This attribute can only be set before binding to the device.
+ *
+ * Allowed GT types:
+ * -----------------
+ *
+ * Allow only specific types of GTs to be detected and initialized by the
+ * driver. Any combination of GT types can be enabled/disabled, although
+ * some settings will cause the device to fail to probe.
+ *
+ * Writes support both comma- and newline-separated input formats. Reads
+ * will always return one GT type per line. "primary" and "media" are the
+ * GT type names supported by this interface.
+ *
+ * This attribute can only be set before binding to the device.
+ *
+ * Examples:
+ *
+ * Allow both primary and media GTs to be initialized and used. This matches
+ * the driver's default behavior::
+ *
+ * # echo 'primary,media' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed
+ *
+ * Allow only the primary GT of each tile to be initialized and used,
+ * effectively disabling the media GT if it exists on the platform::
+ *
+ * # echo 'primary' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed
+ *
+ * Allow only the media GT of each tile to be initialized and used,
+ * effectively disabling the primary GT. **This configuration will cause
+ * device probe failure on all current platforms, but may be allowed on
+ * igpu platforms in the future**::
+ *
+ * # echo 'media' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed
+ *
+ * Disable all GTs. Only other GPU IP (such as display) is potentially usable.
+ * **This configuration will cause device probe failure on all current
+ * platforms, but may be allowed on igpu platforms in the future**::
+ *
+ * # echo '' > /sys/kernel/config/xe/0000:03:00.0/gt_types_allowed
+ *
+ * Allowed engines:
+ * ----------------
+ *
+ * Allow only a set of engines to be available, disabling the others even if
+ * they are present in hardware. This is applied after HW fuses are considered
+ * on each tile. Examples:
+ *
+ * Allow only one render and one copy engine, nothing else::
+ *
+ * # echo 'rcs0,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed
+ *
+ * Allow only compute engines and first copy engine::
+ *
+ * # echo 'ccs*,bcs0' > /sys/kernel/config/xe/0000:03:00.0/engines_allowed
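+ *
+ * Reading the attribute back reports the stored mask, collapsing a fully
+ * enabled class into its wildcard form (output below is illustrative)::
+ *
+ *	# cat /sys/kernel/config/xe/0000:03:00.0/engines_allowed
+ *	bcs0
+ *	ccs*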
+ *
+ * Note that the engine names are the per-GT hardware names. On multi-tile
+ * platforms, writing ``rcs0,bcs0`` to this file would allow the first render
+ * and copy engines on each tile.
+ *
+ * The requested configuration may not be supported by the platform, in which
+ * case the driver may fail to probe; for example, if at least one copy engine
+ * is expected to be available for migrations but all of them are disabled.
+ * This is intended for debugging purposes only.
+ *
+ * This attribute can only be set before binding to the device.
+ *
+ * PSMI
+ * ----
+ *
+ * Enable extra debugging capabilities to trace engine execution. Only useful
+ * during early platform enabling, and requires additional hardware to be
+ * connected. Once it's enabled, additional WAs are added and runtime
+ * configuration is done via debugfs. Example to enable it::
+ *
+ * # echo 1 > /sys/kernel/config/xe/0000:03:00.0/enable_psmi
+ *
+ * This attribute can only be set before binding to the device.
+ *
+ * Context restore BB
+ * ------------------
+ *
+ * Allow a batch buffer to be executed during context switches: when the
+ * GPU is restoring the context, it executes these additional commands. It's
+ * useful for testing extra workarounds and validating certain HW behaviors;
+ * it's not intended for normal execution and will taint the kernel with
+ * TAINT_TEST when used.
+ *
+ * The syntax allows passing raw instructions to be executed by the engine in
+ * a batch buffer, or setting specific registers.
+ *
+ * #. Generic instruction::
+ *
+ * <engine-class> cmd <instr> [[dword0] [dword1] [...]]
+ *
+ * #. Simple register setting::
+ *
+ * <engine-class> reg <address> <value>
+ *
+ * Commands are saved per engine class: all instances of that class will
+ * execute those commands during a context switch. The instruction, dword
+ * arguments, addresses and values are given in hex format, as in the examples
+ * below.
+ *
+ * #. Execute an LRI command to write 0xDEADBEEF to register 0x4f100 after the
+ * normal context restore::
+ *
+ * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \
+ * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb
+ *
+ * #. Execute an LRI command to write 0xDEADBEEF to register 0x4f100 at the
+ * beginning of the context restore::
+ *
+ * # echo 'rcs cmd 11000001 4F100 DEADBEEF' \
+ * > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_mid_bb
+ *
+ * #. Load certain values in a couple of registers (this can be used as a
+ * simpler alternative to the ``cmd`` action)::
+ *
+ * # cat > /sys/kernel/config/xe/0000:03:00.0/ctx_restore_post_bb <<EOF
+ * rcs reg 4F100 DEADBEEF
+ * rcs reg 4F104 FFFFFFFF
+ * EOF
+ *
+ * .. note::
+ *
+ * When using multiple lines, make sure to use a command that is
+ * implemented with a single write syscall, like HEREDOC.
+ *
+ * Currently this is implemented only for post and mid context restore and
+ * these attributes can only be set before binding to the device.
+ *
+ * Max SR-IOV Virtual Functions
+ * ----------------------------
+ *
+ * This config allows limiting the number of Virtual Functions (VFs) that can
+ * be managed by the Physical Function (PF) driver, where a value of 0 disables
+ * PF mode (no VFs).
+ *
+ * The default max_vfs config value is taken from the max_vfs modparam.
+ *
+ * How to enable the PF with support for an unlimited (up to the HW limit)
+ * number of VFs::
+ *
+ * # echo unlimited > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs
+ * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind
+ *
+ * How to enable the PF with support for up to 3 VFs::
+ *
+ * # echo 3 > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs
+ * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind
+ *
+ * How to disable PF mode and always run as native::
+ *
+ * # echo 0 > /sys/kernel/config/xe/0000:00:02.0/sriov/max_vfs
+ * # echo 0000:00:02.0 > /sys/bus/pci/drivers/xe/bind
+ *
+ * This setting only takes effect when probing the device.
*
* Remove devices
* ==============
*
* The created device directories can be removed using ``rmdir``::
*
- * rmdir /sys/kernel/config/xe/0000:03:00.0/
+ * # rmdir /sys/kernel/config/xe/0000:03:00.0/
*/
-struct xe_config_device {
+/* Similar to struct xe_bb, but not tied to HW (yet) */
+struct wa_bb {
+ u32 *cs;
+ u32 len; /* in dwords */
+};
+
+struct xe_config_group_device {
struct config_group group;
+ struct config_group sriov;
- bool survivability_mode;
+ struct xe_config_device {
+ u64 gt_types_allowed;
+ u64 engines_allowed;
+ struct wa_bb ctx_restore_post_bb[XE_ENGINE_CLASS_MAX];
+ struct wa_bb ctx_restore_mid_bb[XE_ENGINE_CLASS_MAX];
+ bool survivability_mode;
+ bool enable_psmi;
+ struct {
+ unsigned int max_vfs;
+ } sriov;
+ } config;
/* protects attributes */
struct mutex lock;
+ /* matching descriptor */
+ const struct xe_device_desc *desc;
+ /* tentative SR-IOV mode */
+ enum xe_sriov_mode mode;
+};
+
+static const struct xe_config_device device_defaults = {
+ .gt_types_allowed = U64_MAX,
+ .engines_allowed = U64_MAX,
+ .survivability_mode = false,
+ .enable_psmi = false,
+ .sriov = {
+ .max_vfs = UINT_MAX,
+ },
};
+static void set_device_defaults(struct xe_config_device *config)
+{
+ *config = device_defaults;
+#ifdef CONFIG_PCI_IOV
+ config->sriov.max_vfs = xe_modparam.max_vfs;
+#endif
+}
+
+struct engine_info {
+ const char *cls;
+ u64 mask;
+ enum xe_engine_class engine_class;
+};
+
+/* Some helpful macros to aid on the sizing of buffer allocation when parsing */
+#define MAX_ENGINE_CLASS_CHARS 5
+#define MAX_ENGINE_INSTANCE_CHARS 2
+
+static const struct engine_info engine_info[] = {
+ { .cls = "rcs", .mask = XE_HW_ENGINE_RCS_MASK, .engine_class = XE_ENGINE_CLASS_RENDER },
+ { .cls = "bcs", .mask = XE_HW_ENGINE_BCS_MASK, .engine_class = XE_ENGINE_CLASS_COPY },
+ { .cls = "vcs", .mask = XE_HW_ENGINE_VCS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_DECODE },
+ { .cls = "vecs", .mask = XE_HW_ENGINE_VECS_MASK, .engine_class = XE_ENGINE_CLASS_VIDEO_ENHANCE },
+ { .cls = "ccs", .mask = XE_HW_ENGINE_CCS_MASK, .engine_class = XE_ENGINE_CLASS_COMPUTE },
+ { .cls = "gsccs", .mask = XE_HW_ENGINE_GSCCS_MASK, .engine_class = XE_ENGINE_CLASS_OTHER },
+};
+
+static const struct {
+ const char *name;
+ enum xe_gt_type type;
+} gt_types[] = {
+ { .name = "primary", .type = XE_GT_TYPE_MAIN },
+ { .name = "media", .type = XE_GT_TYPE_MEDIA },
+};
+
+static struct xe_config_group_device *to_xe_config_group_device(struct config_item *item)
+{
+ return container_of(to_config_group(item), struct xe_config_group_device, group);
+}
+
static struct xe_config_device *to_xe_config_device(struct config_item *item)
{
- return container_of(to_config_group(item), struct xe_config_device, group);
+ return &to_xe_config_group_device(item)->config;
+}
+
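+/*
+ * Check whether the PCI device described by this configfs group is already
+ * bound to the xe driver (driver data set). Attributes that only take effect
+ * at probe time return -EBUSY while the device is bound.
+ */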
+static bool is_bound(struct xe_config_group_device *dev)
+{
+ unsigned int domain, bus, slot, function;
+ struct pci_dev *pdev;
+ const char *name;
+ bool ret;
+
+ lockdep_assert_held(&dev->lock);
+
+ name = dev->group.cg_item.ci_name;
+ if (sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function) != 4)
+ return false;
+
+ pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function));
+ if (!pdev)
+ return false;
+
+ ret = pci_get_drvdata(pdev);
+ pci_dev_put(pdev);
+
+ if (ret)
+ pci_dbg(pdev, "Already bound to driver\n");
+
+ return ret;
}
static ssize_t survivability_mode_show(struct config_item *item, char *page)
@@ -79,7 +364,7 @@ static ssize_t survivability_mode_show(struct config_item *item, char *page)
static ssize_t survivability_mode_store(struct config_item *item, const char *page, size_t len)
{
- struct xe_config_device *dev = to_xe_config_device(item);
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
bool survivability_mode;
int ret;
@@ -87,25 +372,466 @@ static ssize_t survivability_mode_store(struct config_item *item, const char *pa
if (ret)
return ret;
- mutex_lock(&dev->lock);
- dev->survivability_mode = survivability_mode;
- mutex_unlock(&dev->lock);
+ guard(mutex)(&dev->lock);
+ if (is_bound(dev))
+ return -EBUSY;
+
+ dev->config.survivability_mode = survivability_mode;
+
+ return len;
+}
+
+static ssize_t gt_types_allowed_show(struct config_item *item, char *page)
+{
+ struct xe_config_device *dev = to_xe_config_device(item);
+ char *p = page;
+
+ for (size_t i = 0; i < ARRAY_SIZE(gt_types); i++)
+ if (dev->gt_types_allowed & BIT_ULL(gt_types[i].type))
+ p += sprintf(p, "%s\n", gt_types[i].name);
+
+ return p - page;
+}
+
+static ssize_t gt_types_allowed_store(struct config_item *item, const char *page,
+ size_t len)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+ char *buf __free(kfree) = kstrdup(page, GFP_KERNEL);
+ char *p = buf;
+ u64 typemask = 0;
+
+ if (!buf)
+ return -ENOMEM;
+
+ while (p) {
+ char *typename = strsep(&p, ",\n");
+ bool matched = false;
+
+ if (typename[0] == '\0')
+ continue;
+
+ for (size_t i = 0; i < ARRAY_SIZE(gt_types); i++) {
+ if (strcmp(typename, gt_types[i].name) == 0) {
+ typemask |= BIT(gt_types[i].type);
+ matched = true;
+ break;
+ }
+ }
+
+ if (!matched)
+ return -EINVAL;
+ }
+
+ guard(mutex)(&dev->lock);
+ if (is_bound(dev))
+ return -EBUSY;
+
+ dev->config.gt_types_allowed = typemask;
+
+ return len;
+}
+
+static ssize_t engines_allowed_show(struct config_item *item, char *page)
+{
+ struct xe_config_device *dev = to_xe_config_device(item);
+ char *p = page;
+
+ for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
+ u64 mask = engine_info[i].mask;
+
+ if ((dev->engines_allowed & mask) == mask) {
+ p += sprintf(p, "%s*\n", engine_info[i].cls);
+ } else if (mask & dev->engines_allowed) {
+ u16 bit0 = __ffs64(mask), bit;
+
+ mask &= dev->engines_allowed;
+
+ for_each_set_bit(bit, (const unsigned long *)&mask, 64)
+ p += sprintf(p, "%s%u\n", engine_info[i].cls,
+ bit - bit0);
+ }
+ }
+
+ return p - page;
+}
+
+/*
+ * Lookup engine_info. If @mask is not NULL, reduce the mask according to the
+ * instance in @pattern.
+ *
+ * Examples of inputs:
+ * - lookup_engine_info("rcs0", &mask): return "rcs" entry from @engine_info and
+ * mask == BIT_ULL(XE_HW_ENGINE_RCS0)
+ * - lookup_engine_info("rcs*", &mask): return "rcs" entry from @engine_info and
+ * mask == XE_HW_ENGINE_RCS_MASK
+ * - lookup_engine_info("rcs", NULL): return "rcs" entry from @engine_info
+ */
+static const struct engine_info *lookup_engine_info(const char *pattern, u64 *mask)
+{
+ for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
+ u8 instance;
+ u16 bit;
+
+ if (!str_has_prefix(pattern, engine_info[i].cls))
+ continue;
+
+ pattern += strlen(engine_info[i].cls);
+ if (!mask)
+ return *pattern ? NULL : &engine_info[i];
+
+ if (!strcmp(pattern, "*")) {
+ *mask = engine_info[i].mask;
+ return &engine_info[i];
+ }
+
+ if (kstrtou8(pattern, 10, &instance))
+ return NULL;
+
+ bit = __ffs64(engine_info[i].mask) + instance;
+ if (bit >= fls64(engine_info[i].mask))
+ return NULL;
+
+ *mask = BIT_ULL(bit);
+ return &engine_info[i];
+ }
+
+ return NULL;
+}
+
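+/*
+ * Parse a single engine pattern (e.g. "rcs0", "ccs*" or a bare class name
+ * when @mask is NULL) from @s, stopping at the first character found in
+ * @end_chars. Returns the number of characters consumed on success or a
+ * negative error code.
+ */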
+static int parse_engine(const char *s, const char *end_chars, u64 *mask,
+ const struct engine_info **pinfo)
+{
+ char buf[MAX_ENGINE_CLASS_CHARS + MAX_ENGINE_INSTANCE_CHARS + 1];
+ const struct engine_info *info;
+ size_t len;
+
+ len = strcspn(s, end_chars);
+ if (len >= sizeof(buf))
+ return -EINVAL;
+
+ memcpy(buf, s, len);
+ buf[len] = '\0';
+
+ info = lookup_engine_info(buf, mask);
+ if (!info)
+ return -ENOENT;
+
+ if (pinfo)
+ *pinfo = info;
+
+ return len;
+}
+
+static ssize_t engines_allowed_store(struct config_item *item, const char *page,
+ size_t len)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+ ssize_t patternlen, p;
+ u64 mask, val = 0;
+
+ for (p = 0; p < len; p += patternlen + 1) {
+ patternlen = parse_engine(page + p, ",\n", &mask, NULL);
+ if (patternlen < 0)
+ return -EINVAL;
+
+ val |= mask;
+ }
+
+ guard(mutex)(&dev->lock);
+ if (is_bound(dev))
+ return -EBUSY;
+
+ dev->config.engines_allowed = val;
+
+ return len;
+}
+
+static ssize_t enable_psmi_show(struct config_item *item, char *page)
+{
+ struct xe_config_device *dev = to_xe_config_device(item);
+
+ return sprintf(page, "%d\n", dev->enable_psmi);
+}
+
+static ssize_t enable_psmi_store(struct config_item *item, const char *page, size_t len)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+ bool val;
+ int ret;
+
+ ret = kstrtobool(page, &val);
+ if (ret)
+ return ret;
+
+ guard(mutex)(&dev->lock);
+ if (is_bound(dev))
+ return -EBUSY;
+
+ dev->config.enable_psmi = val;
+
+ return len;
+}
+
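+/*
+ * Helper for wa_bb_show(): advance the output cursor by @len. When
+ * @dereference is true the destination buffer is real, so check that @len
+ * still fits in *@max_size, account for it and copy @append (if any); when
+ * it is false only the total length is being computed.
+ */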
+static bool wa_bb_read_advance(bool dereference, char **p,
+ const char *append, size_t len,
+ size_t *max_size)
+{
+ if (dereference) {
+ if (len >= *max_size)
+ return false;
+ *max_size -= len;
+ if (append)
+ memcpy(*p, append, len);
+ }
+
+ *p += len;
+
+ return true;
+}
+
+static ssize_t wa_bb_show(struct xe_config_group_device *dev,
+ struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX],
+ char *data, size_t sz)
+{
+ char *p = data;
+
+ guard(mutex)(&dev->lock);
+
+ for (size_t i = 0; i < ARRAY_SIZE(engine_info); i++) {
+ enum xe_engine_class ec = engine_info[i].engine_class;
+ size_t len;
+
+ if (!wa_bb[ec].len)
+ continue;
+
+ len = snprintf(p, sz, "%s:", engine_info[i].cls);
+ if (!wa_bb_read_advance(data, &p, NULL, len, &sz))
+ return -ENOBUFS;
+
+ for (size_t j = 0; j < wa_bb[ec].len; j++) {
+ len = snprintf(p, sz, " %08x", wa_bb[ec].cs[j]);
+ if (!wa_bb_read_advance(data, &p, NULL, len, &sz))
+ return -ENOBUFS;
+ }
+
+ if (!wa_bb_read_advance(data, &p, "\n", 1, &sz))
+ return -ENOBUFS;
+ }
+
+ if (!wa_bb_read_advance(data, &p, "", 1, &sz))
+ return -ENOBUFS;
+
+ /* Reserve one more to match check for '\0' */
+ if (!data)
+ p++;
+
+ return p - data;
+}
+
+static ssize_t ctx_restore_mid_bb_show(struct config_item *item, char *page)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_show(dev, dev->config.ctx_restore_mid_bb, page, SZ_4K);
+}
+
+static ssize_t ctx_restore_post_bb_show(struct config_item *item, char *page)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_show(dev, dev->config.ctx_restore_post_bb, page, SZ_4K);
+}
+
+static void wa_bb_append(struct wa_bb *wa_bb, u32 val)
+{
+ if (wa_bb->cs)
+ wa_bb->cs[wa_bb->len] = val;
+
+ wa_bb->len++;
+}
+
+static ssize_t parse_hex(const char *line, u32 *pval)
+{
+ char numstr[12];
+ const char *p;
+ ssize_t numlen;
+
+ p = line + strspn(line, " \t");
+ if (!*p || *p == '\n')
+ return 0;
+
+ numlen = strcspn(p, " \t\n");
+ if (!numlen || numlen >= sizeof(numstr) - 1)
+ return -EINVAL;
+
+ memcpy(numstr, p, numlen);
+ numstr[numlen] = '\0';
+ p += numlen;
+
+ if (kstrtou32(numstr, 16, pval))
+ return -EINVAL;
+
+ return p - line;
+}
+
+/*
+ * Parse lines with the format
+ *
+ * <engine-class> cmd <u32> <u32...>
+ * <engine-class> reg <u32_addr> <u32_val>
+ *
+ * and optionally save them in @wa_bb[i].cs if it is non-NULL.
+ *
+ * Return the number of dwords parsed.
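+ *
+ * For example, the line "rcs reg 4F100 DEADBEEF" expands to three dwords for
+ * the render class: MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1) followed by the
+ * address 0x4f100 and the value 0xdeadbeef.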
+ */
+static ssize_t parse_wa_bb_lines(const char *lines,
+ struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX])
+{
+ ssize_t dwords = 0, ret;
+ const char *p;
+
+ for (p = lines; *p; p++) {
+ const struct engine_info *info = NULL;
+ u32 val, val2;
+
+ /* Also allow empty lines */
+ p += strspn(p, " \t\n");
+ if (!*p)
+ break;
+
+ ret = parse_engine(p, " \t\n", NULL, &info);
+ if (ret < 0)
+ return ret;
+
+ p += ret;
+ p += strspn(p, " \t");
+
+ if (str_has_prefix(p, "cmd")) {
+ for (p += strlen("cmd"); *p;) {
+ ret = parse_hex(p, &val);
+ if (ret < 0)
+ return -EINVAL;
+ if (!ret)
+ break;
+
+ p += ret;
+ dwords++;
+ wa_bb_append(&wa_bb[info->engine_class], val);
+ }
+ } else if (str_has_prefix(p, "reg")) {
+ p += strlen("reg");
+ ret = parse_hex(p, &val);
+ if (ret <= 0)
+ return -EINVAL;
+
+ p += ret;
+ ret = parse_hex(p, &val2);
+ if (ret <= 0)
+ return -EINVAL;
+
+ p += ret;
+ dwords += 3;
+ wa_bb_append(&wa_bb[info->engine_class],
+ MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1));
+ wa_bb_append(&wa_bb[info->engine_class], val);
+ wa_bb_append(&wa_bb[info->engine_class], val2);
+ } else {
+ return -EINVAL;
+ }
+ }
+
+ return dwords;
+}
+
+static ssize_t wa_bb_store(struct wa_bb wa_bb[static XE_ENGINE_CLASS_MAX],
+ struct xe_config_group_device *dev,
+ const char *page, size_t len)
+{
+ /* tmp_wa_bb must match wa_bb's size */
+ struct wa_bb tmp_wa_bb[XE_ENGINE_CLASS_MAX] = { };
+ ssize_t count, class;
+ u32 *tmp;
+
+ /* 1. Count dwords - wa_bb[i].cs is NULL for all classes */
+ count = parse_wa_bb_lines(page, tmp_wa_bb);
+ if (count < 0)
+ return count;
+
+ guard(mutex)(&dev->lock);
+
+ if (is_bound(dev))
+ return -EBUSY;
+
+ /*
+ * 2. Allocate a u32 array and set the pointers to the right positions
+ * according to the length of each class' wa_bb
+ */
+ tmp = krealloc(wa_bb[0].cs, count * sizeof(u32), GFP_KERNEL);
+ if (!tmp)
+ return -ENOMEM;
+
+ if (!count) {
+ memset(wa_bb, 0, sizeof(tmp_wa_bb));
+ return len;
+ }
+
+ for (class = 0, count = 0; class < XE_ENGINE_CLASS_MAX; ++class) {
+ tmp_wa_bb[class].cs = tmp + count;
+ count += tmp_wa_bb[class].len;
+ tmp_wa_bb[class].len = 0;
+ }
+
+ /* 3. Parse wa_bb lines again, this time saving the values */
+ count = parse_wa_bb_lines(page, tmp_wa_bb);
+ if (count < 0)
+ return count;
+
+ memcpy(wa_bb, tmp_wa_bb, sizeof(tmp_wa_bb));
return len;
}
+static ssize_t ctx_restore_mid_bb_store(struct config_item *item,
+ const char *data, size_t sz)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_store(dev->config.ctx_restore_mid_bb, dev, data, sz);
+}
+
+static ssize_t ctx_restore_post_bb_store(struct config_item *item,
+ const char *data, size_t sz)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ return wa_bb_store(dev->config.ctx_restore_post_bb, dev, data, sz);
+}
+
+CONFIGFS_ATTR(, ctx_restore_mid_bb);
+CONFIGFS_ATTR(, ctx_restore_post_bb);
+CONFIGFS_ATTR(, enable_psmi);
+CONFIGFS_ATTR(, engines_allowed);
+CONFIGFS_ATTR(, gt_types_allowed);
CONFIGFS_ATTR(, survivability_mode);
static struct configfs_attribute *xe_config_device_attrs[] = {
+ &attr_ctx_restore_mid_bb,
+ &attr_ctx_restore_post_bb,
+ &attr_enable_psmi,
+ &attr_engines_allowed,
+ &attr_gt_types_allowed,
&attr_survivability_mode,
NULL,
};
static void xe_config_device_release(struct config_item *item)
{
- struct xe_config_device *dev = to_xe_config_device(item);
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
mutex_destroy(&dev->lock);
+
+ kfree(dev->config.ctx_restore_post_bb[0].cs);
kfree(dev);
}
@@ -113,45 +839,192 @@ static struct configfs_item_operations xe_config_device_ops = {
.release = xe_config_device_release,
};
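+/*
+ * Only expose attributes that make sense for the matched platform; e.g.
+ * survivability_mode is limited to discrete Battlemage and later.
+ */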
+static bool xe_config_device_is_visible(struct config_item *item,
+ struct configfs_attribute *attr, int n)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item);
+
+ if (attr == &attr_survivability_mode) {
+ if (!dev->desc->is_dgfx || dev->desc->platform < XE_BATTLEMAGE)
+ return false;
+ }
+
+ return true;
+}
+
+static struct configfs_group_operations xe_config_device_group_ops = {
+ .is_visible = xe_config_device_is_visible,
+};
+
static const struct config_item_type xe_config_device_type = {
.ct_item_ops = &xe_config_device_ops,
+ .ct_group_ops = &xe_config_device_group_ops,
.ct_attrs = xe_config_device_attrs,
.ct_owner = THIS_MODULE,
};
+static ssize_t sriov_max_vfs_show(struct config_item *item, char *page)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent);
+
+ guard(mutex)(&dev->lock);
+
+ if (dev->config.sriov.max_vfs == UINT_MAX)
+ return sprintf(page, "%s\n", "unlimited");
+ else
+ return sprintf(page, "%u\n", dev->config.sriov.max_vfs);
+}
+
+static ssize_t sriov_max_vfs_store(struct config_item *item, const char *page, size_t len)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent);
+ unsigned int max_vfs;
+ int ret;
+
+ guard(mutex)(&dev->lock);
+
+ if (is_bound(dev))
+ return -EBUSY;
+
+ ret = kstrtouint(page, 0, &max_vfs);
+ if (ret) {
+ if (!sysfs_streq(page, "unlimited"))
+ return ret;
+ max_vfs = UINT_MAX;
+ }
+
+ dev->config.sriov.max_vfs = max_vfs;
+ return len;
+}
+
+CONFIGFS_ATTR(sriov_, max_vfs);
+
+static struct configfs_attribute *xe_config_sriov_attrs[] = {
+ &sriov_attr_max_vfs,
+ NULL,
+};
+
+static bool xe_config_sriov_is_visible(struct config_item *item,
+ struct configfs_attribute *attr, int n)
+{
+ struct xe_config_group_device *dev = to_xe_config_group_device(item->ci_parent);
+
+ if (attr == &sriov_attr_max_vfs && dev->mode != XE_SRIOV_MODE_PF)
+ return false;
+
+ return true;
+}
+
+static struct configfs_group_operations xe_config_sriov_group_ops = {
+ .is_visible = xe_config_sriov_is_visible,
+};
+
+static const struct config_item_type xe_config_sriov_type = {
+ .ct_owner = THIS_MODULE,
+ .ct_group_ops = &xe_config_sriov_group_ops,
+ .ct_attrs = xe_config_sriov_attrs,
+};
+
+static const struct xe_device_desc *xe_match_desc(struct pci_dev *pdev)
+{
+ struct device_driver *driver = driver_find("xe", &pci_bus_type);
+ struct pci_driver *drv = to_pci_driver(driver);
+ const struct pci_device_id *ids = drv ? drv->id_table : NULL;
+ const struct pci_device_id *found = pci_match_id(ids, pdev);
+
+ return found ? (const void *)found->driver_data : NULL;
+}
+
+static struct pci_dev *get_physfn_instead(struct pci_dev *virtfn)
+{
+ struct pci_dev *physfn = pci_physfn(virtfn);
+
+ pci_dev_get(physfn);
+ pci_dev_put(virtfn);
+ return physfn;
+}
+
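+/*
+ * The directory name may refer to a VF that is not enumerated yet (VFs only
+ * appear on the bus once the PF enables SR-IOV), so when the exact function
+ * does not exist, fall back to the PF at function 0 to validate the address
+ * and resolve the device descriptor.
+ */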
static struct config_group *xe_config_make_device_group(struct config_group *group,
const char *name)
{
unsigned int domain, bus, slot, function;
- struct xe_config_device *dev;
+ struct xe_config_group_device *dev;
+ const struct xe_device_desc *match;
+ enum xe_sriov_mode mode;
struct pci_dev *pdev;
+ char canonical[16];
+ int vfnumber = 0;
int ret;
- ret = sscanf(name, "%04x:%02x:%02x.%x", &domain, &bus, &slot, &function);
+ ret = sscanf(name, "%x:%x:%x.%x", &domain, &bus, &slot, &function);
if (ret != 4)
return ERR_PTR(-EINVAL);
+ ret = scnprintf(canonical, sizeof(canonical), "%04x:%02x:%02x.%d", domain, bus,
+ PCI_SLOT(PCI_DEVFN(slot, function)),
+ PCI_FUNC(PCI_DEVFN(slot, function)));
+ if (ret != 12 || strcmp(name, canonical))
+ return ERR_PTR(-EINVAL);
+
pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, function));
+ mode = pdev ? dev_is_pf(&pdev->dev) ?
+ XE_SRIOV_MODE_PF : XE_SRIOV_MODE_NONE : XE_SRIOV_MODE_VF;
+
+ if (!pdev && function)
+ pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(slot, 0));
+ if (!pdev && slot)
+ pdev = pci_get_domain_bus_and_slot(domain, bus, PCI_DEVFN(0, 0));
if (!pdev)
- return ERR_PTR(-EINVAL);
+ return ERR_PTR(-ENODEV);
+
+ if (PCI_DEVFN(slot, function) != pdev->devfn) {
+ pdev = get_physfn_instead(pdev);
+ vfnumber = PCI_DEVFN(slot, function) - pdev->devfn;
+ if (!dev_is_pf(&pdev->dev) || vfnumber > pci_sriov_get_totalvfs(pdev)) {
+ pci_dev_put(pdev);
+ return ERR_PTR(-ENODEV);
+ }
+ }
+
+ match = xe_match_desc(pdev);
+ if (match && vfnumber && !match->has_sriov) {
+ pci_info(pdev, "xe driver does not support VFs on this device\n");
+ match = NULL;
+ } else if (!match) {
+ pci_info(pdev, "xe driver does not support configuration of this device\n");
+ }
+
+ pci_dev_put(pdev);
+
+ if (!match)
+ return ERR_PTR(-ENOENT);
dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
+ dev->desc = match;
+ dev->mode = match->has_sriov ? mode : XE_SRIOV_MODE_NONE;
+
+ set_device_defaults(&dev->config);
+
config_group_init_type_name(&dev->group, name, &xe_config_device_type);
+ if (dev->mode != XE_SRIOV_MODE_NONE) {
+ config_group_init_type_name(&dev->sriov, "sriov", &xe_config_sriov_type);
+ configfs_add_default_group(&dev->sriov, &dev->group);
+ }
mutex_init(&dev->lock);
return &dev->group;
}
-static struct configfs_group_operations xe_config_device_group_ops = {
+static struct configfs_group_operations xe_config_group_ops = {
.make_group = xe_config_make_device_group,
};
static const struct config_item_type xe_configfs_type = {
- .ct_group_ops = &xe_config_device_group_ops,
+ .ct_group_ops = &xe_config_group_ops,
.ct_owner = THIS_MODULE,
};
@@ -164,87 +1037,255 @@ static struct configfs_subsystem xe_configfs = {
},
};
-static struct xe_config_device *configfs_find_group(struct pci_dev *pdev)
+static struct xe_config_group_device *find_xe_config_group_device(struct pci_dev *pdev)
{
struct config_item *item;
- char name[64];
-
- snprintf(name, sizeof(name), "%04x:%02x:%02x.%x", pci_domain_nr(pdev->bus),
- pdev->bus->number, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
mutex_lock(&xe_configfs.su_mutex);
- item = config_group_find_item(&xe_configfs.su_group, name);
+ item = config_group_find_item(&xe_configfs.su_group, pci_name(pdev));
mutex_unlock(&xe_configfs.su_mutex);
if (!item)
return NULL;
- return to_xe_config_device(item);
+ return to_xe_config_group_device(item);
+}
+
+static void dump_custom_dev_config(struct pci_dev *pdev,
+ struct xe_config_group_device *dev)
+{
+#define PRI_CUSTOM_ATTR(fmt_, attr_) do { \
+ if (dev->config.attr_ != device_defaults.attr_) \
+ pci_info(pdev, "configfs: " __stringify(attr_) " = " fmt_ "\n", \
+ dev->config.attr_); \
+ } while (0)
+
+ PRI_CUSTOM_ATTR("%llx", gt_types_allowed);
+ PRI_CUSTOM_ATTR("%llx", engines_allowed);
+ PRI_CUSTOM_ATTR("%d", enable_psmi);
+ PRI_CUSTOM_ATTR("%d", survivability_mode);
+
+#undef PRI_CUSTOM_ATTR
+}
+
+/**
+ * xe_configfs_check_device() - Test if device was configured by configfs
+ * @pdev: the &pci_dev device to test
+ *
+ * Try to find the configfs group that belongs to the specified pci device
+ * and print a diagnostic message if different than the default value.
+ */
+void xe_configfs_check_device(struct pci_dev *pdev)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+
+ if (!dev)
+ return;
+
+ /* memcmp here is safe as both are zero-initialized */
+ if (memcmp(&dev->config, &device_defaults, sizeof(dev->config))) {
+ pci_info(pdev, "Found custom settings in configfs\n");
+ dump_custom_dev_config(pdev, dev);
+ }
+
+ config_group_put(&dev->group);
}
/**
* xe_configfs_get_survivability_mode - get configfs survivability mode attribute
* @pdev: pci device
*
- * find the configfs group that belongs to the pci device and return
- * the survivability mode attribute
- *
- * Return: survivability mode if config group is found, false otherwise
+ * Return: survivability_mode attribute in configfs
*/
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev)
{
- struct xe_config_device *dev = configfs_find_group(pdev);
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
bool mode;
if (!dev)
- return false;
+ return device_defaults.survivability_mode;
- mode = dev->survivability_mode;
- config_item_put(&dev->group.cg_item);
+ mode = dev->config.survivability_mode;
+ config_group_put(&dev->group);
return mode;
}
+static u64 get_gt_types_allowed(struct pci_dev *pdev)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ u64 mask;
+
+ if (!dev)
+ return device_defaults.gt_types_allowed;
+
+ mask = dev->config.gt_types_allowed;
+ config_group_put(&dev->group);
+
+ return mask;
+}
+
+/**
+ * xe_configfs_primary_gt_allowed - determine whether primary GTs are supported
+ * @pdev: pci device
+ *
+ * Return: True if primary GTs are enabled, false if they have been disabled via
+ * configfs.
+ */
+bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev)
+{
+ return get_gt_types_allowed(pdev) & BIT_ULL(XE_GT_TYPE_MAIN);
+}
+
+/**
+ * xe_configfs_media_gt_allowed - determine whether media GTs are supported
+ * @pdev: pci device
+ *
+ * Return: True if the media GTs are enabled, false if they have been disabled
+ * via configfs.
+ */
+bool xe_configfs_media_gt_allowed(struct pci_dev *pdev)
+{
+ return get_gt_types_allowed(pdev) & BIT_ULL(XE_GT_TYPE_MEDIA);
+}
+
/**
- * xe_configfs_clear_survivability_mode - clear configfs survivability mode attribute
+ * xe_configfs_get_engines_allowed - get engine allowed mask from configfs
* @pdev: pci device
*
- * find the configfs group that belongs to the pci device and clear survivability
- * mode attribute
+ * Return: engine mask with allowed engines set in configfs
*/
-void xe_configfs_clear_survivability_mode(struct pci_dev *pdev)
+u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev)
{
- struct xe_config_device *dev = configfs_find_group(pdev);
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ u64 engines_allowed;
if (!dev)
- return;
+ return device_defaults.engines_allowed;
+
+ engines_allowed = dev->config.engines_allowed;
+ config_group_put(&dev->group);
+
+ return engines_allowed;
+}
+
+/**
+ * xe_configfs_get_psmi_enabled - get configfs enable_psmi setting
+ * @pdev: pci device
+ *
+ * Return: enable_psmi setting in configfs
+ */
+bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ bool ret;
+
+ if (!dev)
+ return false;
- mutex_lock(&dev->lock);
- dev->survivability_mode = 0;
- mutex_unlock(&dev->lock);
+ ret = dev->config.enable_psmi;
+ config_group_put(&dev->group);
- config_item_put(&dev->group.cg_item);
+ return ret;
+}
+
+/**
+ * xe_configfs_get_ctx_restore_mid_bb - get configfs ctx_restore_mid_bb setting
+ * @pdev: pci device
+ * @class: hw engine class
+ * @cs: pointer to the bb to use - only valid during probe
+ *
+ * Return: Number of dwords used in the mid_ctx_restore setting in configfs
+ */
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
+ const u32 **cs)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ u32 len;
+
+ if (!dev)
+ return 0;
+
+ if (cs)
+ *cs = dev->config.ctx_restore_mid_bb[class].cs;
+
+ len = dev->config.ctx_restore_mid_bb[class].len;
+ config_group_put(&dev->group);
+
+ return len;
}
+/**
+ * xe_configfs_get_ctx_restore_post_bb - get configfs ctx_restore_post_bb setting
+ * @pdev: pci device
+ * @class: hw engine class
+ * @cs: pointer to the bb to use - only valid during probe
+ *
+ * Return: Number of dwords used in the post_ctx_restore setting in configfs
+ */
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev,
+ enum xe_engine_class class,
+ const u32 **cs)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ u32 len;
+
+ if (!dev)
+ return 0;
+
+ *cs = dev->config.ctx_restore_post_bb[class].cs;
+ len = dev->config.ctx_restore_post_bb[class].len;
+ config_group_put(&dev->group);
+
+ return len;
+}
+
+#ifdef CONFIG_PCI_IOV
+/**
+ * xe_configfs_get_max_vfs() - Get number of VFs that could be managed
+ * @pdev: the &pci_dev device
+ *
+ * Find the configfs group that belongs to the PCI device and return the
+ * maximum number of Virtual Functions (VFs) that could be managed by this
+ * device. If the configfs group is not present, the value of the max_vfs
+ * module parameter is used.
+ *
+ * Return: maximum number of VFs that could be managed.
+ */
+unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev)
+{
+ struct xe_config_group_device *dev = find_xe_config_group_device(pdev);
+ unsigned int max_vfs;
+
+ if (!dev)
+ return xe_modparam.max_vfs;
+
+ scoped_guard(mutex, &dev->lock)
+ max_vfs = dev->config.sriov.max_vfs;
+
+ config_group_put(&dev->group);
+
+ return max_vfs;
+}
+#endif
+
int __init xe_configfs_init(void)
{
- struct config_group *root = &xe_configfs.su_group;
int ret;
- config_group_init(root);
+ config_group_init(&xe_configfs.su_group);
mutex_init(&xe_configfs.su_mutex);
ret = configfs_register_subsystem(&xe_configfs);
if (ret) {
- pr_err("Error %d while registering %s subsystem\n",
- ret, root->cg_item.ci_namebuf);
+ mutex_destroy(&xe_configfs.su_mutex);
return ret;
}
return 0;
}
-void __exit xe_configfs_exit(void)
+void xe_configfs_exit(void)
{
configfs_unregister_subsystem(&xe_configfs);
+ mutex_destroy(&xe_configfs.su_mutex);
}
-
diff --git a/drivers/gpu/drm/xe/xe_configfs.h b/drivers/gpu/drm/xe/xe_configfs.h
index d7d041ec2611..fed57be0b90e 100644
--- a/drivers/gpu/drm/xe/xe_configfs.h
+++ b/drivers/gpu/drm/xe/xe_configfs.h
@@ -5,20 +5,43 @@
#ifndef _XE_CONFIGFS_H_
#define _XE_CONFIGFS_H_
+#include <linux/limits.h>
#include <linux/types.h>
+#include <xe_hw_engine_types.h>
+
struct pci_dev;
#if IS_ENABLED(CONFIG_CONFIGFS_FS)
int xe_configfs_init(void);
void xe_configfs_exit(void);
+void xe_configfs_check_device(struct pci_dev *pdev);
bool xe_configfs_get_survivability_mode(struct pci_dev *pdev);
-void xe_configfs_clear_survivability_mode(struct pci_dev *pdev);
+bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev);
+bool xe_configfs_media_gt_allowed(struct pci_dev *pdev);
+u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev);
+bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev);
+u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs);
+u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class,
+ const u32 **cs);
+#ifdef CONFIG_PCI_IOV
+unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev);
+#endif
#else
-static inline int xe_configfs_init(void) { return 0; };
-static inline void xe_configfs_exit(void) {};
-static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; };
-static inline void xe_configfs_clear_survivability_mode(struct pci_dev *pdev) {};
+static inline int xe_configfs_init(void) { return 0; }
+static inline void xe_configfs_exit(void) { }
+static inline void xe_configfs_check_device(struct pci_dev *pdev) { }
+static inline bool xe_configfs_get_survivability_mode(struct pci_dev *pdev) { return false; }
+static inline bool xe_configfs_primary_gt_allowed(struct pci_dev *pdev) { return true; }
+static inline bool xe_configfs_media_gt_allowed(struct pci_dev *pdev) { return true; }
+static inline u64 xe_configfs_get_engines_allowed(struct pci_dev *pdev) { return U64_MAX; }
+static inline bool xe_configfs_get_psmi_enabled(struct pci_dev *pdev) { return false; }
+static inline u32 xe_configfs_get_ctx_restore_mid_bb(struct pci_dev *pdev, enum xe_engine_class class,
+ const u32 **cs) { return 0; }
+static inline u32 xe_configfs_get_ctx_restore_post_bb(struct pci_dev *pdev, enum xe_engine_class class,
+ const u32 **cs) { return 0; }
+static inline unsigned int xe_configfs_get_max_vfs(struct pci_dev *pdev) { return UINT_MAX; }
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_debugfs.c b/drivers/gpu/drm/xe/xe_debugfs.c
index d0503959a8ed..e91da9589c5f 100644
--- a/drivers/gpu/drm/xe/xe_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_debugfs.c
@@ -11,16 +11,24 @@
#include <drm/drm_debugfs.h>
+#include "regs/xe_pmt.h"
#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt_debugfs.h"
#include "xe_gt_printk.h"
#include "xe_guc_ads.h"
+#include "xe_mmio.h"
#include "xe_pm.h"
+#include "xe_psmi.h"
#include "xe_pxp_debugfs.h"
#include "xe_sriov.h"
+#include "xe_sriov_pf_debugfs.h"
+#include "xe_sriov_vf.h"
#include "xe_step.h"
+#include "xe_tile_debugfs.h"
+#include "xe_vsec.h"
+#include "xe_wa.h"
#ifdef CONFIG_DRM_XE_DEBUG
#include "xe_bo_evict.h"
@@ -29,6 +37,24 @@
#endif
DECLARE_FAULT_ATTR(gt_reset_failure);
+DECLARE_FAULT_ATTR(inject_csc_hw_error);
+
+static void read_residency_counter(struct xe_device *xe, struct xe_mmio *mmio,
+ u32 offset, const char *name, struct drm_printer *p)
+{
+ u64 residency = 0;
+ int ret;
+
+ ret = xe_pmt_telem_read(to_pci_dev(xe->drm.dev),
+ xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID),
+ &residency, offset, sizeof(residency));
+ if (ret != sizeof(residency)) {
+ drm_warn(&xe->drm, "%s counter failed to read, ret %d\n", name, ret);
+ return;
+ }
+
+ drm_printf(p, "%s : %llu\n", name, residency);
+}
static struct xe_device *node_to_xe(struct drm_info_node *node)
{
@@ -82,9 +108,89 @@ static int sriov_info(struct seq_file *m, void *data)
return 0;
}
+static int workarounds(struct xe_device *xe, struct drm_printer *p)
+{
+ xe_pm_runtime_get(xe);
+ xe_wa_device_dump(xe, p);
+ xe_pm_runtime_put(xe);
+
+ return 0;
+}
+
+static int workaround_info(struct seq_file *m, void *data)
+{
+ struct xe_device *xe = node_to_xe(m->private);
+ struct drm_printer p = drm_seq_file_printer(m);
+
+ workarounds(xe, &p);
+ return 0;
+}
+
+static int dgfx_pkg_residencies_show(struct seq_file *m, void *data)
+{
+ struct xe_device *xe;
+ struct xe_mmio *mmio;
+ struct drm_printer p;
+
+ xe = node_to_xe(m->private);
+ p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(xe);
mmio = xe_root_tile_mmio(xe);
+
+ static const struct {
+ u32 offset;
+ const char *name;
+ } residencies[] = {
+ {BMG_G2_RESIDENCY_OFFSET, "Package G2"},
+ {BMG_G6_RESIDENCY_OFFSET, "Package G6"},
+ {BMG_G7_RESIDENCY_OFFSET, "Package G7"},
+ {BMG_G8_RESIDENCY_OFFSET, "Package G8"},
+ {BMG_G10_RESIDENCY_OFFSET, "Package G10"},
+ {BMG_MODS_RESIDENCY_OFFSET, "Package ModS"}
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(residencies); i++)
+ read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p);
+
+ xe_pm_runtime_put(xe);
+ return 0;
+}
+
+static int dgfx_pcie_link_residencies_show(struct seq_file *m, void *data)
+{
+ struct xe_device *xe;
+ struct xe_mmio *mmio;
+ struct drm_printer p;
+
+ xe = node_to_xe(m->private);
+ p = drm_seq_file_printer(m);
+ xe_pm_runtime_get(xe);
+ mmio = xe_root_tile_mmio(xe);
+
+ static const struct {
+ u32 offset;
+ const char *name;
+ } residencies[] = {
+ {BMG_PCIE_LINK_L0_RESIDENCY_OFFSET, "PCIE LINK L0 RESIDENCY"},
+ {BMG_PCIE_LINK_L1_RESIDENCY_OFFSET, "PCIE LINK L1 RESIDENCY"},
+ {BMG_PCIE_LINK_L1_2_RESIDENCY_OFFSET, "PCIE LINK L1.2 RESIDENCY"}
+ };
+
+ for (int i = 0; i < ARRAY_SIZE(residencies); i++)
+ read_residency_counter(xe, mmio, residencies[i].offset, residencies[i].name, &p);
+
+ xe_pm_runtime_put(xe);
+ return 0;
+}
+
static const struct drm_info_list debugfs_list[] = {
{"info", info, 0},
{ .name = "sriov_info", .show = sriov_info, },
+ { .name = "workarounds", .show = workaround_info, },
+};
+
+static const struct drm_info_list debugfs_residencies[] = {
+ { .name = "dgfx_pkg_residencies", .show = dgfx_pkg_residencies_show, },
+ { .name = "dgfx_pcie_link_residencies", .show = dgfx_pcie_link_residencies_show, },
};
static int forcewake_open(struct inode *inode, struct file *file)
@@ -191,26 +297,112 @@ static const struct file_operations wedged_mode_fops = {
.write = wedged_mode_set,
};
+static ssize_t atomic_svm_timeslice_ms_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ char buf[32];
+ int len = 0;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", xe->atomic_svm_timeslice_ms);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t atomic_svm_timeslice_ms_set(struct file *f,
+ const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ u32 atomic_svm_timeslice_ms;
+ ssize_t ret;
+
+ ret = kstrtouint_from_user(ubuf, size, 0, &atomic_svm_timeslice_ms);
+ if (ret)
+ return ret;
+
+ xe->atomic_svm_timeslice_ms = atomic_svm_timeslice_ms;
+
+ return size;
+}
+
+static const struct file_operations atomic_svm_timeslice_ms_fops = {
+ .owner = THIS_MODULE,
+ .read = atomic_svm_timeslice_ms_show,
+ .write = atomic_svm_timeslice_ms_set,
+};
+
+static ssize_t disable_late_binding_show(struct file *f, char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_late_bind *late_bind = &xe->late_bind;
+ char buf[32];
+ int len;
+
+ len = scnprintf(buf, sizeof(buf), "%d\n", late_bind->disable);
+
+ return simple_read_from_buffer(ubuf, size, pos, buf, len);
+}
+
+static ssize_t disable_late_binding_set(struct file *f, const char __user *ubuf,
+ size_t size, loff_t *pos)
+{
+ struct xe_device *xe = file_inode(f)->i_private;
+ struct xe_late_bind *late_bind = &xe->late_bind;
+ bool val;
+ int ret;
+
+ ret = kstrtobool_from_user(ubuf, size, &val);
+ if (ret)
+ return ret;
+
+ late_bind->disable = val;
+ return size;
+}
+
+static const struct file_operations disable_late_binding_fops = {
+ .owner = THIS_MODULE,
+ .read = disable_late_binding_show,
+ .write = disable_late_binding_set,
+};
+
void xe_debugfs_register(struct xe_device *xe)
{
struct ttm_device *bdev = &xe->ttm;
struct drm_minor *minor = xe->drm.primary;
struct dentry *root = minor->debugfs_root;
struct ttm_resource_manager *man;
+ struct xe_tile *tile;
struct xe_gt *gt;
u32 mem_type;
+ u8 tile_id;
u8 id;
drm_debugfs_create_files(debugfs_list,
ARRAY_SIZE(debugfs_list),
root, minor);
+ if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) {
+ drm_debugfs_create_files(debugfs_residencies,
+ ARRAY_SIZE(debugfs_residencies),
+ root, minor);
+ fault_create_debugfs_attr("inject_csc_hw_error", root,
+ &inject_csc_hw_error);
+ }
+
debugfs_create_file("forcewake_all", 0400, root, xe,
&forcewake_all_fops);
debugfs_create_file("wedged_mode", 0600, root, xe,
&wedged_mode_fops);
+ debugfs_create_file("atomic_svm_timeslice_ms", 0600, root, xe,
+ &atomic_svm_timeslice_ms_fops);
+
+ debugfs_create_file("disable_late_binding", 0600, root, xe,
+ &disable_late_binding_fops);
+
for (mem_type = XE_PL_VRAM0; mem_type <= XE_PL_VRAM1; ++mem_type) {
man = ttm_manager_type(bdev, mem_type);
@@ -229,10 +421,20 @@ void xe_debugfs_register(struct xe_device *xe)
if (man)
ttm_resource_manager_create_debugfs(man, root, "stolen_mm");
+ for_each_tile(tile, xe, tile_id)
+ xe_tile_debugfs_register(tile);
+
for_each_gt(gt, xe, id)
xe_gt_debugfs_register(gt);
xe_pxp_debugfs_register(xe->pxp);
+ xe_psmi_debugfs_register(xe);
+
fault_create_debugfs_attr("fail_gt_reset", root, &gt_reset_failure);
+
+ if (IS_SRIOV_PF(xe))
+ xe_sriov_pf_debugfs_register(xe, root);
+ else if (IS_SRIOV_VF(xe))
+ xe_sriov_vf_debugfs_register(xe, root);
}
diff --git a/drivers/gpu/drm/xe/xe_dep_job_types.h b/drivers/gpu/drm/xe/xe_dep_job_types.h
new file mode 100644
index 000000000000..c6a484f24c8c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dep_job_types.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_DEP_JOB_TYPES_H_
+#define _XE_DEP_JOB_TYPES_H_
+
+#include <drm/gpu_scheduler.h>
+
+struct xe_dep_job;
+
+/** struct xe_dep_job_ops - Generic Xe dependency job operations */
+struct xe_dep_job_ops {
+ /** @run_job: Run generic Xe dependency job */
+ struct dma_fence *(*run_job)(struct xe_dep_job *job);
+ /** @free_job: Free generic Xe dependency job */
+ void (*free_job)(struct xe_dep_job *job);
+};
+
+/** struct xe_dep_job - Generic dependency Xe job */
+struct xe_dep_job {
+ /** @drm: base DRM scheduler job */
+ struct drm_sched_job drm;
+ /** @ops: dependency job operations */
+ const struct xe_dep_job_ops *ops;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.c b/drivers/gpu/drm/xe/xe_dep_scheduler.c
new file mode 100644
index 000000000000..9bd3bfd2e526
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dep_scheduler.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/slab.h>
+
+#include <drm/gpu_scheduler.h>
+
+#include "xe_dep_job_types.h"
+#include "xe_dep_scheduler.h"
+#include "xe_device_types.h"
+
+/**
+ * DOC: Xe Dependency Scheduler
+ *
+ * The Xe dependency scheduler is a simple wrapper built around the DRM
+ * scheduler to execute jobs once their dependencies are resolved (i.e., all
+ * input fences specified as dependencies are signaled). The jobs that are
+ * executed contain virtual functions to run (execute) and free the job,
+ * allowing a single dependency scheduler to handle jobs performing different
+ * operations.
+ *
+ * Example use cases include deferred resource freeing, TLB invalidations after
+ * bind jobs, etc.
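+ *
+ * A minimal usage sketch (hypothetical caller; the ``my_*`` names are
+ * placeholders, and job allocation, dependency handling and error paths are
+ * elided)::
+ *
+ *	static const struct xe_dep_job_ops my_dep_ops = {
+ *		.run_job = my_run_job,
+ *		.free_job = my_free_job,
+ *	};
+ *
+ *	struct xe_dep_scheduler *ds;
+ *	struct xe_dep_job *job;
+ *
+ *	ds = xe_dep_scheduler_create(xe, NULL, "my-dep-sched", 64);
+ *	job->ops = &my_dep_ops;
+ *	drm_sched_job_init(&job->drm, xe_dep_scheduler_entity(ds), ...);
+ *	drm_sched_job_arm(&job->drm);
+ *	drm_sched_entity_push_job(&job->drm);
+ *	...
+ *	xe_dep_scheduler_fini(ds);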
+ */
+
+/** struct xe_dep_scheduler - Generic Xe dependency scheduler */
+struct xe_dep_scheduler {
+ /** @sched: DRM GPU scheduler */
+ struct drm_gpu_scheduler sched;
+ /** @entity: DRM scheduler entity */
+ struct drm_sched_entity entity;
+ /** @rcu: For safe freeing of exported dma fences */
+ struct rcu_head rcu;
+};
+
+static struct dma_fence *xe_dep_scheduler_run_job(struct drm_sched_job *drm_job)
+{
+ struct xe_dep_job *dep_job =
+ container_of(drm_job, typeof(*dep_job), drm);
+
+ return dep_job->ops->run_job(dep_job);
+}
+
+static void xe_dep_scheduler_free_job(struct drm_sched_job *drm_job)
+{
+ struct xe_dep_job *dep_job =
+ container_of(drm_job, typeof(*dep_job), drm);
+
+ dep_job->ops->free_job(dep_job);
+}
+
+static const struct drm_sched_backend_ops sched_ops = {
+ .run_job = xe_dep_scheduler_run_job,
+ .free_job = xe_dep_scheduler_free_job,
+};
+
+/**
+ * xe_dep_scheduler_create() - Generic Xe dependency scheduler create
+ * @xe: Xe device
+ * @submit_wq: Submit workqueue struct (can be NULL)
+ * @name: Name of dependency scheduler
+ * @job_limit: Max dependency jobs that can be scheduled
+ *
+ * Create a generic Xe dependency scheduler and initialize internal DRM
+ * scheduler objects.
+ *
+ * Return: Generic Xe dependency scheduler object on success, ERR_PTR on failure
+ */
+struct xe_dep_scheduler *
+xe_dep_scheduler_create(struct xe_device *xe,
+ struct workqueue_struct *submit_wq,
+ const char *name, u32 job_limit)
+{
+ struct xe_dep_scheduler *dep_scheduler;
+ struct drm_gpu_scheduler *sched;
+ const struct drm_sched_init_args args = {
+ .ops = &sched_ops,
+ .submit_wq = submit_wq,
+ .num_rqs = 1,
+ .credit_limit = job_limit,
+ .timeout = MAX_SCHEDULE_TIMEOUT,
+ .name = name,
+ .dev = xe->drm.dev,
+ };
+ int err;
+
+ dep_scheduler = kzalloc(sizeof(*dep_scheduler), GFP_KERNEL);
+ if (!dep_scheduler)
+ return ERR_PTR(-ENOMEM);
+
+ err = drm_sched_init(&dep_scheduler->sched, &args);
+ if (err)
+ goto err_free;
+
+ sched = &dep_scheduler->sched;
+ err = drm_sched_entity_init(&dep_scheduler->entity, 0, &sched, 1, NULL);
+ if (err)
+ goto err_sched;
+
+ init_rcu_head(&dep_scheduler->rcu);
+
+ return dep_scheduler;
+
+err_sched:
+ drm_sched_fini(&dep_scheduler->sched);
+err_free:
+ kfree(dep_scheduler);
+
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_dep_scheduler_fini() - Generic Xe dependency scheduler finalize
+ * @dep_scheduler: Generic Xe dependency scheduler object
+ *
+ * Finalize internal DRM scheduler objects and free generic Xe dependency
+ * scheduler object
+ */
+void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler)
+{
+ drm_sched_entity_fini(&dep_scheduler->entity);
+ drm_sched_fini(&dep_scheduler->sched);
+ /*
+	 * RCU free due to the sched being exported via DRM scheduler fences
+ * (timeline name).
+ */
+ kfree_rcu(dep_scheduler, rcu);
+}
+
+/**
+ * xe_dep_scheduler_entity() - Retrieve a generic Xe dependency scheduler
+ * DRM scheduler entity
+ * @dep_scheduler: Generic Xe dependency scheduler object
+ *
+ * Return: The generic Xe dependency scheduler's DRM scheduler entity
+ */
+struct drm_sched_entity *
+xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler)
+{
+ return &dep_scheduler->entity;
+}
diff --git a/drivers/gpu/drm/xe/xe_dep_scheduler.h b/drivers/gpu/drm/xe/xe_dep_scheduler.h
new file mode 100644
index 000000000000..853961eec64b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_dep_scheduler.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/types.h>
+
+struct drm_sched_entity;
+struct workqueue_struct;
+struct xe_dep_scheduler;
+struct xe_device;
+
+struct xe_dep_scheduler *
+xe_dep_scheduler_create(struct xe_device *xe,
+ struct workqueue_struct *submit_wq,
+ const char *name, u32 job_limit);
+
+void xe_dep_scheduler_fini(struct xe_dep_scheduler *dep_scheduler);
+
+struct drm_sched_entity *
+xe_dep_scheduler_entity(struct xe_dep_scheduler *dep_scheduler);
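For readers new to this interface, the following is a minimal, purely illustrative sketch of how a caller could build a job on top of the dependency scheduler added above. Everything prefixed with "my_" (my_free_job, my_free_job_run, my_free_job_free, my_free_job_ops) is invented for this example; only xe_dep_job, xe_dep_job_ops and the xe_dep_scheduler_*() functions come from the patch itself.

/* Illustrative only: a hypothetical deferred-free job built on the
 * dependency scheduler interface introduced above.
 */
#include <linux/container_of.h>
#include <linux/dma-fence.h>
#include <linux/slab.h>

#include "xe_bo.h"
#include "xe_dep_job_types.h"
#include "xe_dep_scheduler.h"

struct my_free_job {
	struct xe_dep_job dep;	/* embeds the drm_sched_job in .drm */
	struct xe_bo *bo;	/* resource to drop once dependencies signal */
};

static struct dma_fence *my_free_job_run(struct xe_dep_job *job)
{
	struct my_free_job *fjob = container_of(job, struct my_free_job, dep);

	xe_bo_put(fjob->bo);		/* all input fences have signaled */
	return dma_fence_get_stub();	/* nothing asynchronous left to track */
}

static void my_free_job_free(struct xe_dep_job *job)
{
	kfree(container_of(job, struct my_free_job, dep));
}

static const struct xe_dep_job_ops my_free_job_ops = {
	.run_job = my_free_job_run,
	.free_job = my_free_job_free,
};

Submission would then follow the usual DRM scheduler flow: initialize the embedded dep.drm job against xe_dep_scheduler_entity(), add the fences to wait on with drm_sched_job_add_dependency(), then drm_sched_job_arm() and drm_sched_entity_push_job(); the scheduler invokes .run_job once those fences signal and .free_job when the job retires.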
diff --git a/drivers/gpu/drm/xe/xe_devcoredump.c b/drivers/gpu/drm/xe/xe_devcoredump.c
index 7a8af2311318..d444eda65ca6 100644
--- a/drivers/gpu/drm/xe/xe_devcoredump.c
+++ b/drivers/gpu/drm/xe/xe_devcoredump.c
@@ -106,9 +106,9 @@ static ssize_t __xe_devcoredump_read(char *buffer, ssize_t count,
drm_puts(&p, "module: " KBUILD_MODNAME "\n");
ts = ktime_to_timespec64(ss->snapshot_time);
- drm_printf(&p, "Snapshot time: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+ drm_printf(&p, "Snapshot time: %ptSp\n", &ts);
ts = ktime_to_timespec64(ss->boot_time);
- drm_printf(&p, "Uptime: %lld.%09ld\n", ts.tv_sec, ts.tv_nsec);
+ drm_printf(&p, "Uptime: %ptSp\n", &ts);
drm_printf(&p, "Process: %s [%d]\n", ss->process_name, ss->pid);
xe_device_snapshot_print(xe, &p);
@@ -171,14 +171,32 @@ static void xe_devcoredump_snapshot_free(struct xe_devcoredump_snapshot *ss)
#define XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G)
+/**
+ * xe_devcoredump_read() - Read data from the Xe device coredump snapshot
+ * @buffer: Destination buffer to copy the coredump data into
+ * @offset: Offset in the coredump data to start reading from
+ * @count: Number of bytes to read
+ * @data: Pointer to the xe_devcoredump structure
+ * @datalen: Length of the data (unused)
+ *
+ * Reads a chunk of the coredump snapshot data into the provided buffer.
+ * If the devcoredump is smaller than 1.5 GB (XE_DEVCOREDUMP_CHUNK_MAX),
+ * it is read directly from a pre-written buffer. For larger devcoredumps,
+ * the pre-written buffer must be periodically repopulated from the snapshot
+ * state due to kmalloc size limitations.
+ *
+ * Return: Number of bytes copied on success, or a negative error code on failure.
+ */
static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
size_t count, void *data, size_t datalen)
{
struct xe_devcoredump *coredump = data;
struct xe_devcoredump_snapshot *ss;
- ssize_t byte_copied;
+ ssize_t byte_copied = 0;
u32 chunk_offset;
ssize_t new_chunk_position;
+ bool pm_needed = false;
+ int ret = 0;
if (!coredump)
return -ENODEV;
@@ -188,20 +206,19 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
/* Ensure delayed work is captured before continuing */
flush_work(&ss->work);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ pm_needed = ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX;
+ if (pm_needed)
xe_pm_runtime_get(gt_to_xe(ss->gt));
mutex_lock(&coredump->lock);
if (!ss->read.buffer) {
- mutex_unlock(&coredump->lock);
- return -ENODEV;
+ ret = -ENODEV;
+ goto unlock;
}
- if (offset >= ss->read.size) {
- mutex_unlock(&coredump->lock);
- return 0;
- }
+ if (offset >= ss->read.size)
+ goto unlock;
new_chunk_position = div_u64_rem(offset,
XE_DEVCOREDUMP_CHUNK_MAX,
@@ -221,12 +238,13 @@ static ssize_t xe_devcoredump_read(char *buffer, loff_t offset,
ss->read.size - offset;
memcpy(buffer, ss->read.buffer + chunk_offset, byte_copied);
+unlock:
mutex_unlock(&coredump->lock);
- if (ss->read.size > XE_DEVCOREDUMP_CHUNK_MAX)
+ if (pm_needed)
xe_pm_runtime_put(gt_to_xe(ss->gt));
- return byte_copied;
+ return byte_copied ? byte_copied : ret;
}
static void xe_devcoredump_free(void *data)
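To make the chunking described in the new kernel-doc above concrete, here is a small standalone sketch (not part of the patch) of how a read offset maps to a chunk index and an offset inside that chunk, mirroring the div_u64_rem() call in xe_devcoredump_read(); CHUNK_MAX and locate_chunk() are local to this illustration.

/* Standalone illustration of the chunk bookkeeping used above.
 * CHUNK_MAX mirrors XE_DEVCOREDUMP_CHUNK_MAX (SZ_512M + SZ_1G = 1.5 GiB).
 */
#include <linux/math64.h>
#include <linux/sizes.h>
#include <linux/types.h>

#define CHUNK_MAX	(SZ_512M + SZ_1G)

static void locate_chunk(u64 offset, u64 *chunk_index, u32 *chunk_offset)
{
	/* The quotient selects which 1.5 GiB chunk has to be resident... */
	*chunk_index = div_u64_rem(offset, CHUNK_MAX, chunk_offset);
	/* ...and the remainder is the read position within that chunk. */
}

For example, an offset of 2 GiB lands in chunk 1 at chunk_offset 512 MiB, which is why the read path repopulates ss->read.buffer whenever a request crosses into a chunk other than the one currently buffered.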
@@ -313,13 +331,9 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
{
struct xe_devcoredump_snapshot *ss = &coredump->snapshot;
struct xe_guc *guc = exec_queue_to_guc(q);
- u32 adj_logical_mask = q->logical_mask;
- u32 width_mask = (0x1 << q->width) - 1;
const char *process_name = "no process";
-
unsigned int fw_ref;
bool cookie;
- int i;
ss->snapshot_time = ktime_get_real();
ss->boot_time = ktime_get_boottime();
@@ -335,14 +349,6 @@ static void devcoredump_snapshot(struct xe_devcoredump *coredump,
INIT_WORK(&ss->work, xe_devcoredump_deferred_snap_work);
cookie = dma_fence_begin_signalling();
- for (i = 0; q->width > 1 && i < XE_HW_ENGINE_MAX_INSTANCE;) {
- if (adj_logical_mask & BIT(i)) {
- adj_logical_mask |= width_mask << i;
- i += q->width;
- } else {
- ++i;
- }
- }
/* keep going if fw fails as we still want to save the memory and SW data */
fw_ref = xe_force_wake_get(gt_to_fw(q->gt), XE_FORCEWAKE_ALL);
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index c02c4c4e9412..cf29e259861f 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -8,6 +8,7 @@
#include <linux/aperture.h>
#include <linux/delay.h>
#include <linux/fault-inject.h>
+#include <linux/iopoll.h>
#include <linux/units.h>
#include <drm/drm_atomic_helper.h>
@@ -40,18 +41,23 @@
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
+#include "xe_guc_pc.h"
#include "xe_hw_engine_group.h"
#include "xe_hwmon.h"
+#include "xe_i2c.h"
#include "xe_irq.h"
-#include "xe_memirq.h"
+#include "xe_late_bind_fw.h"
#include "xe_mmio.h"
#include "xe_module.h"
+#include "xe_nvm.h"
#include "xe_oa.h"
#include "xe_observation.h"
+#include "xe_pagefault.h"
#include "xe_pat.h"
#include "xe_pcode.h"
#include "xe_pm.h"
#include "xe_pmu.h"
+#include "xe_psmi.h"
#include "xe_pxp.h"
#include "xe_query.h"
#include "xe_shrinker.h"
@@ -61,11 +67,14 @@
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
+#include "xe_vm_madvise.h"
#include "xe_vram.h"
+#include "xe_vram_types.h"
#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
#include "xe_wa.h"
+#include <generated/xe_device_wa_oob.h>
#include <generated/xe_wa_oob.h>
static int xe_file_open(struct drm_device *dev, struct drm_file *file)
@@ -197,6 +206,9 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_QUERY_MEM_RANGE_ATTRS, xe_vm_query_vmas_attrs_ioctl,
+ DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
@@ -402,9 +414,6 @@ static void xe_device_destroy(struct drm_device *dev, void *dummy)
if (xe->unordered_wq)
destroy_workqueue(xe->unordered_wq);
- if (!IS_ERR_OR_NULL(xe->mem.shrinker))
- xe_shrinker_destroy(xe->mem.shrinker);
-
if (xe->destroy_wq)
destroy_workqueue(xe->destroy_wq);
@@ -429,7 +438,7 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
err = ttm_device_init(&xe->ttm, &xe_ttm_funcs, xe->drm.dev,
xe->drm.anon_inode->i_mapping,
- xe->drm.vma_offset_manager, false, false);
+ xe->drm.vma_offset_manager, 0);
if (WARN_ON(err))
goto err;
@@ -438,18 +447,21 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
if (err)
goto err;
- xe->mem.shrinker = xe_shrinker_create(xe);
- if (IS_ERR(xe->mem.shrinker))
- return ERR_CAST(xe->mem.shrinker);
+ err = xe_shrinker_create(xe);
+ if (err)
+ goto err;
xe->info.devid = pdev->device;
xe->info.revid = pdev->revision;
xe->info.force_execlist = xe_modparam.force_execlist;
+ xe->atomic_svm_timeslice_ms = 5;
err = xe_irq_init(xe);
if (err)
goto err;
+ xe_validation_device_init(&xe->val);
+
init_waitqueue_head(&xe->ufence_wq);
init_rwsem(&xe->usm.lock);
@@ -493,10 +505,6 @@ struct xe_device *xe_device_create(struct pci_dev *pdev,
if (err)
goto err;
- err = xe_display_create(xe);
- if (WARN_ON(err))
- goto err;
-
return xe;
err:
@@ -527,7 +535,7 @@ static bool xe_driver_flr_disabled(struct xe_device *xe)
* re-init and saving/restoring (or re-populating) the wiped memory. Since we
* perform the FLR as the very last action before releasing access to the HW
* during the driver release flow, we don't attempt recovery at all, because
- * if/when a new instance of i915 is bound to the device it will do a full
+ * if/when a new instance of Xe is bound to the device it will do a full
* re-init anyway.
*/
static void __xe_driver_flr(struct xe_device *xe)
@@ -624,16 +632,22 @@ mask_err:
return err;
}
-static bool verify_lmem_ready(struct xe_device *xe)
+static int lmem_initializing(struct xe_device *xe)
{
- u32 val = xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT;
+ if (xe_mmio_read32(xe_root_tile_mmio(xe), GU_CNTL) & LMEM_INIT)
+ return 0;
- return !!val;
+ if (signal_pending(current))
+ return -EINTR;
+
+ return 1;
}
static int wait_for_lmem_ready(struct xe_device *xe)
{
- unsigned long timeout, start;
+ const unsigned long TIMEOUT_SEC = 60;
+ unsigned long prev_jiffies;
+ int initializing;
if (!IS_DGFX(xe))
return 0;
@@ -641,53 +655,65 @@ static int wait_for_lmem_ready(struct xe_device *xe)
if (IS_SRIOV_VF(xe))
return 0;
- if (verify_lmem_ready(xe))
+ if (!lmem_initializing(xe))
return 0;
drm_dbg(&xe->drm, "Waiting for lmem initialization\n");
+ prev_jiffies = jiffies;
- start = jiffies;
- timeout = start + secs_to_jiffies(60); /* 60 sec! */
-
- do {
- if (signal_pending(current))
- return -EINTR;
-
- /*
- * The boot firmware initializes local memory and
- * assesses its health. If memory training fails,
- * the punit will have been instructed to keep the GT powered
- * down.we won't be able to communicate with it
- *
- * If the status check is done before punit updates the register,
- * it can lead to the system being unusable.
- * use a timeout and defer the probe to prevent this.
- */
- if (time_after(jiffies, timeout)) {
- drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
- return -EPROBE_DEFER;
- }
-
- msleep(20);
-
- } while (!verify_lmem_ready(xe));
+ /*
+ * The boot firmware initializes local memory and
+ * assesses its health. If memory training fails,
+ * the punit will have been instructed to keep the GT powered
+	 * down and we won't be able to communicate with it.
+ *
+ * If the status check is done before punit updates the register,
+ * it can lead to the system being unusable.
+	 * Use a timeout and defer the probe to prevent this.
+ */
+ poll_timeout_us(initializing = lmem_initializing(xe),
+ initializing <= 0,
+ 20 * USEC_PER_MSEC, TIMEOUT_SEC * USEC_PER_SEC, true);
+ if (initializing < 0)
+ return initializing;
+
+ if (initializing) {
+ drm_dbg(&xe->drm, "lmem not initialized by firmware\n");
+ return -EPROBE_DEFER;
+ }
drm_dbg(&xe->drm, "lmem ready after %ums",
- jiffies_to_msecs(jiffies - start));
+ jiffies_to_msecs(jiffies - prev_jiffies));
return 0;
}
ALLOW_ERROR_INJECTION(wait_for_lmem_ready, ERRNO); /* See xe_pci_probe() */
-static void sriov_update_device_info(struct xe_device *xe)
+static void vf_update_device_info(struct xe_device *xe)
{
+ xe_assert(xe, IS_SRIOV_VF(xe));
/* disable features that are not available/applicable to VFs */
- if (IS_SRIOV_VF(xe)) {
- xe->info.probe_display = 0;
- xe->info.has_heci_gscfi = 0;
- xe->info.skip_guc_pc = 1;
- xe->info.skip_pcode = 1;
- }
+ xe->info.probe_display = 0;
+ xe->info.has_heci_cscfi = 0;
+ xe->info.has_heci_gscfi = 0;
+ xe->info.has_late_bind = 0;
+ xe->info.skip_guc_pc = 1;
+ xe->info.skip_pcode = 1;
+}
+
+static int xe_device_vram_alloc(struct xe_device *xe)
+{
+ struct xe_vram_region *vram;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ vram = drmm_kzalloc(&xe->drm, sizeof(*vram), GFP_KERNEL);
+ if (!vram)
+ return -ENOMEM;
+
+ xe->mem.vram = vram;
+ return 0;
}
/**
@@ -704,13 +730,17 @@ int xe_device_probe_early(struct xe_device *xe)
{
int err;
+ xe_wa_device_init(xe);
+ xe_wa_process_device_oob(xe);
+
err = xe_mmio_probe_early(xe);
if (err)
return err;
xe_sriov_probe_early(xe);
- sriov_update_device_info(xe);
+ if (IS_SRIOV_VF(xe))
+ vf_update_device_info(xe);
err = xe_pcode_probe_early(xe);
if (err || xe_survivability_mode_is_requested(xe)) {
@@ -721,7 +751,7 @@ int xe_device_probe_early(struct xe_device *xe)
* possible, but still return the previous error for error
* propagation
*/
- err = xe_survivability_mode_enable(xe);
+ err = xe_survivability_mode_boot_enable(xe);
if (err)
return err;
@@ -734,6 +764,10 @@ int xe_device_probe_early(struct xe_device *xe)
xe->wedged.mode = xe_modparam.wedged_mode;
+ err = xe_device_vram_alloc(xe);
+ if (err)
+ return err;
+
return 0;
}
ALLOW_ERROR_INJECTION(xe_device_probe_early, ERRNO); /* See xe_pci_probe() */
@@ -749,6 +783,8 @@ static int probe_has_flat_ccs(struct xe_device *xe)
return 0;
gt = xe_root_mmio_gt(xe);
+ if (!gt)
+ return 0;
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
@@ -789,51 +825,18 @@ int xe_device_probe(struct xe_device *xe)
if (err)
return err;
- err = xe_ttm_sys_mgr_init(xe);
- if (err)
- return err;
-
for_each_gt(gt, xe, id) {
err = xe_gt_init_early(gt);
if (err)
return err;
-
- /*
- * Only after this point can GT-specific MMIO operations
- * (including things like communication with the GuC)
- * be performed.
- */
- xe_gt_mmio_init(gt);
}
for_each_tile(tile, xe, id) {
- if (IS_SRIOV_VF(xe)) {
- xe_guc_comm_init_early(&tile->primary_gt->uc.guc);
- err = xe_gt_sriov_vf_bootstrap(tile->primary_gt);
- if (err)
- return err;
- err = xe_gt_sriov_vf_query_config(tile->primary_gt);
- if (err)
- return err;
- }
err = xe_ggtt_init_early(tile->mem.ggtt);
if (err)
return err;
- err = xe_memirq_init(&tile->memirq);
- if (err)
- return err;
}
- for_each_gt(gt, xe, id) {
- err = xe_gt_init_hwconfig(gt);
- if (err)
- return err;
- }
-
- err = xe_devcoredump_init(xe);
- if (err)
- return err;
-
/*
* From here on, if a step fails, make sure a Driver-FLR is triggereed
*/
@@ -855,6 +858,14 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
+ /*
+ * Allow allocations only now to ensure xe_display_init_early()
+ * is the first to allocate, always.
+ */
+ err = xe_ttm_sys_mgr_init(xe);
+ if (err)
+ return err;
+
/* Allocate and map stolen after potential VRAM resize */
err = xe_ttm_stolen_mgr_init(xe);
if (err)
@@ -886,10 +897,28 @@ int xe_device_probe(struct xe_device *xe)
return err;
}
+ err = xe_pagefault_init(xe);
+ if (err)
+ return err;
+
+ if (xe->tiles->media_gt &&
+ XE_GT_WA(xe->tiles->media_gt, 15015404425_disable))
+ XE_DEVICE_WA_DISABLE(xe, 15015404425);
+
+ err = xe_devcoredump_init(xe);
+ if (err)
+ return err;
+
+ xe_nvm_init(xe);
+
err = xe_heci_gsc_init(xe);
if (err)
return err;
+ err = xe_late_bind_init(&xe->late_bind);
+ if (err)
+ return err;
+
err = xe_oa_init(xe);
if (err)
return err;
@@ -902,6 +931,10 @@ int xe_device_probe(struct xe_device *xe)
if (err)
return err;
+ err = xe_psmi_init(xe);
+ if (err)
+ return err;
+
err = drm_dev_register(&xe->drm, 0);
if (err)
return err;
@@ -926,11 +959,19 @@ int xe_device_probe(struct xe_device *xe)
if (err)
goto err_unregister_display;
+ err = xe_i2c_probe(xe);
+ if (err)
+ goto err_unregister_display;
+
for_each_gt(gt, xe, id)
xe_gt_sanitize_freq(gt);
xe_vsec_init(xe);
+ err = xe_sriov_init_late(xe);
+ if (err)
+ goto err_unregister_display;
+
return devm_add_action_or_reset(xe->drm.dev, xe_device_sanitize, xe);
err_unregister_display:
@@ -943,6 +984,8 @@ void xe_device_remove(struct xe_device *xe)
{
xe_display_unregister(xe);
+ xe_nvm_fini(xe);
+
drm_dev_unplug(&xe->drm);
xe_bo_pci_dev_remove_all(xe);
@@ -955,16 +998,16 @@ void xe_device_shutdown(struct xe_device *xe)
drm_dbg(&xe->drm, "Shutting down device\n");
- if (xe_driver_flr_disabled(xe)) {
- xe_display_pm_shutdown(xe);
+ xe_display_pm_shutdown(xe);
- xe_irq_suspend(xe);
+ xe_irq_suspend(xe);
- for_each_gt(gt, xe, id)
- xe_gt_shutdown(gt);
+ for_each_gt(gt, xe, id)
+ xe_gt_shutdown(gt);
- xe_display_pm_shutdown_late(xe);
- } else {
+ xe_display_pm_shutdown_late(xe);
+
+ if (!xe_driver_flr_disabled(xe)) {
/* BOOM! */
__xe_driver_flr(xe);
}
@@ -986,38 +1029,15 @@ void xe_device_wmb(struct xe_device *xe)
xe_mmio_write32(xe_root_tile_mmio(xe), VF_CAP_REG, 0);
}
-/**
- * xe_device_td_flush() - Flush transient L3 cache entries
- * @xe: The device
- *
- * Display engine has direct access to memory and is never coherent with L3/L4
- * caches (or CPU caches), however KMD is responsible for specifically flushing
- * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
- * can happen from such a surface without seeing corruption.
- *
- * Display surfaces can be tagged as transient by mapping it using one of the
- * various L3:XD PAT index modes on Xe2.
- *
- * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
- * at the end of each submission via PIPE_CONTROL for compute/render, since SA
- * Media is not coherent with L3 and we want to support render-vs-media
- * usescases. For other engines like copy/blt the HW internally forces uncached
- * behaviour, hence why we can skip the TDF on such platforms.
+/*
+ * Issue a TRANSIENT_FLUSH_REQUEST and wait for completion on each gt.
*/
-void xe_device_td_flush(struct xe_device *xe)
+static void tdf_request_sync(struct xe_device *xe)
{
- struct xe_gt *gt;
unsigned int fw_ref;
+ struct xe_gt *gt;
u8 id;
- if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
- return;
-
- if (XE_WA(xe_root_mmio_gt(xe), 16023588340)) {
- xe_device_l2_flush(xe);
- return;
- }
-
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
continue;
@@ -1027,6 +1047,7 @@ void xe_device_td_flush(struct xe_device *xe)
return;
xe_mmio_write32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST);
+
/*
* FIXME: We can likely do better here with our choice of
* timeout. Currently we just assume the worst case, i.e. 150us,
@@ -1035,7 +1056,7 @@ void xe_device_td_flush(struct xe_device *xe)
* transient and need to be flushed..
*/
if (xe_mmio_wait32(&gt->mmio, XE2_TDF_CTRL, TRANSIENT_FLUSH_REQUEST, 0,
- 150, NULL, false))
+ 300, NULL, false))
xe_gt_err_once(gt, "TD flush timeout\n");
xe_force_wake_put(gt_to_fw(gt), fw_ref);
@@ -1048,8 +1069,10 @@ void xe_device_l2_flush(struct xe_device *xe)
unsigned int fw_ref;
gt = xe_root_mmio_gt(xe);
+ if (!gt)
+ return;
- if (!XE_WA(gt, 16023588340))
+ if (!XE_GT_WA(gt, 16023588340))
return;
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
@@ -1057,15 +1080,55 @@ void xe_device_l2_flush(struct xe_device *xe)
return;
spin_lock(&gt->global_invl_lock);
- xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
- if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 500, NULL, true))
+ xe_mmio_write32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1);
+ if (xe_mmio_wait32(&gt->mmio, XE2_GLOBAL_INVAL, 0x1, 0x0, 1000, NULL, true))
xe_gt_err_once(gt, "Global invalidation timeout\n");
+
spin_unlock(&gt->global_invl_lock);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
+/**
+ * xe_device_td_flush() - Flush transient L3 cache entries
+ * @xe: The device
+ *
+ * Display engine has direct access to memory and is never coherent with L3/L4
+ * caches (or CPU caches), however KMD is responsible for specifically flushing
+ * transient L3 GPU cache entries prior to the flip sequence to ensure scanout
+ * can happen from such a surface without seeing corruption.
+ *
+ * Display surfaces can be tagged as transient by mapping it using one of the
+ * various L3:XD PAT index modes on Xe2.
+ *
+ * Note: On non-discrete xe2 platforms, like LNL, the entire L3 cache is flushed
+ * at the end of each submission via PIPE_CONTROL for compute/render, since SA
+ * Media is not coherent with L3 and we want to support render-vs-media
+ * use cases. For other engines like copy/blt the HW internally forces uncached
+ * behaviour, hence we can skip the TDF on such platforms.
+ */
+void xe_device_td_flush(struct xe_device *xe)
+{
+ struct xe_gt *root_gt;
+
+ if (!IS_DGFX(xe) || GRAPHICS_VER(xe) < 20)
+ return;
+
+ root_gt = xe_root_mmio_gt(xe);
+ if (!root_gt)
+ return;
+
+ if (XE_GT_WA(root_gt, 16023588340)) {
+ /* A transient flush is not sufficient: flush the L2 */
+ xe_device_l2_flush(xe);
+ } else {
+ xe_guc_pc_apply_flush_freq_limit(&root_gt->uc.guc.pc);
+ tdf_request_sync(xe);
+ xe_guc_pc_remove_flush_freq_limit(&root_gt->uc.guc.pc);
+ }
+}
+
u32 xe_device_ccs_bytes(struct xe_device *xe, u64 size)
{
return xe_device_has_flat_ccs(xe) ?
@@ -1127,11 +1190,63 @@ static void xe_device_wedged_fini(struct drm_device *drm, void *arg)
}
/**
+ * DOC: Xe Device Wedging
+ *
+ * The Xe driver uses the drm device wedged uevent as documented in
+ * Documentation/gpu/drm-uapi.rst. When the device is in a wedged state, every
+ * IOCTL is blocked and the GT cannot be used. Certain critical errors, such as
+ * a GT reset failure or firmware failures, can cause the device to be wedged.
+ * The default recovery method for a wedged state is rebind/bus-reset.
+ *
+ * Another recovery method is vendor-specific. Below are the cases that send the
+ * ``WEDGED=vendor-specific`` recovery method in the drm device wedged uevent.
+ *
+ * Case: Firmware Flash
+ * --------------------
+ *
+ * Identification Hint
+ * +++++++++++++++++++
+ *
+ * ``WEDGED=vendor-specific`` drm device wedged uevent with
+ * :ref:`Runtime Survivability mode <xe-survivability-mode>` is used to notify
+ * admin/userspace consumer about the need for a firmware flash.
+ *
+ * Recovery Procedure
+ * ++++++++++++++++++
+ *
+ * Once the ``WEDGED=vendor-specific`` drm device wedged uevent is received,
+ * follow the steps below:
+ *
+ * - Check the Runtime Survivability mode sysfs.
+ *   If enabled, a firmware flash is required to recover the device.
+ *
+ * /sys/bus/pci/devices/<device>/survivability_mode
+ *
+ * - Admin/userspace consumer can use firmware flashing tools like fwupd to flash
+ * firmware and restore device to normal operation.
+ */
+
+/**
+ * xe_device_set_wedged_method - Set wedged recovery method
+ * @xe: xe device instance
+ * @method: recovery method to set
+ *
+ * Set wedged recovery method to be sent in drm wedged uevent.
+ */
+void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method)
+{
+ xe->wedged.method = method;
+}
+
+/**
* xe_device_declare_wedged - Declare device wedged
* @xe: xe device instance
*
- * This is a final state that can only be cleared with a module
- * re-probe (unbind + bind).
+ * This is a final state that can only be cleared with the recovery method
+ * specified in the drm wedged uevent. The method can be set using
+ * xe_device_set_wedged_method() before declaring the device as wedged. If no
+ * method is set, the default rebind/bus-reset method is sent.
+ *
* In this state every IOCTL will be blocked so the GT cannot be used.
* In general it will be called upon any critical error such as gt reset
* failure or guc loading failure. Userspace will be notified of this state
@@ -1165,12 +1280,18 @@ void xe_device_declare_wedged(struct xe_device *xe)
"IOCTLs and executions are blocked. Only a rebind may clear the failure\n"
"Please file a _new_ bug report at https://gitlab.freedesktop.org/drm/xe/kernel/issues/new\n",
dev_name(xe->drm.dev));
-
- /* Notify userspace of wedged device */
- drm_dev_wedged_event(&xe->drm,
- DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET);
}
for_each_gt(gt, xe, id)
xe_gt_declare_wedged(gt);
+
+ if (xe_device_wedged(xe)) {
+ /* If no wedge recovery method is set, use default */
+ if (!xe->wedged.method)
+ xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_REBIND |
+ DRM_WEDGE_RECOVERY_BUS_RESET);
+
+ /* Notify userspace of wedged device */
+ drm_dev_wedged_event(&xe->drm, xe->wedged.method, NULL);
+ }
}
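As a hedged illustration of the new wedged-method plumbing (not code from this patch), a caller that knows only a firmware flash can recover the device might set the vendor-specific method before declaring the device wedged. DRM_WEDGE_RECOVERY_VENDOR is assumed here to be the vendor-specific recovery flag provided by the drm wedged-uevent infrastructure.

/* Hypothetical caller, e.g. a survivability-mode path: request the
 * vendor-specific recovery method so userspace is told to flash firmware
 * rather than attempt the default rebind/bus-reset.
 */
xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR);
xe_device_declare_wedged(xe);

If no method is set, xe_device_declare_wedged() falls back to DRM_WEDGE_RECOVERY_REBIND | DRM_WEDGE_RECOVERY_BUS_RESET, as shown in the hunk above.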
diff --git a/drivers/gpu/drm/xe/xe_device.h b/drivers/gpu/drm/xe/xe_device.h
index 0bc3bc8e6803..32cc6323b7f6 100644
--- a/drivers/gpu/drm/xe/xe_device.h
+++ b/drivers/gpu/drm/xe/xe_device.h
@@ -60,35 +60,32 @@ static inline struct xe_tile *xe_device_get_root_tile(struct xe_device *xe)
return &xe->tiles[0];
}
+/*
+ * Highest GT/tile count for any platform. Used only for memory allocation
+ * sizing. Any logic looping over GTs or mapping userspace GT IDs into GT
+ * structures should use the per-platform xe->info.max_gt_per_tile instead.
+ */
#define XE_MAX_GT_PER_TILE 2
-static inline struct xe_gt *xe_tile_get_gt(struct xe_tile *tile, u8 gt_id)
-{
- if (drm_WARN_ON(&tile_to_xe(tile)->drm, gt_id >= XE_MAX_GT_PER_TILE))
- gt_id = 0;
-
- return gt_id ? tile->media_gt : tile->primary_gt;
-}
-
static inline struct xe_gt *xe_device_get_gt(struct xe_device *xe, u8 gt_id)
{
- struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+ struct xe_tile *tile;
struct xe_gt *gt;
- /*
- * FIXME: This only works for now because multi-tile and standalone
- * media are mutually exclusive on the platforms we have today.
- *
- * id => GT mapping may change once we settle on how we want to handle
- * our UAPI.
- */
- if (MEDIA_VER(xe) >= 13) {
- gt = xe_tile_get_gt(root_tile, gt_id);
- } else {
- if (drm_WARN_ON(&xe->drm, gt_id >= XE_MAX_TILES_PER_DEVICE))
- gt_id = 0;
-
- gt = xe->tiles[gt_id].primary_gt;
+ if (gt_id >= xe->info.tile_count * xe->info.max_gt_per_tile)
+ return NULL;
+
+ tile = &xe->tiles[gt_id / xe->info.max_gt_per_tile];
+ switch (gt_id % xe->info.max_gt_per_tile) {
+ default:
+ xe_assert(xe, false);
+ fallthrough;
+ case 0:
+ gt = tile->primary_gt;
+ break;
+ case 1:
+ gt = tile->media_gt;
+ break;
}
if (!gt)
@@ -130,14 +127,14 @@ static inline bool xe_device_uc_enabled(struct xe_device *xe)
for ((id__) = 1; (id__) < (xe__)->info.tile_count; (id__)++) \
for_each_if((tile__) = &(xe__)->tiles[(id__)])
-/*
- * FIXME: This only works for now since multi-tile and standalone media
- * happen to be mutually exclusive. Future platforms may change this...
- */
#define for_each_gt(gt__, xe__, id__) \
- for ((id__) = 0; (id__) < (xe__)->info.gt_count; (id__)++) \
+ for ((id__) = 0; (id__) < (xe__)->info.tile_count * (xe__)->info.max_gt_per_tile; (id__)++) \
for_each_if((gt__) = xe_device_get_gt((xe__), (id__)))
+#define for_each_gt_on_tile(gt__, tile__, id__) \
+ for_each_gt((gt__), (tile__)->xe, (id__)) \
+ for_each_if((gt__)->tile == (tile__))
+
static inline struct xe_force_wake *gt_to_fw(struct xe_gt *gt)
{
return &gt->pm.fw;
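A brief, hypothetical usage sketch of the reworked iterators above (not part of the patch): with max_gt_per_tile == 2 the fixed mapping makes even GT IDs a tile's primary GT and odd IDs its media GT, so for_each_gt() can now encounter IDs for which xe_device_get_gt() returns NULL, and the for_each_if() filter simply skips them. The helper below exists only for this example.

/* Count the GTs that sit on one tile using the new per-tile iterator.
 * Purely illustrative.
 */
static unsigned int count_gts_on_tile(struct xe_tile *tile)
{
	struct xe_gt *gt;
	unsigned int count = 0;
	u8 id;

	for_each_gt_on_tile(gt, tile, id)	/* NULL and foreign GTs are skipped */
		count++;

	return count;
}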
@@ -190,11 +187,14 @@ static inline bool xe_device_wedged(struct xe_device *xe)
return atomic_read(&xe->wedged.flag);
}
+void xe_device_set_wedged_method(struct xe_device *xe, unsigned long method);
void xe_device_declare_wedged(struct xe_device *xe);
struct xe_file *xe_file_get(struct xe_file *xef);
void xe_file_put(struct xe_file *xef);
+int xe_is_injection_active(void);
+
/*
* Occasionally it is seen that the G2H worker starts running after a delay of more than
* a second even after being queued and activated by the Linux workqueue subsystem. This
diff --git a/drivers/gpu/drm/xe/xe_device_sysfs.c b/drivers/gpu/drm/xe/xe_device_sysfs.c
index 2e657692e5b5..ec9c06b06fb5 100644
--- a/drivers/gpu/drm/xe/xe_device_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_device_sysfs.c
@@ -24,6 +24,12 @@
*
* vram_d3cold_threshold - Report/change vram used threshold(in MB) below
* which vram save/restore is permissible during runtime D3cold entry/exit.
+ *
+ * lb_fan_control_version - Fan control version provisioned by late binding.
+ * Exposed only if supported by the device.
+ *
+ * lb_voltage_regulator_version - Voltage regulator version provisioned by late
+ * binding. Exposed only if supported by the device.
*/
static ssize_t
@@ -32,13 +38,8 @@ vram_d3cold_threshold_show(struct device *dev,
{
struct pci_dev *pdev = to_pci_dev(dev);
struct xe_device *xe = pdev_to_xe_device(pdev);
- int ret;
- xe_pm_runtime_get(xe);
- ret = sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
- xe_pm_runtime_put(xe);
-
- return ret;
+ return sysfs_emit(buf, "%d\n", xe->d3cold.vram_threshold);
}
static ssize_t
@@ -65,6 +66,128 @@ vram_d3cold_threshold_store(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RW(vram_d3cold_threshold);
+static struct attribute *vram_attrs[] = {
+ &dev_attr_vram_d3cold_threshold.attr,
+ NULL
+};
+
+static const struct attribute_group vram_attr_group = {
+ .attrs = vram_attrs,
+};
+
+static ssize_t
+lb_fan_control_version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
+ struct xe_tile *root = xe_device_get_root_tile(xe);
+ u32 cap = 0, ver_low = FAN_TABLE, ver_high = FAN_TABLE;
+ u16 major = 0, minor = 0, hotfix = 0, build = 0;
+ int ret;
+
+ xe_pm_runtime_get(xe);
+
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
+ &cap, NULL);
+ if (ret)
+ goto out;
+
+ if (REG_FIELD_GET(V1_FAN_PROVISIONED, cap)) {
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0),
+ &ver_low, NULL);
+ if (ret)
+ goto out;
+
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0),
+ &ver_high, NULL);
+ if (ret)
+ goto out;
+
+ major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low);
+ minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low);
+ hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high);
+ build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high);
+ }
+out:
+ xe_pm_runtime_put(xe);
+
+ return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build);
+}
+static DEVICE_ATTR_ADMIN_RO(lb_fan_control_version);
+
+static ssize_t
+lb_voltage_regulator_version_show(struct device *dev, struct device_attribute *attr, char *buf)
+{
+ struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
+ struct xe_tile *root = xe_device_get_root_tile(xe);
+ u32 cap = 0, ver_low = VR_CONFIG, ver_high = VR_CONFIG;
+ u16 major = 0, minor = 0, hotfix = 0, build = 0;
+ int ret;
+
+ xe_pm_runtime_get(xe);
+
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
+ &cap, NULL);
+ if (ret)
+ goto out;
+
+ if (REG_FIELD_GET(VR_PARAMS_PROVISIONED, cap)) {
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_LOW, 0),
+ &ver_low, NULL);
+ if (ret)
+ goto out;
+
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_VERSION_HIGH, 0),
+ &ver_high, NULL);
+ if (ret)
+ goto out;
+
+ major = REG_FIELD_GET(MAJOR_VERSION_MASK, ver_low);
+ minor = REG_FIELD_GET(MINOR_VERSION_MASK, ver_low);
+ hotfix = REG_FIELD_GET(HOTFIX_VERSION_MASK, ver_high);
+ build = REG_FIELD_GET(BUILD_VERSION_MASK, ver_high);
+ }
+out:
+ xe_pm_runtime_put(xe);
+
+ return ret ?: sysfs_emit(buf, "%u.%u.%u.%u\n", major, minor, hotfix, build);
+}
+static DEVICE_ATTR_ADMIN_RO(lb_voltage_regulator_version);
+
+static struct attribute *late_bind_attrs[] = {
+ &dev_attr_lb_fan_control_version.attr,
+ &dev_attr_lb_voltage_regulator_version.attr,
+ NULL
+};
+
+static umode_t late_bind_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int n)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_device *xe = pdev_to_xe_device(to_pci_dev(dev));
+ struct xe_tile *root = xe_device_get_root_tile(xe);
+ u32 cap = 0;
+ int ret;
+
+ ret = xe_pcode_read(root, PCODE_MBOX(PCODE_LATE_BINDING, GET_CAPABILITY_STATUS, 0),
+ &cap, NULL);
+ if (ret)
+ return 0;
+
+ if (attr == &dev_attr_lb_fan_control_version.attr &&
+ REG_FIELD_GET(V1_FAN_SUPPORTED, cap))
+ return attr->mode;
+ if (attr == &dev_attr_lb_voltage_regulator_version.attr &&
+ REG_FIELD_GET(VR_PARAMS_SUPPORTED, cap))
+ return attr->mode;
+
+ return 0;
+}
+
+static const struct attribute_group late_bind_attr_group = {
+ .attrs = late_bind_attrs,
+ .is_visible = late_bind_attr_is_visible,
+};
+
/**
* DOC: PCIe Gen5 Limitations
*
@@ -115,7 +238,7 @@ auto_link_downgrade_capable_show(struct device *dev, struct device_attribute *at
xe_pm_runtime_put(xe);
cap = REG_FIELD_GET(LINK_DOWNGRADE, val);
- return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE ? true : false);
+ return sysfs_emit(buf, "%u\n", cap == DOWNGRADE_CAPABLE);
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_capable);
@@ -138,22 +261,15 @@ auto_link_downgrade_status_show(struct device *dev, struct device_attribute *att
}
static DEVICE_ATTR_ADMIN_RO(auto_link_downgrade_status);
-static const struct attribute *auto_link_downgrade_attrs[] = {
+static struct attribute *auto_link_downgrade_attrs[] = {
&dev_attr_auto_link_downgrade_capable.attr,
&dev_attr_auto_link_downgrade_status.attr,
NULL
};
-static void xe_device_sysfs_fini(void *arg)
-{
- struct xe_device *xe = arg;
-
- if (xe->d3cold.capable)
- sysfs_remove_file(&xe->drm.dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
-
- if (xe->info.platform == XE_BATTLEMAGE)
- sysfs_remove_files(&xe->drm.dev->kobj, auto_link_downgrade_attrs);
-}
+static const struct attribute_group auto_link_downgrade_attr_group = {
+ .attrs = auto_link_downgrade_attrs,
+};
int xe_device_sysfs_init(struct xe_device *xe)
{
@@ -161,16 +277,20 @@ int xe_device_sysfs_init(struct xe_device *xe)
int ret;
if (xe->d3cold.capable) {
- ret = sysfs_create_file(&dev->kobj, &dev_attr_vram_d3cold_threshold.attr);
+ ret = devm_device_add_group(dev, &vram_attr_group);
if (ret)
return ret;
}
- if (xe->info.platform == XE_BATTLEMAGE) {
- ret = sysfs_create_files(&dev->kobj, auto_link_downgrade_attrs);
+ if (xe->info.platform == XE_BATTLEMAGE && !IS_SRIOV_VF(xe)) {
+ ret = devm_device_add_group(dev, &auto_link_downgrade_attr_group);
+ if (ret)
+ return ret;
+
+ ret = devm_device_add_group(dev, &late_bind_attr_group);
if (ret)
return ret;
}
- return devm_add_action_or_reset(dev, xe_device_sysfs_fini, xe);
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index c8fa2c011666..0b2fa7c56d38 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -10,34 +10,39 @@
#include <drm/drm_device.h>
#include <drm/drm_file.h>
-#include <drm/drm_pagemap.h>
#include <drm/ttm/ttm_device.h>
#include "xe_devcoredump_types.h"
#include "xe_heci_gsc.h"
+#include "xe_late_bind_fw_types.h"
#include "xe_lmtt_types.h"
#include "xe_memirq_types.h"
#include "xe_oa_types.h"
+#include "xe_pagefault_types.h"
#include "xe_platform_types.h"
#include "xe_pmu_types.h"
#include "xe_pt_types.h"
+#include "xe_sriov_pf_types.h"
#include "xe_sriov_types.h"
+#include "xe_sriov_vf_types.h"
+#include "xe_sriov_vf_ccs_types.h"
#include "xe_step_types.h"
#include "xe_survivability_mode_types.h"
-#include "xe_ttm_vram_mgr_types.h"
+#include "xe_tile_sriov_vf_types.h"
+#include "xe_validation.h"
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
#define TEST_VM_OPS_ERROR
#endif
-#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
-#include "intel_display_core.h"
-#include "intel_display_device.h"
-#endif
-
+struct dram_info;
+struct intel_display;
+struct intel_dg_nvm_dev;
struct xe_ggtt;
+struct xe_i2c;
struct xe_pat_ops;
struct xe_pxp;
+struct xe_vram_region;
#define XE_BO_INVALID_OFFSET LONG_MAX
@@ -71,61 +76,6 @@ struct xe_pxp;
struct xe_tile * : (tile__)->xe)
/**
- * struct xe_vram_region - memory region structure
- * This is used to describe a memory region in xe
- * device, such as HBM memory or CXL extension memory.
- */
-struct xe_vram_region {
- /** @io_start: IO start address of this VRAM instance */
- resource_size_t io_start;
- /**
- * @io_size: IO size of this VRAM instance
- *
- * This represents how much of this VRAM we can access
- * via the CPU through the VRAM BAR. This can be smaller
- * than @usable_size, in which case only part of VRAM is CPU
- * accessible (typically the first 256M). This
- * configuration is known as small-bar.
- */
- resource_size_t io_size;
- /** @dpa_base: This memory regions's DPA (device physical address) base */
- resource_size_t dpa_base;
- /**
- * @usable_size: usable size of VRAM
- *
- * Usable size of VRAM excluding reserved portions
- * (e.g stolen mem)
- */
- resource_size_t usable_size;
- /**
- * @actual_physical_size: Actual VRAM size
- *
- * Actual VRAM size including reserved portions
- * (e.g stolen mem)
- */
- resource_size_t actual_physical_size;
- /** @mapping: pointer to VRAM mappable space */
- void __iomem *mapping;
- /** @ttm: VRAM TTM manager */
- struct xe_ttm_vram_mgr ttm;
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
- /** @pagemap: Used to remap device memory as ZONE_DEVICE */
- struct dev_pagemap pagemap;
- /**
- * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
- * pages of this tile.
- */
- struct drm_pagemap dpagemap;
- /**
- * @hpa_base: base host physical address
- *
- * This is generated when remap device memory as ZONE_DEVICE
- */
- resource_size_t hpa_base;
-#endif
-};
-
-/**
* struct xe_mmio - register mmio structure
*
* Represents an MMIO region that the CPU may use to access registers. A
@@ -210,12 +160,20 @@ struct xe_tile {
/** @mem: memory management info for tile */
struct {
/**
- * @mem.vram: VRAM info for tile.
+ * @mem.kernel_vram: kernel-dedicated VRAM info for tile.
+ *
+ * Although VRAM is associated with a specific tile, it can
+ * still be accessed by all tiles' GTs.
+ */
+ struct xe_vram_region *kernel_vram;
+
+ /**
+ * @mem.vram: general purpose VRAM info for tile.
*
* Although VRAM is associated with a specific tile, it can
* still be accessed by all tiles' GTs.
*/
- struct xe_vram_region vram;
+ struct xe_vram_region *vram;
/** @mem.ggtt: Global graphics translation table */
struct xe_ggtt *ggtt;
@@ -237,12 +195,17 @@ struct xe_tile {
struct {
/** @sriov.vf.ggtt_balloon: GGTT regions excluded from use. */
struct xe_ggtt_node *ggtt_balloon[2];
+ /** @sriov.vf.self_config: VF configuration data */
+ struct xe_tile_sriov_vf_selfconfig self_config;
} vf;
} sriov;
/** @memirq: Memory Based Interrupts. */
struct xe_memirq memirq;
+ /** @csc_hw_error_work: worker to report CSC HW errors */
+ struct work_struct csc_hw_error_work;
+
/** @pcode: tile's PCODE */
struct {
/** @pcode.lock: protecting tile's PCODE mailbox data */
@@ -254,15 +217,23 @@ struct xe_tile {
/** @sysfs: sysfs' kobj used by xe_tile_sysfs */
struct kobject *sysfs;
+
+ /** @debugfs: debugfs directory associated with this tile */
+ struct dentry *debugfs;
};
/**
- * struct xe_device - Top level struct of XE device
+ * struct xe_device - Top level struct of Xe device
*/
struct xe_device {
/** @drm: drm device */
struct drm_device drm;
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+ /** @display: display device data, must be placed after drm device member */
+ struct intel_display *display;
+#endif
+
/** @devcoredump: device coredump */
struct xe_devcoredump devcoredump;
@@ -280,9 +251,9 @@ struct xe_device {
u32 media_verx100;
/** @info.mem_region_mask: mask of valid memory regions */
u32 mem_region_mask;
- /** @info.platform: XE platform enum */
+ /** @info.platform: Xe platform enum */
enum xe_platform platform;
- /** @info.subplatform: XE subplatform enum */
+ /** @info.subplatform: Xe subplatform enum */
enum xe_subplatform subplatform;
/** @info.devid: device ID */
u16 devid;
@@ -296,6 +267,8 @@ struct xe_device {
u8 vram_flags;
/** @info.tile_count: Number of tiles */
u8 tile_count;
+ /** @info.max_gt_per_tile: Number of GT IDs allocated to each tile */
+ u8 max_gt_per_tile;
/** @info.gt_count: Total number of GTs for entire device */
u8 gt_count;
/** @info.vm_max_level: Max VM level */
@@ -319,16 +292,26 @@ struct xe_device {
u8 has_fan_control:1;
/** @info.has_flat_ccs: Whether flat CCS metadata is used */
u8 has_flat_ccs:1;
+ /** @info.has_gsc_nvm: Device has gsc non-volatile memory */
+ u8 has_gsc_nvm:1;
/** @info.has_heci_cscfi: device has heci cscfi */
u8 has_heci_cscfi:1;
/** @info.has_heci_gscfi: device has heci gscfi */
u8 has_heci_gscfi:1;
+ /** @info.has_late_bind: Device has firmware late binding support */
+ u8 has_late_bind:1;
/** @info.has_llc: Device has a shared CPU+GPU last level cache */
u8 has_llc:1;
+ /** @info.has_mbx_power_limits: Device has support to manage power limits using
+ * pcode mailbox commands.
+ */
+ u8 has_mbx_power_limits:1;
+ /** @info.has_mem_copy_instr: Device supports MEM_COPY instruction */
+ u8 has_mem_copy_instr:1;
/** @info.has_pxp: Device has PXP support */
u8 has_pxp:1;
- /** @info.has_range_tlb_invalidation: Has range based TLB invalidations */
- u8 has_range_tlb_invalidation:1;
+ /** @info.has_range_tlb_inval: Has range based TLB invalidations */
+ u8 has_range_tlb_inval:1;
/** @info.has_sriov: Supports SR-IOV */
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
@@ -354,8 +337,23 @@ struct xe_device {
u8 skip_mtcfg:1;
/** @info.skip_pcode: skip access to PCODE uC */
u8 skip_pcode:1;
+ /** @info.needs_shared_vf_gt_wq: needs shared GT WQ on VF */
+ u8 needs_shared_vf_gt_wq:1;
} info;
+ /** @wa_active: keep track of active workarounds */
+ struct {
+ /** @wa_active.oob: bitmap with active OOB workarounds */
+ unsigned long *oob;
+
+ /**
+ * @wa_active.oob_initialized: Mark oob as initialized to help detecting misuse
+ * of XE_DEVICE_WA() - it can only be called on initialization after
+ * Device OOB WAs have been processed.
+ */
+ bool oob_initialized;
+ } wa_active;
+
/** @survivability: survivability information for device */
struct xe_survivability survivability;
@@ -390,7 +388,7 @@ struct xe_device {
/** @mem: memory info for device */
struct {
/** @mem.vram: VRAM info for device */
- struct xe_vram_region vram;
+ struct xe_vram_region *vram;
/** @mem.sys_mgr: system TTM manager */
struct ttm_resource_manager sys_mgr;
/** @mem.sys_mgr: system memory shrinker. */
@@ -402,10 +400,12 @@ struct xe_device {
/** @sriov.__mode: SR-IOV mode (Don't access directly!) */
enum xe_sriov_mode __mode;
- /** @sriov.pf: PF specific data */
- struct xe_device_pf pf;
- /** @sriov.vf: VF specific data */
- struct xe_device_vf vf;
+ union {
+ /** @sriov.pf: PF specific data */
+ struct xe_device_pf pf;
+ /** @sriov.vf: VF specific data */
+ struct xe_device_vf vf;
+ };
/** @sriov.wq: workqueue used by the virtualization workers */
struct workqueue_struct *wq;
@@ -419,6 +419,16 @@ struct xe_device {
u32 next_asid;
/** @usm.lock: protects UM state */
struct rw_semaphore lock;
+ /** @usm.pf_wq: page fault work queue, unbound, high priority */
+ struct workqueue_struct *pf_wq;
+ /*
+ * We pick 4 here because, in the current implementation, it
+ * yields the best bandwidth utilization of the kernel paging
+ * engine.
+ */
+#define XE_PAGEFAULT_QUEUE_COUNT 4
+ /** @usm.pf_queue: Page fault queues */
+ struct xe_pagefault_queue pf_queue[XE_PAGEFAULT_QUEUE_COUNT];
} usm;
/** @pinned: pinned BO state */
@@ -452,7 +462,7 @@ struct xe_device {
/** @ordered_wq: used to serialize compute mode resume */
struct workqueue_struct *ordered_wq;
- /** @unordered_wq: used to serialize unordered work, mostly display */
+ /** @unordered_wq: used to serialize unordered work */
struct workqueue_struct *unordered_wq;
/** @destroy_wq: used to serialize user destroy work, like queue */
@@ -498,6 +508,10 @@ struct xe_device {
const struct xe_pat_table_entry *table;
/** @pat.n_entries: Number of PAT entries */
int n_entries;
+		/** @pat.pat_ats: PAT entry for PCIe ATS responses */
+ const struct xe_pat_table_entry *pat_ats;
+		/** @pat.pat_pta: PAT entry for page table accesses */
+ const struct xe_pat_table_entry *pat_pta;
u32 idx[__XE_CACHE_LEVEL_COUNT];
} pat;
@@ -525,6 +539,12 @@ struct xe_device {
/** @pm_notifier: Our PM notifier to perform actions in response to various PM events. */
struct notifier_block pm_notifier;
+ /** @pm_block: Completion to block validating tasks on suspend / hibernate prepare */
+ struct completion pm_block;
+ /** @rebind_resume_list: List of wq items to kick on resume. */
+ struct list_head rebind_resume_list;
+ /** @rebind_resume_lock: Lock to protect the rebind_resume_list */
+ struct mutex rebind_resume_lock;
/** @pmt: Support the PMT driver callback interface */
struct {
@@ -544,6 +564,12 @@ struct xe_device {
/** @heci_gsc: graphics security controller */
struct xe_heci_gsc heci_gsc;
+ /** @nvm: discrete graphics non-volatile memory */
+ struct intel_dg_nvm_dev *nvm;
+
+ /** @late_bind: xe mei late bind interface */
+ struct xe_late_bind late_bind;
+
/** @oa: oa observation subsystem */
struct xe_oa oa;
@@ -559,6 +585,8 @@ struct xe_device {
atomic_t flag;
/** @wedged.mode: Mode controlled by kernel parameter and debugfs */
int mode;
+ /** @wedged.method: Recovery method to be sent in the drm device wedged uevent */
+ unsigned long method;
} wedged;
/** @bo_device: Struct to control async free of BOs */
@@ -572,6 +600,12 @@ struct xe_device {
/** @pmu: performance monitoring unit */
struct xe_pmu pmu;
+ /** @i2c: I2C host controller */
+ struct xe_i2c *i2c;
+
+ /** @atomic_svm_timeslice_ms: Atomic SVM fault timeslice MS */
+ u32 atomic_svm_timeslice_ms;
+
#ifdef TEST_VM_OPS_ERROR
/**
* @vm_inject_error_position: inject errors at different places in VM
@@ -580,6 +614,31 @@ struct xe_device {
u8 vm_inject_error_position;
#endif
+#if IS_ENABLED(CONFIG_TRACE_GPU_MEM)
+ /**
+ * @global_total_pages: global GPU page usage tracked for gpu_mem
+ * tracepoints
+ */
+ atomic64_t global_total_pages;
+#endif
+ /** @val: The domain for exhaustive eviction, which is currently per device. */
+ struct xe_validation_device val;
+
+ /** @psmi: GPU debugging via additional validation HW */
+ struct {
+ /** @psmi.capture_obj: PSMI buffer for VRAM */
+ struct xe_bo *capture_obj[XE_MAX_TILES_PER_DEVICE + 1];
+ /** @psmi.region_mask: Mask of valid memory regions */
+ u8 region_mask;
+ } psmi;
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ /** @g2g_test_array: for testing G2G communications */
+ u32 *g2g_test_array;
+ /** @g2g_test_count: for testing G2G communications */
+ atomic_t g2g_test_count;
+#endif
+
/* private: */
#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
@@ -589,27 +648,7 @@ struct xe_device {
* drm_i915_private during build. After cleanup these should go away,
* migrating to the right sub-structs
*/
- struct intel_display display;
-
- struct dram_info {
- bool wm_lv_0_adjust_needed;
- u8 num_channels;
- bool symmetric_memory;
- enum intel_dram_type {
- INTEL_DRAM_UNKNOWN,
- INTEL_DRAM_DDR3,
- INTEL_DRAM_DDR4,
- INTEL_DRAM_LPDDR3,
- INTEL_DRAM_LPDDR4,
- INTEL_DRAM_DDR5,
- INTEL_DRAM_LPDDR5,
- INTEL_DRAM_GDDR,
- INTEL_DRAM_GDDR_ECC,
- __INTEL_DRAM_TYPE_MAX,
- } type;
- u8 num_qgv_points;
- u8 num_psf_gv_points;
- } dram_info;
+ const struct dram_info *dram_info;
/*
* edram size in MB.
@@ -617,27 +656,14 @@ struct xe_device {
*/
u32 edram_size_mb;
- /* To shut up runtime pm macros.. */
- struct xe_runtime_pm {} runtime_pm;
-
- /* only to allow build, not used functionally */
- u32 irq_mask;
-
struct intel_uncore {
spinlock_t lock;
} uncore;
-
- /* only to allow build, not used functionally */
- struct {
- unsigned int hpll_freq;
- unsigned int czclk_freq;
- unsigned int fsb_freq, mem_freq, is_ddr3;
- };
#endif
};
/**
- * struct xe_file - file handle for XE driver
+ * struct xe_file - file handle for Xe driver
*/
struct xe_file {
/** @xe: xe DEVICE **/
diff --git a/drivers/gpu/drm/xe/xe_device_wa_oob.rules b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
new file mode 100644
index 000000000000..55ba01bc8f38
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_device_wa_oob.rules
@@ -0,0 +1,5 @@
+22010954014 PLATFORM(DG2)
+15015404425 PLATFORM(LUNARLAKE)
+ PLATFORM(PANTHERLAKE)
+22019338487_display PLATFORM(LUNARLAKE)
+14022085890 SUBPLATFORM(BATTLEMAGE, G21)
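The new rules file above feeds the device-level OOB workaround machinery initialized from xe_device_probe_early() via xe_wa_device_init() and xe_wa_process_device_oob(); once that has run, a workaround can be queried with XE_DEVICE_WA(). A hedged sketch follows: the consumer function and the apply_dg2_tuning() helper are invented for illustration.

/* Hypothetical consumer of a device OOB workaround listed above. Only
 * valid after xe_wa_process_device_oob() has run; the oob_initialized
 * flag added to struct xe_device exists to catch earlier misuse.
 */
static void example_apply_device_was(struct xe_device *xe)
{
	if (XE_DEVICE_WA(xe, 22010954014))	/* DG2 only, per the rules above */
		apply_dg2_tuning(xe);		/* made-up helper */
}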
diff --git a/drivers/gpu/drm/xe/xe_dma_buf.c b/drivers/gpu/drm/xe/xe_dma_buf.c
index 346f857f3837..7c74a31d4486 100644
--- a/drivers/gpu/drm/xe/xe_dma_buf.c
+++ b/drivers/gpu/drm/xe/xe_dma_buf.c
@@ -48,31 +48,43 @@ static void xe_dma_buf_detach(struct dma_buf *dmabuf,
static int xe_dma_buf_pin(struct dma_buf_attachment *attach)
{
- struct drm_gem_object *obj = attach->dmabuf->priv;
+ struct dma_buf *dmabuf = attach->dmabuf;
+ struct drm_gem_object *obj = dmabuf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
struct xe_device *xe = xe_bo_device(bo);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
+ bool allow_vram = true;
int ret;
- /*
- * For now only support pinning in TT memory, for two reasons:
- * 1) Avoid pinning in a placement not accessible to some importers.
- * 2) Pinning in VRAM requires PIN accounting which is a to-do.
- */
- if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT)) {
+ if (!IS_ENABLED(CONFIG_DMABUF_MOVE_NOTIFY)) {
+ allow_vram = false;
+ } else {
+ list_for_each_entry(attach, &dmabuf->attachments, node) {
+ if (!attach->peer2peer) {
+ allow_vram = false;
+ break;
+ }
+ }
+ }
+
+ if (xe_bo_is_pinned(bo) && !xe_bo_is_mem_type(bo, XE_PL_TT) &&
+ !(xe_bo_is_vram(bo) && allow_vram)) {
drm_dbg(&xe->drm, "Can't migrate pinned bo for dma-buf pin.\n");
return -EINVAL;
}
- ret = xe_bo_migrate(bo, XE_PL_TT);
- if (ret) {
- if (ret != -EINTR && ret != -ERESTARTSYS)
- drm_dbg(&xe->drm,
- "Failed migrating dma-buf to TT memory: %pe\n",
- ERR_PTR(ret));
- return ret;
+ if (!allow_vram) {
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
+ if (ret) {
+ if (ret != -EINTR && ret != -ERESTARTSYS)
+ drm_dbg(&xe->drm,
+ "Failed migrating dma-buf to TT memory: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
}
- ret = xe_bo_pin_external(bo);
+ ret = xe_bo_pin_external(bo, !allow_vram, exec);
xe_assert(xe, !ret);
return 0;
@@ -92,6 +104,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
struct dma_buf *dma_buf = attach->dmabuf;
struct drm_gem_object *obj = dma_buf->priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
struct sg_table *sgt;
int r = 0;
@@ -100,9 +113,9 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
if (!xe_bo_is_pinned(bo)) {
if (!attach->peer2peer)
- r = xe_bo_migrate(bo, XE_PL_TT);
+ r = xe_bo_migrate(bo, XE_PL_TT, NULL, exec);
else
- r = xe_bo_validate(bo, NULL, false);
+ r = xe_bo_validate(bo, NULL, false, exec);
if (r)
return ERR_PTR(r);
}
@@ -111,7 +124,7 @@ static struct sg_table *xe_dma_buf_map(struct dma_buf_attachment *attach,
case XE_PL_TT:
sgt = drm_prime_pages_to_sg(obj->dev,
bo->ttm.ttm->pages,
- bo->ttm.ttm->num_pages);
+ obj->size >> PAGE_SHIFT);
if (IS_ERR(sgt))
return sgt;
@@ -161,15 +174,26 @@ static int xe_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
struct xe_bo *bo = gem_to_xe_bo(obj);
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int ret = 0;
if (!reads)
return 0;
/* Can we do interruptible lock here? */
- xe_bo_lock(bo, false);
- (void)xe_bo_migrate(bo, XE_PL_TT);
- xe_bo_unlock(bo);
+ xe_validation_guard(&ctx, &xe_bo_device(bo)->val, &exec, (struct xe_val_flags) {}, ret) {
+ ret = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ ret = xe_bo_migrate(bo, XE_PL_TT, NULL, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ }
+ /* If we failed, cpu-access takes place in current placement. */
return 0;
}
@@ -191,10 +215,22 @@ struct dma_buf *xe_gem_prime_export(struct drm_gem_object *obj, int flags)
{
struct xe_bo *bo = gem_to_xe_bo(obj);
struct dma_buf *buf;
+ struct ttm_operation_ctx ctx = {
+ .interruptible = true,
+ .no_wait_gpu = true,
+ /* We opt to avoid OOM on system pages allocations */
+ .gfp_retry_mayfail = true,
+ .allow_res_evict = false,
+ };
+ int ret;
if (bo->vm)
return ERR_PTR(-EPERM);
+ ret = ttm_bo_setup_export(&bo->ttm, &ctx);
+ if (ret)
+ return ERR_PTR(ret);
+
buf = drm_gem_prime_export(obj, flags);
if (!IS_ERR(buf))
buf->ops = &xe_dmabuf_ops;
@@ -208,32 +244,45 @@ xe_dma_buf_init_obj(struct drm_device *dev, struct xe_bo *storage,
{
struct dma_resv *resv = dma_buf->resv;
struct xe_device *xe = to_xe_device(dev);
+ struct xe_validation_ctx ctx;
+ struct drm_gem_object *dummy_obj;
+ struct drm_exec exec;
struct xe_bo *bo;
- int ret;
-
- dma_resv_lock(resv, NULL);
- bo = ___xe_bo_create_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
- 0, /* Will require 1way or 2way for vm_bind */
- ttm_bo_type_sg, XE_BO_FLAG_SYSTEM);
- if (IS_ERR(bo)) {
- ret = PTR_ERR(bo);
- goto error;
+ int ret = 0;
+
+ dummy_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
+ if (!dummy_obj)
+ return ERR_PTR(-ENOMEM);
+
+ dummy_obj->resv = resv;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, ret) {
+ ret = drm_exec_lock_obj(&exec, dummy_obj);
+ drm_exec_retry_on_contention(&exec);
+ if (ret)
+ break;
+
+ bo = xe_bo_init_locked(xe, storage, NULL, resv, NULL, dma_buf->size,
+ 0, /* Will require 1way or 2way for vm_bind */
+ ttm_bo_type_sg, XE_BO_FLAG_SYSTEM, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ ret = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &ret);
+ break;
+ }
}
- dma_resv_unlock(resv);
-
- return &bo->ttm.base;
+ drm_gem_object_put(dummy_obj);
-error:
- dma_resv_unlock(resv);
- return ERR_PTR(ret);
+ return ret ? ERR_PTR(ret) : &bo->ttm.base;
}
static void xe_dma_buf_move_notify(struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = attach->importer_priv;
struct xe_bo *bo = gem_to_xe_bo(obj);
+ struct drm_exec *exec = XE_VALIDATION_UNSUPPORTED;
- XE_WARN_ON(xe_bo_evict(bo));
+ XE_WARN_ON(xe_bo_evict(bo, exec));
}
static const struct dma_buf_attach_ops xe_dma_buf_attach_ops = {
diff --git a/drivers/gpu/drm/xe/xe_drm_client.c b/drivers/gpu/drm/xe/xe_drm_client.c
index 31f688e953d7..f931ff9b1ec0 100644
--- a/drivers/gpu/drm/xe/xe_drm_client.c
+++ b/drivers/gpu/drm/xe/xe_drm_client.c
@@ -167,7 +167,7 @@ void xe_drm_client_remove_bo(struct xe_bo *bo)
static void bo_meminfo(struct xe_bo *bo,
struct drm_memory_stats stats[TTM_NUM_MEM_TYPES])
{
- u64 sz = bo->size;
+ u64 sz = xe_bo_size(bo);
u32 mem_type = bo->ttm.resource->mem_type;
xe_bo_assert_held(bo);
diff --git a/drivers/gpu/drm/xe/xe_drv.h b/drivers/gpu/drm/xe/xe_drv.h
index d61650d4aa0b..95242a375e54 100644
--- a/drivers/gpu/drm/xe/xe_drv.h
+++ b/drivers/gpu/drm/xe/xe_drv.h
@@ -9,7 +9,7 @@
#include <drm/drm_drv.h>
#define DRIVER_NAME "xe"
-#define DRIVER_DESC "Intel Xe Graphics"
+#define DRIVER_DESC "Intel Xe2 Graphics"
/* Interface history:
*
diff --git a/drivers/gpu/drm/xe/xe_eu_stall.c b/drivers/gpu/drm/xe/xe_eu_stall.c
index 96732613b4b7..a5c36a317a70 100644
--- a/drivers/gpu/drm/xe/xe_eu_stall.c
+++ b/drivers/gpu/drm/xe/xe_eu_stall.c
@@ -49,6 +49,7 @@ struct xe_eu_stall_data_stream {
wait_queue_head_t poll_wq;
size_t data_record_size;
size_t per_xecore_buf_size;
+ unsigned int fw_ref;
struct xe_gt *gt;
struct xe_bo *bo;
@@ -124,6 +125,27 @@ struct xe_eu_stall_data_xe2 {
__u64 unused[6];
} __packed;
+/*
+ * EU stall data format for Xe3p arch GPUs.
+ */
+struct xe_eu_stall_data_xe3p {
+ __u64 ip_addr:61; /* Bits 0 to 60 */
+ __u64 tdr_count:8; /* Bits 61 to 68 */
+ __u64 other_count:8; /* Bits 69 to 76 */
+ __u64 control_count:8; /* Bits 77 to 84 */
+ __u64 pipestall_count:8; /* Bits 85 to 92 */
+ __u64 send_count:8; /* Bits 93 to 100 */
+ __u64 dist_acc_count:8; /* Bits 101 to 108 */
+ __u64 sbid_count:8; /* Bits 109 to 116 */
+ __u64 sync_count:8; /* Bits 117 to 124 */
+ __u64 inst_fetch_count:8; /* Bits 125 to 132 */
+ __u64 active_count:8; /* Bits 133 to 140 */
+ __u64 ex_id:3; /* Bits 141 to 143 */
+ __u64 end_flag:1; /* Bit 144 */
+ __u64 unused_bits:47;
+ __u64 unused[5];
+} __packed;
+
const u64 eu_stall_sampling_rates[] = {251, 251 * 2, 251 * 3, 251 * 4, 251 * 5, 251 * 6, 251 * 7};
/**
@@ -167,10 +189,13 @@ size_t xe_eu_stall_data_record_size(struct xe_device *xe)
{
size_t record_size = 0;
- if (xe->info.platform == XE_PVC)
- record_size = sizeof(struct xe_eu_stall_data_pvc);
+ if (GRAPHICS_VER(xe) >= 35)
+ record_size = sizeof(struct xe_eu_stall_data_xe3p);
else if (GRAPHICS_VER(xe) >= 20)
record_size = sizeof(struct xe_eu_stall_data_xe2);
+ else if (xe->info.platform == XE_PVC)
+ record_size = sizeof(struct xe_eu_stall_data_pvc);
+
xe_assert(xe, is_power_of_2(record_size));
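As a worked check of the Xe3p record layout above (assuming the bitfields pack contiguously as intended by __packed): 61 + 10*8 + 3 + 1 = 145 used bits plus 47 pad bits = 192 bits (24 bytes), plus the five spare u64 words (40 bytes), for a total of 64 bytes — which satisfies the is_power_of_2() assert used here.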
@@ -258,11 +283,13 @@ static int set_prop_eu_stall_wait_num_reports(struct xe_device *xe, u64 value,
static int set_prop_eu_stall_gt_id(struct xe_device *xe, u64 value,
struct eu_stall_open_properties *props)
{
- if (value >= xe->info.gt_count) {
+ struct xe_gt *gt = xe_device_get_gt(xe, value);
+
+ if (!gt) {
drm_dbg(&xe->drm, "Invalid GT ID %llu for EU stall sampling\n", value);
return -EINVAL;
}
- props->gt = xe_device_get_gt(xe, value);
+ props->gt = gt;
return 0;
}
@@ -288,7 +315,7 @@ static int xe_eu_stall_user_ext_set_property(struct xe_device *xe, u64 extension
return -EFAULT;
if (XE_IOCTL_DBG(xe, ext.property >= ARRAY_SIZE(xe_set_eu_stall_property_funcs)) ||
- XE_IOCTL_DBG(xe, ext.pad))
+ XE_IOCTL_DBG(xe, !ext.property) || XE_IOCTL_DBG(xe, ext.pad))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_set_eu_stall_property_funcs));
@@ -615,9 +642,8 @@ static int xe_eu_stall_data_buf_alloc(struct xe_eu_stall_data_stream *stream,
size = stream->per_xecore_buf_size * last_xecore;
- bo = xe_bo_create_pin_map_at_aligned(tile->xe, tile, NULL,
- size, ~0ull, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64);
+ bo = xe_bo_create_pin_map_at_novm(tile->xe, tile, size, ~0ull, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, SZ_64, false);
if (IS_ERR(bo)) {
kfree(stream->xecore_buf);
return PTR_ERR(bo);
@@ -635,19 +661,18 @@ static int xe_eu_stall_stream_enable(struct xe_eu_stall_data_stream *stream)
struct per_xecore_buf *xecore_buf;
struct xe_gt *gt = stream->gt;
u16 group, instance;
- unsigned int fw_ref;
int xecore;
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(gt_to_xe(gt));
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
+ stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
+ if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FW_RENDER)) {
xe_gt_err(gt, "Failed to get RENDER forcewake\n");
xe_pm_runtime_put(gt_to_xe(gt));
return -ETIMEDOUT;
}
- if (XE_WA(gt, 22016596838))
+ if (XE_GT_WA(gt, 22016596838))
xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
_MASKED_BIT_ENABLE(DISABLE_DOP_GATING));
@@ -803,11 +828,11 @@ static int xe_eu_stall_disable_locked(struct xe_eu_stall_data_stream *stream)
cancel_delayed_work_sync(&stream->buf_poll_work);
- if (XE_WA(gt, 22016596838))
+ if (XE_GT_WA(gt, 22016596838))
xe_gt_mcr_multicast_write(gt, ROW_CHICKEN2,
_MASKED_BIT_DISABLE(DISABLE_DOP_GATING));
- xe_force_wake_put(gt_to_fw(gt), XE_FW_RENDER);
+ xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
xe_pm_runtime_put(gt_to_xe(gt));
return 0;
diff --git a/drivers/gpu/drm/xe/xe_exec.c b/drivers/gpu/drm/xe/xe_exec.c
index 44364c042ad7..fd9480031750 100644
--- a/drivers/gpu/drm/xe/xe_exec.c
+++ b/drivers/gpu/drm/xe/xe_exec.c
@@ -16,9 +16,12 @@
#include "xe_exec_queue.h"
#include "xe_hw_engine_group.h"
#include "xe_macros.h"
+#include "xe_pm.h"
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
+#include "xe_svm.h"
+#include "xe_trace.h"
#include "xe_vm.h"
/**
@@ -31,7 +34,7 @@
* - Binding at exec time
* - Flow controlling the ring at exec time
*
- * In XE we avoid all of this complication by not allowing a BO list to be
+ * In Xe we avoid all of this complication by not allowing a BO list to be
* passed into an exec, using the dma-buf implicit sync uAPI, have binds as
* separate operations, and using the DRM scheduler to flow control the ring.
* Let's deep dive on each of these.
@@ -97,9 +100,13 @@
static int xe_exec_fn(struct drm_gpuvm_exec *vm_exec)
{
struct xe_vm *vm = container_of(vm_exec->vm, struct xe_vm, gpuvm);
+ int ret;
/* The fence slot added here is intended for the exec sched job. */
- return xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
+ xe_vm_set_validation_exec(vm, &vm_exec->exec);
+ ret = xe_vm_validate_rebind(vm, &vm_exec->exec, 1);
+ xe_vm_set_validation_exec(vm, NULL);
+ return ret;
}
int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
@@ -115,17 +122,18 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct drm_gpuvm_exec vm_exec = {.extra.fn = xe_exec_fn};
struct drm_exec *exec = &vm_exec.exec;
u32 i, num_syncs, num_ufence = 0;
+ struct xe_validation_ctx ctx;
struct xe_sched_job *job;
struct xe_vm *vm;
- bool write_locked, skip_retry = false;
- ktime_t end = 0;
+ bool write_locked;
int err = 0;
struct xe_hw_engine_group *group;
enum xe_hw_engine_group_execution_mode mode, previous_mode;
if (XE_IOCTL_DBG(xe, args->extensions) ||
XE_IOCTL_DBG(xe, args->pad[0] || args->pad[1] || args->pad[2]) ||
- XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+ XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]) ||
+ XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
return -EINVAL;
q = xe_exec_queue_lookup(xef, args->exec_queue_id);
@@ -148,6 +156,12 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
goto err_exec_queue;
}
+ if (atomic_read(&q->job_cnt) >= XE_MAX_JOB_COUNT_PER_EXEC_QUEUE) {
+ trace_xe_exec_queue_reach_max_job_count(q, XE_MAX_JOB_COUNT_PER_EXEC_QUEUE);
+ err = -EAGAIN;
+ goto err_exec_queue;
+ }
+
if (args->num_syncs) {
syncs = kcalloc(args->num_syncs, sizeof(*syncs), GFP_KERNEL);
if (!syncs) {
@@ -160,7 +174,8 @@ int xe_exec_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
- &syncs_user[num_syncs], SYNC_PARSE_FLAG_EXEC |
+ &syncs_user[num_syncs], NULL, 0,
+ SYNC_PARSE_FLAG_EXEC |
(xe_vm_in_lr_mode(vm) ?
SYNC_PARSE_FLAG_LR_MODE : 0));
if (err)
@@ -237,17 +252,21 @@ retry:
goto err_unlock_list;
}
- vm_exec.vm = &vm->gpuvm;
- vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
- if (xe_vm_in_lr_mode(vm)) {
- drm_exec_init(exec, vm_exec.flags, 0);
- } else {
- err = drm_gpuvm_exec_lock(&vm_exec);
- if (err) {
- if (xe_vm_validate_should_retry(exec, err, &end))
- err = -EAGAIN;
+ /*
+ * It's OK to block interruptible here with the vm lock held, since
+ * on task freezing during suspend / hibernate, the call will
+ * return -ERESTARTSYS and the IOCTL will be rerun.
+ */
+ err = xe_pm_block_on_suspend(xe);
+ if (err)
+ goto err_unlock_list;
+
+ if (!xe_vm_in_lr_mode(vm)) {
+ vm_exec.vm = &vm->gpuvm;
+ vm_exec.flags = DRM_EXEC_INTERRUPTIBLE_WAIT;
+ err = xe_validation_exec_lock(&ctx, &vm_exec, &xe->val);
+ if (err)
goto err_unlock_list;
- }
}
if (xe_vm_is_closed_or_banned(q->vm)) {
@@ -256,12 +275,6 @@ retry:
goto err_exec;
}
- if (xe_exec_queue_is_lr(q) && xe_exec_queue_ring_full(q)) {
- err = -EWOULDBLOCK; /* Aliased to -EAGAIN */
- skip_retry = true;
- goto err_exec;
- }
-
if (xe_exec_queue_uses_pxp(q)) {
err = xe_vm_validate_protected(q->vm);
if (err)
@@ -290,11 +303,7 @@ retry:
goto err_put_job;
if (!xe_vm_in_lr_mode(vm)) {
- err = xe_sched_job_last_fence_add_dep(job, vm);
- if (err)
- goto err_put_job;
-
- err = down_read_interruptible(&vm->userptr.notifier_lock);
+ err = xe_svm_notifier_lock_interruptible(vm);
if (err)
goto err_put_job;
@@ -318,8 +327,6 @@ retry:
xe_sched_job_init_user_fence(job, &syncs[i]);
}
- if (xe_exec_queue_is_lr(q))
- q->ring_ops->emit_job(job);
if (!xe_vm_in_lr_mode(vm))
xe_exec_queue_last_fence_set(q, vm, &job->drm.s_fence->finished);
xe_sched_job_push(job);
@@ -336,15 +343,16 @@ retry:
err_repin:
if (!xe_vm_in_lr_mode(vm))
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
err_put_job:
if (err)
xe_sched_job_put(job);
err_exec:
- drm_exec_fini(exec);
+ if (!xe_vm_in_lr_mode(vm))
+ xe_validation_ctx_fini(&ctx);
err_unlock_list:
up_read(&vm->lock);
- if (err == -EAGAIN && !skip_retry)
+ if (err == -EAGAIN)
goto retry;
err_hw_exec_mode:
if (mode == EXEC_MODE_DMA_FENCE)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index ce78cee5dec6..8724f8de67e2 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -10,10 +10,13 @@
#include <drm/drm_device.h>
#include <drm/drm_drv.h>
#include <drm/drm_file.h>
+#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
+#include "xe_dep_scheduler.h"
#include "xe_device.h"
#include "xe_gt.h"
+#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_hw_engine_group.h"
#include "xe_hw_fence.h"
@@ -27,6 +30,29 @@
#include "xe_vm.h"
#include "xe_pxp.h"
+/**
+ * DOC: Execution Queue
+ *
+ * An execution queue is an interface to a HW context of execution. The user
+ * creates an execution queue, submits GPU jobs through it and, in the end,
+ * destroys it.
+ *
+ * Execution queues can also be created by XeKMD itself for driver-internal
+ * operations such as object migration.
+ *
+ * An execution queue is associated with a specified HW engine or a group of
+ * engines (belonging to the same tile and engine class), and any GPU job
+ * submitted on the queue will run on one of these engines.
+ *
+ * An execution queue is tied to an address space (VM). It holds a reference
+ * to the associated VM and the underlying Logical Ring Context(s) (LRCs)
+ * until the queue is destroyed.
+ *
+ * The execution queue sits on top of the submission backend. It transparently
+ * handles whichever backend the platform uses (GuC or Execlists), and the
+ * ring operations the different engine classes support.
+ */
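As a companion to the DOC block above, here is a minimal, hypothetical userspace sketch of creating an execution queue through the uAPI. The vm_id is assumed to come from an earlier DRM_IOCTL_XE_VM_CREATE, and the field usage should be double-checked against the current xe_drm.h:

#include <stdint.h>
#include <xf86drm.h>
#include <drm/xe_drm.h>

/* Hypothetical helper: create a render exec queue on GT 0 for an existing VM. */
static int create_render_queue(int fd, uint32_t vm_id, uint32_t *queue_id)
{
	struct drm_xe_engine_class_instance eci = {
		.engine_class = DRM_XE_ENGINE_CLASS_RENDER,
		.engine_instance = 0,
		.gt_id = 0,
	};
	struct drm_xe_exec_queue_create create = {
		.width = 1,		/* one batch buffer per submission */
		.num_placements = 1,	/* a single engine placement */
		.vm_id = vm_id,		/* from a prior DRM_IOCTL_XE_VM_CREATE */
		.instances = (uintptr_t)&eci,
	};

	if (drmIoctl(fd, DRM_IOCTL_XE_EXEC_QUEUE_CREATE, &create))
		return -1;

	*queue_id = create.exec_queue_id;
	return 0;
}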
+
enum xe_exec_queue_sched_prop {
XE_EXEC_QUEUE_JOB_TIMEOUT = 0,
XE_EXEC_QUEUE_TIMESLICE = 1,
@@ -39,6 +65,12 @@ static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue
static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
+ int i;
+
+ for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i)
+ if (q->tlb_inval[i].dep_scheduler)
+ xe_dep_scheduler_fini(q->tlb_inval[i].dep_scheduler);
+
if (xe_exec_queue_uses_pxp(q))
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
if (q->vm)
@@ -50,6 +82,39 @@ static void __xe_exec_queue_free(struct xe_exec_queue *q)
kfree(q);
}
+static int alloc_dep_schedulers(struct xe_device *xe, struct xe_exec_queue *q)
+{
+ struct xe_tile *tile = gt_to_tile(q->gt);
+ int i;
+
+ for (i = 0; i < XE_EXEC_QUEUE_TLB_INVAL_COUNT; ++i) {
+ struct xe_dep_scheduler *dep_scheduler;
+ struct xe_gt *gt;
+ struct workqueue_struct *wq;
+
+ if (i == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT)
+ gt = tile->primary_gt;
+ else
+ gt = tile->media_gt;
+
+ if (!gt)
+ continue;
+
+ wq = gt->tlb_inval.job_wq;
+
+#define MAX_TLB_INVAL_JOBS 16 /* Picking a reasonable value */
+ dep_scheduler = xe_dep_scheduler_create(xe, wq, q->name,
+ MAX_TLB_INVAL_JOBS);
+ if (IS_ERR(dep_scheduler))
+ return PTR_ERR(dep_scheduler);
+
+ q->tlb_inval[i].dep_scheduler = dep_scheduler;
+ }
+#undef MAX_TLB_INVAL_JOBS
+
+ return 0;
+}
+
static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
struct xe_vm *vm,
u32 logical_mask,
@@ -94,6 +159,14 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
else
q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
+ if (q->flags & (EXEC_QUEUE_FLAG_MIGRATE | EXEC_QUEUE_FLAG_VM)) {
+ err = alloc_dep_schedulers(xe, q);
+ if (err) {
+ __xe_exec_queue_free(q);
+ return ERR_PTR(err);
+ }
+ }
+
if (vm)
q->vm = xe_vm_get(vm);
@@ -112,9 +185,8 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
return q;
}
-static int __xe_exec_queue_init(struct xe_exec_queue *q)
+static int __xe_exec_queue_init(struct xe_exec_queue *q, u32 exec_queue_flags)
{
- struct xe_vm *vm = q->vm;
int i, err;
u32 flags = 0;
@@ -132,38 +204,56 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
flags |= XE_LRC_CREATE_RUNALONE;
}
- if (vm) {
- err = xe_vm_lock(vm, true);
- if (err)
- return err;
- }
+ if (!(exec_queue_flags & EXEC_QUEUE_FLAG_KERNEL))
+ flags |= XE_LRC_CREATE_USER_CTX;
+ err = q->ops->init(q);
+ if (err)
+ return err;
+
+ /*
+ * This must occur after q->ops->init to avoid race conditions during VF
+ * post-migration recovery, as the fixups for the LRC GGTT addresses
+ * depend on the queue being present in the backend tracking structure.
+ *
+	 * In addition to the above, we must wait on inflight GGTT changes to
+	 * avoid writing out stale values here. Such a wait is only race-free
+	 * if the function can detect migration instantly, from the moment the
+	 * vCPU resumes execution.
+ */
for (i = 0; i < q->width; ++i) {
- q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K, q->msix_vec, flags);
- if (IS_ERR(q->lrc[i])) {
- err = PTR_ERR(q->lrc[i]);
- goto err_unlock;
+ struct xe_lrc *lrc;
+
+ xe_gt_sriov_vf_wait_valid_ggtt(q->gt);
+ lrc = xe_lrc_create(q->hwe, q->vm, xe_lrc_ring_size(),
+ q->msix_vec, flags);
+ if (IS_ERR(lrc)) {
+ err = PTR_ERR(lrc);
+ goto err_lrc;
}
- }
-
- if (vm)
- xe_vm_unlock(vm);
- err = q->ops->init(q);
- if (err)
- goto err_lrc;
+ /* Pairs with READ_ONCE to xe_exec_queue_contexts_hwsp_rebase */
+ WRITE_ONCE(q->lrc[i], lrc);
+ }
return 0;
-err_unlock:
- if (vm)
- xe_vm_unlock(vm);
err_lrc:
for (i = i - 1; i >= 0; --i)
xe_lrc_put(q->lrc[i]);
return err;
}
+static void __xe_exec_queue_fini(struct xe_exec_queue *q)
+{
+ int i;
+
+ q->ops->fini(q);
+
+ for (i = 0; i < q->width; ++i)
+ xe_lrc_put(q->lrc[i]);
+}
+
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hwe, u32 flags,
@@ -180,7 +270,7 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (IS_ERR(q))
return q;
- err = __xe_exec_queue_init(q);
+ err = __xe_exec_queue_init(q, flags);
if (err)
goto err_post_alloc;
@@ -194,11 +284,13 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
if (xe_exec_queue_uses_pxp(q)) {
err = xe_pxp_exec_queue_add(xe->pxp, q);
if (err)
- goto err_post_alloc;
+ goto err_post_init;
}
return q;
+err_post_init:
+ __xe_exec_queue_fini(q);
err_post_alloc:
__xe_exec_queue_free(q);
return ERR_PTR(err);
@@ -277,6 +369,16 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
}
xe_vm_put(migrate_vm);
+ if (!IS_ERR(q)) {
+ int err = drm_syncobj_create(&q->ufence_syncobj,
+ DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (err) {
+ xe_exec_queue_put(q);
+ return ERR_PTR(err);
+ }
+ }
+
return q;
}
ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
@@ -285,24 +387,31 @@ void xe_exec_queue_destroy(struct kref *ref)
{
struct xe_exec_queue *q = container_of(ref, struct xe_exec_queue, refcount);
struct xe_exec_queue *eq, *next;
+ int i;
+
+ xe_assert(gt_to_xe(q->gt), atomic_read(&q->job_cnt) == 0);
+
+ if (q->ufence_syncobj)
+ drm_syncobj_put(q->ufence_syncobj);
if (xe_exec_queue_uses_pxp(q))
xe_pxp_exec_queue_remove(gt_to_xe(q->gt)->pxp, q);
xe_exec_queue_last_fence_put_unlocked(q);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, i);
+
if (!(q->flags & EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)) {
list_for_each_entry_safe(eq, next, &q->multi_gt_list,
multi_gt_link)
xe_exec_queue_put(eq);
}
- q->ops->fini(q);
+ q->ops->destroy(q);
}
void xe_exec_queue_fini(struct xe_exec_queue *q)
{
- int i;
-
/*
* Before releasing our ref to lrc and xef, accumulate our run ticks
* and wakeup any waiters.
@@ -311,9 +420,7 @@ void xe_exec_queue_fini(struct xe_exec_queue *q)
if (q->xef && atomic_dec_and_test(&q->xef->exec_queue.pending_removal))
wake_up_var(&q->xef->exec_queue.pending_removal);
- for (i = 0; i < q->width; ++i)
- xe_lrc_put(q->lrc[i]);
-
+ __xe_exec_queue_fini(q);
__xe_exec_queue_free(q);
}
@@ -623,7 +730,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
if (XE_IOCTL_DBG(xe, err))
return -EFAULT;
- if (XE_IOCTL_DBG(xe, eci[0].gt_id >= xe->info.gt_count))
+ if (XE_IOCTL_DBG(xe, !xe_device_get_gt(xe, eci[0].gt_id)))
return -EINVAL;
if (args->flags & DRM_XE_EXEC_QUEUE_LOW_LATENCY_HINT)
@@ -755,34 +862,30 @@ int xe_exec_queue_get_property_ioctl(struct drm_device *dev, void *data,
}
/**
- * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
- * @q: The exec_queue
+ * xe_exec_queue_lrc() - Get the LRC from exec queue.
+ * @q: The exec_queue.
*
- * Return: True if the exec_queue is long-running, false otherwise.
+ * Retrieves the primary LRC for the exec queue. Note that this function
+ * returns only the first LRC instance, even when multiple parallel LRCs
+ * are configured.
+ *
+ * Return: Pointer to the exec queue's primary LRC
*/
-bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
+struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q)
{
- return q->vm && xe_vm_in_lr_mode(q->vm) &&
- !(q->flags & EXEC_QUEUE_FLAG_VM);
-}
-
-static s32 xe_exec_queue_num_job_inflight(struct xe_exec_queue *q)
-{
- return q->lrc[0]->fence_ctx.next_seqno - xe_lrc_seqno(q->lrc[0]) - 1;
+ return q->lrc[0];
}
/**
- * xe_exec_queue_ring_full() - Whether an exec_queue's ring is full
+ * xe_exec_queue_is_lr() - Whether an exec_queue is long-running
* @q: The exec_queue
*
- * Return: True if the exec_queue's ring is full, false otherwise.
+ * Return: True if the exec_queue is long-running, false otherwise.
*/
-bool xe_exec_queue_ring_full(struct xe_exec_queue *q)
+bool xe_exec_queue_is_lr(struct xe_exec_queue *q)
{
- struct xe_lrc *lrc = q->lrc[0];
- s32 max_job = lrc->ring.size / MAX_JOB_SIZE_BYTES;
-
- return xe_exec_queue_num_job_inflight(q) >= max_job;
+ return q->vm && xe_vm_in_lr_mode(q->vm) &&
+ !(q->flags & EXEC_QUEUE_FLAG_VM);
}
/**
@@ -915,7 +1018,9 @@ int xe_exec_queue_destroy_ioctl(struct drm_device *dev, void *data,
static void xe_exec_queue_last_fence_lockdep_assert(struct xe_exec_queue *q,
struct xe_vm *vm)
{
- if (q->flags & EXEC_QUEUE_FLAG_VM) {
+ if (q->flags & EXEC_QUEUE_FLAG_MIGRATE) {
+ xe_migrate_job_lock_assert(q);
+ } else if (q->flags & EXEC_QUEUE_FLAG_VM) {
lockdep_assert_held(&vm->lock);
} else {
xe_vm_assert_held(vm);
@@ -1014,29 +1119,132 @@ void xe_exec_queue_last_fence_set(struct xe_exec_queue *q, struct xe_vm *vm,
struct dma_fence *fence)
{
xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
xe_exec_queue_last_fence_put(q, vm);
q->last_fence = dma_fence_get(fence);
}
/**
- * xe_exec_queue_last_fence_test_dep - Test last fence dependency of queue
+ * xe_exec_queue_tlb_inval_last_fence_put() - Drop ref to last TLB invalidation fence
* @q: The exec queue
- * @vm: The VM the engine does a bind or exec for
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ */
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ xe_exec_queue_tlb_inval_last_fence_put_unlocked(q, type);
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_put_unlocked() - Drop ref to last TLB
+ * invalidation fence unlocked
+ * @q: The exec queue
+ * @type: Either primary or media GT
+ *
+ * Only safe to be called from xe_exec_queue_destroy().
+ */
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type)
+{
+ xe_assert(q->vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ dma_fence_put(q->tlb_inval[type].last_fence);
+ q->tlb_inval[type].last_fence = NULL;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_get() - Get last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @type: Either primary or media GT
+ *
+ * Get last fence, takes a ref
*
- * Returns:
- * -ETIME if there exists an unsignalled last fence dependency, zero otherwise.
+ * Returns: last fence if not signaled, dma fence stub if signaled
*/
-int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q, struct xe_vm *vm)
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type)
{
struct dma_fence *fence;
+
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
+
+ if (q->tlb_inval[type].last_fence &&
+ test_bit(DMA_FENCE_FLAG_SIGNALED_BIT,
+ &q->tlb_inval[type].last_fence->flags))
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+
+ fence = q->tlb_inval[type].last_fence ?: dma_fence_get_stub();
+ dma_fence_get(fence);
+ return fence;
+}
+
+/**
+ * xe_exec_queue_tlb_inval_last_fence_set() - Set last fence for TLB invalidation
+ * @q: The exec queue
+ * @vm: The VM the engine does a bind for
+ * @fence: The fence
+ * @type: Either primary or media GT
+ *
+ * Set the last fence for the TLB invalidation type on the queue. This takes
+ * a reference on the fence; when closing the queue,
+ * xe_exec_queue_tlb_inval_last_fence_put() should be called to drop it.
+ */
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type)
+{
+ xe_exec_queue_last_fence_lockdep_assert(q, vm);
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ xe_assert(vm->xe, q->flags & (EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_MIGRATE));
+ xe_assert(vm->xe, !dma_fence_is_container(fence));
+
+ xe_exec_queue_tlb_inval_last_fence_put(q, vm, type);
+ q->tlb_inval[type].last_fence = dma_fence_get(fence);
+}
+
+/**
+ * xe_exec_queue_contexts_hwsp_rebase - Re-compute GGTT references
+ * within all LRCs of a queue.
+ * @q: the &xe_exec_queue struct instance containing target LRCs
+ * @scratch: scratch buffer to be used as temporary storage
+ *
+ * Returns: zero on success, negative error code on failure
+ */
+int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch)
+{
+ int i;
int err = 0;
- fence = xe_exec_queue_last_fence_get(q, vm);
- if (fence) {
- err = test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) ?
- 0 : -ETIME;
- dma_fence_put(fence);
+ for (i = 0; i < q->width; ++i) {
+ struct xe_lrc *lrc;
+
+ /* Pairs with WRITE_ONCE in __xe_exec_queue_init */
+ lrc = READ_ONCE(q->lrc[i]);
+ if (!lrc)
+ continue;
+
+ xe_lrc_update_memirq_regs_with_address(lrc, q->hwe, scratch);
+ xe_lrc_update_hwctx_regs_with_address(lrc);
+ err = xe_lrc_setup_wa_bb_with_scratch(lrc, q->hwe, scratch);
+ if (err)
+ break;
}
return err;
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
index 17bc50a7f05a..fda4d4f9bda8 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue.h
@@ -14,6 +14,10 @@ struct drm_file;
struct xe_device;
struct xe_file;
+#define for_each_tlb_inval(__i) \
+ for (__i = XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT; \
+ __i <= XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT; ++__i)
+
struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
u32 logical_mask, u16 width,
struct xe_hw_engine *hw_engine, u32 flags,
@@ -64,8 +68,6 @@ static inline bool xe_exec_queue_uses_pxp(struct xe_exec_queue *q)
bool xe_exec_queue_is_lr(struct xe_exec_queue *q);
-bool xe_exec_queue_ring_full(struct xe_exec_queue *q);
-
bool xe_exec_queue_is_idle(struct xe_exec_queue *q);
void xe_exec_queue_kill(struct xe_exec_queue *q);
@@ -86,8 +88,27 @@ struct dma_fence *xe_exec_queue_last_fence_get_for_resume(struct xe_exec_queue *
struct xe_vm *vm);
void xe_exec_queue_last_fence_set(struct xe_exec_queue *e, struct xe_vm *vm,
struct dma_fence *fence);
-int xe_exec_queue_last_fence_test_dep(struct xe_exec_queue *q,
- struct xe_vm *vm);
+
+void xe_exec_queue_tlb_inval_last_fence_put(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type);
+
+void xe_exec_queue_tlb_inval_last_fence_put_unlocked(struct xe_exec_queue *q,
+ unsigned int type);
+
+struct dma_fence *xe_exec_queue_tlb_inval_last_fence_get(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ unsigned int type);
+
+void xe_exec_queue_tlb_inval_last_fence_set(struct xe_exec_queue *q,
+ struct xe_vm *vm,
+ struct dma_fence *fence,
+ unsigned int type);
+
void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q);
+int xe_exec_queue_contexts_hwsp_rebase(struct xe_exec_queue *q, void *scratch);
+
+struct xe_lrc *xe_exec_queue_lrc(struct xe_exec_queue *q);
+
#endif
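As a rough illustration of how the per-GT TLB-invalidation last-fence API declared above is meant to be used on a bind queue (a hedged sketch; issue_tlb_inval_job() is a hypothetical stand-in for the real TLB-invalidation job submission, and vm->lock must be held in write mode as the kernel-doc requires):

	struct dma_fence *prev, *fence;

	prev = xe_exec_queue_tlb_inval_last_fence_get(q, vm,
						      XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
	fence = issue_tlb_inval_job(q, prev);	/* hypothetical: orders the new job after @prev */
	dma_fence_put(prev);

	if (!IS_ERR(fence)) {
		xe_exec_queue_tlb_inval_last_fence_set(q, vm, fence,
						       XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
		dma_fence_put(fence);
	}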
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index cc1cffb5c87f..771ffe35cd0c 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -15,6 +15,7 @@
#include "xe_hw_fence_types.h"
#include "xe_lrc_types.h"
+struct drm_syncobj;
struct xe_execlist_exec_queue;
struct xe_gt;
struct xe_guc_exec_queue;
@@ -87,6 +88,8 @@ struct xe_exec_queue {
#define EXEC_QUEUE_FLAG_HIGH_PRIORITY BIT(4)
/* flag to indicate low latency hint to guc */
#define EXEC_QUEUE_FLAG_LOW_LATENCY BIT(5)
+/* for migration (kernel copy, clear, bind) jobs */
+#define EXEC_QUEUE_FLAG_MIGRATE BIT(6)
/**
* @flags: flags for this exec queue, should statically setup aside from ban
@@ -132,6 +135,24 @@ struct xe_exec_queue {
struct list_head link;
} lr;
+#define XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT 0
+#define XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT 1
+#define XE_EXEC_QUEUE_TLB_INVAL_COUNT (XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT + 1)
+
+ /** @tlb_inval: TLB invalidations exec queue state */
+ struct {
+ /**
+ * @tlb_inval.dep_scheduler: The TLB invalidation
+ * dependency scheduler
+ */
+ struct xe_dep_scheduler *dep_scheduler;
+ /**
+		 * @tlb_inval.last_fence: last fence for TLB invalidation, protected by
+ * vm->lock in write mode
+ */
+ struct dma_fence *last_fence;
+ } tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_COUNT];
+
/** @pxp: PXP info tracking */
struct {
/** @pxp.type: PXP session type used by this queue */
@@ -140,6 +161,12 @@ struct xe_exec_queue {
struct list_head link;
} pxp;
+ /** @ufence_syncobj: User fence syncobj */
+ struct drm_syncobj *ufence_syncobj;
+
+ /** @ufence_timeline_value: User fence timeline value */
+ u64 ufence_timeline_value;
+
/** @ops: submission backend exec queue operations */
const struct xe_exec_queue_ops *ops;
@@ -147,6 +174,11 @@ struct xe_exec_queue {
const struct xe_ring_ops *ring_ops;
/** @entity: DRM sched entity for this exec queue (1 to 1 relationship) */
struct drm_sched_entity *entity;
+
+#define XE_MAX_JOB_COUNT_PER_EXEC_QUEUE 1000
+ /** @job_cnt: number of drm jobs in this exec queue */
+ atomic_t job_cnt;
+
/**
* @tlb_flush_seqno: The seqno of the last rebind tlb flush performed
* Protected by @vm's resv. Unused if @vm == NULL.
@@ -166,8 +198,14 @@ struct xe_exec_queue_ops {
int (*init)(struct xe_exec_queue *q);
/** @kill: Kill inflight submissions for backend */
void (*kill)(struct xe_exec_queue *q);
- /** @fini: Fini exec queue for submission backend */
+ /** @fini: Undoes the init() for submission backend */
void (*fini)(struct xe_exec_queue *q);
+ /**
+ * @destroy: Destroy exec queue for submission backend. The backend
+ * function must call xe_exec_queue_fini() (which will in turn call the
+ * fini() backend function) to ensure the queue is properly cleaned up.
+ */
+ void (*destroy)(struct xe_exec_queue *q);
/** @set_priority: Set priority for exec queue */
int (*set_priority)(struct xe_exec_queue *q,
enum xe_exec_queue_priority priority);
@@ -186,6 +224,9 @@ struct xe_exec_queue_ops {
* call after suspend. In dma-fencing path thus must return within a
* reasonable amount of time. -ETIME return shall indicate an error
* waiting for suspend resulting in associated VM getting killed.
+	 * An -EAGAIN return indicates the wait should be tried again; if the
+	 * wait is within a work item, the work item should be requeued as a
+	 * deadlock-avoidance mechanism.
*/
int (*suspend_wait)(struct xe_exec_queue *q);
/**
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 788f56b066b6..769d05517f93 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -339,7 +339,7 @@ static int execlist_exec_queue_init(struct xe_exec_queue *q)
const struct drm_sched_init_args args = {
.ops = &drm_sched_ops,
.num_rqs = 1,
- .credit_limit = q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES,
+ .credit_limit = xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES,
.hang_limit = XE_SCHED_HANG_LIMIT,
.timeout = XE_SCHED_JOB_TIMEOUT,
.name = q->hwe->name,
@@ -385,10 +385,20 @@ err_free:
return err;
}
-static void execlist_exec_queue_fini_async(struct work_struct *w)
+static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+{
+ struct xe_execlist_exec_queue *exl = q->execlist;
+
+ drm_sched_entity_fini(&exl->entity);
+ drm_sched_fini(&exl->sched);
+
+ kfree(exl);
+}
+
+static void execlist_exec_queue_destroy_async(struct work_struct *w)
{
struct xe_execlist_exec_queue *ee =
- container_of(w, struct xe_execlist_exec_queue, fini_async);
+ container_of(w, struct xe_execlist_exec_queue, destroy_async);
struct xe_exec_queue *q = ee->q;
struct xe_execlist_exec_queue *exl = q->execlist;
struct xe_device *xe = gt_to_xe(q->gt);
@@ -401,10 +411,6 @@ static void execlist_exec_queue_fini_async(struct work_struct *w)
list_del(&exl->active_link);
spin_unlock_irqrestore(&exl->port->lock, flags);
- drm_sched_entity_fini(&exl->entity);
- drm_sched_fini(&exl->sched);
- kfree(exl);
-
xe_exec_queue_fini(q);
}
@@ -413,10 +419,10 @@ static void execlist_exec_queue_kill(struct xe_exec_queue *q)
/* NIY */
}
-static void execlist_exec_queue_fini(struct xe_exec_queue *q)
+static void execlist_exec_queue_destroy(struct xe_exec_queue *q)
{
- INIT_WORK(&q->execlist->fini_async, execlist_exec_queue_fini_async);
- queue_work(system_unbound_wq, &q->execlist->fini_async);
+ INIT_WORK(&q->execlist->destroy_async, execlist_exec_queue_destroy_async);
+ queue_work(system_unbound_wq, &q->execlist->destroy_async);
}
static int execlist_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -467,6 +473,7 @@ static const struct xe_exec_queue_ops execlist_exec_queue_ops = {
.init = execlist_exec_queue_init,
.kill = execlist_exec_queue_kill,
.fini = execlist_exec_queue_fini,
+ .destroy = execlist_exec_queue_destroy,
.set_priority = execlist_exec_queue_set_priority,
.set_timeslice = execlist_exec_queue_set_timeslice,
.set_preempt_timeout = execlist_exec_queue_set_preempt_timeout,
diff --git a/drivers/gpu/drm/xe/xe_execlist_types.h b/drivers/gpu/drm/xe/xe_execlist_types.h
index 415140936f11..92c4ba52db0c 100644
--- a/drivers/gpu/drm/xe/xe_execlist_types.h
+++ b/drivers/gpu/drm/xe/xe_execlist_types.h
@@ -42,7 +42,7 @@ struct xe_execlist_exec_queue {
bool has_run;
- struct work_struct fini_async;
+ struct work_struct destroy_async;
enum xe_exec_queue_priority active_priority;
struct list_head active_link;
diff --git a/drivers/gpu/drm/xe/xe_force_wake.c b/drivers/gpu/drm/xe/xe_force_wake.c
index 8a5cba22b586..c59a9b330697 100644
--- a/drivers/gpu/drm/xe/xe_force_wake.c
+++ b/drivers/gpu/drm/xe/xe_force_wake.c
@@ -64,7 +64,7 @@ void xe_force_wake_init_engines(struct xe_gt *gt, struct xe_force_wake *fw)
{
int i, j;
- if (!xe_gt_is_media_type(gt))
+ if (xe_gt_is_main_type(gt))
init_domain(fw, XE_FW_DOMAIN_ID_RENDER,
FORCEWAKE_RENDER,
FORCEWAKE_ACK_RENDER);
diff --git a/drivers/gpu/drm/xe/xe_force_wake_types.h b/drivers/gpu/drm/xe/xe_force_wake_types.h
index 899fbbcb3ea9..14b7b86e801b 100644
--- a/drivers/gpu/drm/xe/xe_force_wake_types.h
+++ b/drivers/gpu/drm/xe/xe_force_wake_types.h
@@ -52,7 +52,22 @@ enum xe_force_wake_domains {
};
/**
- * struct xe_force_wake_domain - XE force wake domains
+ * struct xe_force_wake_domain - Xe force wake power domain
+ *
+ * Represents an individual device-internal power domain. The driver must
+ * ensure the power domain is awake before accessing registers or other
+ * hardware functionality that is part of the power domain. Since different
+ * driver threads may access hardware units simultaneously, a reference count
+ * is used to ensure that the domain remains awake as long as any software
+ * is using the part of the hardware covered by the power domain.
+ *
+ * Hardware provides a register interface to allow the driver to request
+ * wake/sleep of power domains, although in most cases the actual action of
+ * powering the hardware up/down is handled by firmware (and may be subject to
+ * requirements and constraints outside of the driver's visibility) so the
+ * driver needs to wait for an acknowledgment that a wake request has been
+ * acted upon before accessing the parts of the hardware that reside within the
+ * power domain.
*/
struct xe_force_wake_domain {
/** @id: domain force wake id */
@@ -70,7 +85,14 @@ struct xe_force_wake_domain {
};
/**
- * struct xe_force_wake - XE force wake
+ * struct xe_force_wake - Xe force wake collection
+ *
+ * Represents a collection of related power domains (struct
+ * xe_force_wake_domain) associated with a subunit of the device.
+ *
+ * Currently only used for GT power domains (where the term "forcewake" is used
+ * in the hardware documentation), although the interface could be extended to
+ * power wells in other parts of the hardware in the future.
*/
struct xe_force_wake {
/** @gt: back pointers to GT */
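A minimal sketch of the refcounted wake/ack flow described in the kernel-doc above, mirroring the pattern used by the EU stall changes earlier in this diff (assumes a valid struct xe_gt *gt):

	unsigned int fw_ref;

	fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_RENDER);
	if (!xe_force_wake_ref_has_domain(fw_ref, XE_FW_RENDER)) {
		/* Ack never arrived; drop whatever partial reference we got */
		xe_force_wake_put(gt_to_fw(gt), fw_ref);
		return -ETIMEDOUT;
	}

	/* ... access registers that live in the RENDER power domain ... */

	xe_force_wake_put(gt_to_fw(gt), fw_ref);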
diff --git a/drivers/gpu/drm/xe/xe_gen_wa_oob.c b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
index ed9183599e31..247e41c1c48d 100644
--- a/drivers/gpu/drm/xe/xe_gen_wa_oob.c
+++ b/drivers/gpu/drm/xe/xe_gen_wa_oob.c
@@ -18,8 +18,8 @@
" *\n" \
" * This file was generated from rules: %s\n" \
" */\n" \
- "#ifndef _GENERATED_XE_WA_OOB_\n" \
- "#define _GENERATED_XE_WA_OOB_\n" \
+ "#ifndef _GENERATED_%s_\n" \
+ "#define _GENERATED_%s_\n" \
"\n" \
"enum {\n"
@@ -52,7 +52,7 @@ static char *strip(char *line, size_t linelen)
}
#define MAX_LINE_LEN 4096
-static int parse(FILE *input, FILE *csource, FILE *cheader)
+static int parse(FILE *input, FILE *csource, FILE *cheader, char *prefix)
{
char line[MAX_LINE_LEN + 1];
char *name, *prev_name = NULL, *rules;
@@ -96,7 +96,7 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
}
if (name) {
- fprintf(cheader, "\tXE_WA_OOB_%s = %u,\n", name, idx);
+ fprintf(cheader, "\t%s_%s = %u,\n", prefix, name, idx);
/* Close previous entry before starting a new one */
if (idx)
@@ -118,7 +118,41 @@ static int parse(FILE *input, FILE *csource, FILE *cheader)
if (idx)
fprintf(csource, ") },\n");
- fprintf(cheader, "\t_XE_WA_OOB_COUNT = %u\n", idx);
+ fprintf(cheader, "\t_%s_COUNT = %u\n", prefix, idx);
+
+ return 0;
+}
+
+/* Avoid GNU vs POSIX basename() discrepancy, just use our own */
+static const char *xbasename(const char *s)
+{
+ const char *p = strrchr(s, '/');
+
+ return p ? p + 1 : s;
+}
+
+static int fn_to_prefix(const char *fn, char *prefix, size_t size)
+{
+ size_t len;
+
+ fn = xbasename(fn);
+ len = strlen(fn);
+
+ if (len > size - 1)
+ return -ENAMETOOLONG;
+
+ memcpy(prefix, fn, len + 1);
+
+ for (char *p = prefix; *p; p++) {
+ switch (*p) {
+ case '.':
+ *p = '\0';
+ return 0;
+ default:
+ *p = toupper(*p);
+ break;
+ }
+ }
return 0;
}
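For example, with the generated header named xe_wa_oob.h, fn_to_prefix() strips the directory, stops at the '.', and upcases the rest, yielding the prefix "XE_WA_OOB"; the header guard then becomes _GENERATED_XE_WA_OOB_ (matching the old hard-coded guard removed above), each rule is emitted as XE_WA_OOB_<name>, and the count as _XE_WA_OOB_COUNT.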
@@ -141,6 +175,7 @@ int main(int argc, const char *argv[])
[ARGS_CHEADER] = { .fn = argv[3], .mode = "w" },
};
int ret = 1;
+ char prefix[128];
if (argc < 3) {
fprintf(stderr, "ERROR: wrong arguments\n");
@@ -148,6 +183,9 @@ int main(int argc, const char *argv[])
return 1;
}
+ if (fn_to_prefix(args[ARGS_CHEADER].fn, prefix, sizeof(prefix)) < 0)
+ return 1;
+
for (int i = 0; i < _ARGS_COUNT; i++) {
args[i].f = fopen(args[i].fn, args[i].mode);
if (!args[i].f) {
@@ -157,9 +195,10 @@ int main(int argc, const char *argv[])
}
}
- fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn);
+ fprintf(args[ARGS_CHEADER].f, HEADER, args[ARGS_INPUT].fn, prefix, prefix);
+
ret = parse(args[ARGS_INPUT].f, args[ARGS_CSOURCE].f,
- args[ARGS_CHEADER].f);
+ args[ARGS_CHEADER].f, prefix);
if (!ret)
fprintf(args[ARGS_CHEADER].f, FOOTER);
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index 7062115909f2..ef481b334af4 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -5,6 +5,7 @@
#include "xe_ggtt.h"
+#include <kunit/visibility.h>
#include <linux/fault-inject.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/sizes.h>
@@ -22,12 +23,14 @@
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
-#include "xe_gt_sriov_vf.h"
-#include "xe_gt_tlb_invalidation.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
+#include "xe_res_cursor.h"
#include "xe_sriov.h"
+#include "xe_tile_printk.h"
+#include "xe_tile_sriov_vf.h"
+#include "xe_tlb_inval.h"
#include "xe_wa.h"
#include "xe_wopcm.h"
@@ -64,13 +67,9 @@
* give us the correct placement for free.
*/
-static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
- u16 pat_index)
+static u64 xelp_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
- u64 pte;
-
- pte = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
- pte |= XE_PAGE_PRESENT;
+ u64 pte = XE_PAGE_PRESENT;
if (xe_bo_is_vram(bo) || xe_bo_is_stolen_devmem(bo))
pte |= XE_GGTT_PTE_DM;
@@ -78,13 +77,12 @@ static u64 xelp_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
return pte;
}
-static u64 xelpg_ggtt_pte_encode_bo(struct xe_bo *bo, u64 bo_offset,
- u16 pat_index)
+static u64 xelpg_ggtt_pte_flags(struct xe_bo *bo, u16 pat_index)
{
struct xe_device *xe = xe_bo_device(bo);
u64 pte;
- pte = xelp_ggtt_pte_encode_bo(bo, bo_offset, pat_index);
+ pte = xelp_ggtt_pte_flags(bo, pat_index);
xe_assert(xe, pat_index <= 3);
@@ -109,10 +107,23 @@ static unsigned int probe_gsm_size(struct pci_dev *pdev)
static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
{
struct xe_tile *tile = ggtt->tile;
- struct xe_gt *affected_gt = XE_WA(tile->primary_gt, 22019338487) ?
- tile->primary_gt : tile->media_gt;
- struct xe_mmio *mmio = &affected_gt->mmio;
- u32 max_gtt_writes = XE_WA(ggtt->tile->primary_gt, 22019338487) ? 1100 : 63;
+ struct xe_gt *affected_gt;
+ u32 max_gtt_writes;
+
+ if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 22019338487)) {
+ affected_gt = tile->primary_gt;
+ max_gtt_writes = 1100;
+
+ /* Only expected to apply to primary GT on dgpu platforms */
+ xe_tile_assert(tile, IS_DGFX(tile_to_xe(tile)));
+ } else {
+ affected_gt = tile->media_gt;
+ max_gtt_writes = 63;
+
+ /* Only expected to apply to media GT on igpu platforms */
+ xe_tile_assert(tile, !IS_DGFX(tile_to_xe(tile)));
+ }
+
/*
* Wa_22019338487: GMD_ID is a RO register, a dummy write forces gunit
* to wait for completion of prior GTT writes before letting this through.
@@ -121,7 +132,7 @@ static void ggtt_update_access_counter(struct xe_ggtt *ggtt)
lockdep_assert_held(&ggtt->lock);
if ((++ggtt->access_count % max_gtt_writes) == 0) {
- xe_mmio_write32(mmio, GMD_ID, 0x0);
+ xe_mmio_write32(&affected_gt->mmio, GMD_ID, 0x0);
ggtt->access_count = 0;
}
}
@@ -140,6 +151,14 @@ static void xe_ggtt_set_pte_and_flush(struct xe_ggtt *ggtt, u64 addr, u64 pte)
ggtt_update_access_counter(ggtt);
}
+static u64 xe_ggtt_get_pte(struct xe_ggtt *ggtt, u64 addr)
+{
+ xe_tile_assert(ggtt->tile, !(addr & XE_PTE_MASK));
+ xe_tile_assert(ggtt->tile, addr < ggtt->size);
+
+ return readq(&ggtt->gsm[addr >> XE_PTE_SHIFT]);
+}
+
static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
{
u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[XE_CACHE_WB];
@@ -149,8 +168,9 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
xe_tile_assert(ggtt->tile, start < end);
if (ggtt->scratch)
- scratch_pte = ggtt->pt_ops->pte_encode_bo(ggtt->scratch, 0,
- pat_index);
+ scratch_pte = xe_bo_addr(ggtt->scratch, 0, XE_PAGE_SIZE) |
+ ggtt->pt_ops->pte_encode_flags(ggtt->scratch,
+ pat_index);
else
scratch_pte = 0;
@@ -160,12 +180,47 @@ static void xe_ggtt_clear(struct xe_ggtt *ggtt, u64 start, u64 size)
}
}
+static void primelockdep(struct xe_ggtt *ggtt)
+{
+ if (!IS_ENABLED(CONFIG_LOCKDEP))
+ return;
+
+ fs_reclaim_acquire(GFP_KERNEL);
+ might_lock(&ggtt->lock);
+ fs_reclaim_release(GFP_KERNEL);
+}
+
+/**
+ * xe_ggtt_alloc - Allocate a GGTT for a given &xe_tile
+ * @tile: &xe_tile
+ *
+ * Allocates a &xe_ggtt for a given tile.
+ *
+ * Return: &xe_ggtt on success, or NULL when out of memory.
+ */
+struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_ggtt *ggtt;
+
+ ggtt = drmm_kzalloc(&xe->drm, sizeof(*ggtt), GFP_KERNEL);
+ if (!ggtt)
+ return NULL;
+
+ if (drmm_mutex_init(&xe->drm, &ggtt->lock))
+ return NULL;
+
+ primelockdep(ggtt);
+ ggtt->tile = tile;
+
+ return ggtt;
+}
+
static void ggtt_fini_early(struct drm_device *drm, void *arg)
{
struct xe_ggtt *ggtt = arg;
destroy_workqueue(ggtt->wq);
- mutex_destroy(&ggtt->lock);
drm_mm_takedown(&ggtt->mm);
}
@@ -176,31 +231,52 @@ static void ggtt_fini(void *arg)
ggtt->scratch = NULL;
}
-static void primelockdep(struct xe_ggtt *ggtt)
+#ifdef CONFIG_LOCKDEP
+void xe_ggtt_might_lock(struct xe_ggtt *ggtt)
{
- if (!IS_ENABLED(CONFIG_LOCKDEP))
- return;
-
- fs_reclaim_acquire(GFP_KERNEL);
might_lock(&ggtt->lock);
- fs_reclaim_release(GFP_KERNEL);
}
+#endif
static const struct xe_ggtt_pt_ops xelp_pt_ops = {
- .pte_encode_bo = xelp_ggtt_pte_encode_bo,
+ .pte_encode_flags = xelp_ggtt_pte_flags,
.ggtt_set_pte = xe_ggtt_set_pte,
+ .ggtt_get_pte = xe_ggtt_get_pte,
};
static const struct xe_ggtt_pt_ops xelpg_pt_ops = {
- .pte_encode_bo = xelpg_ggtt_pte_encode_bo,
+ .pte_encode_flags = xelpg_ggtt_pte_flags,
.ggtt_set_pte = xe_ggtt_set_pte,
+ .ggtt_get_pte = xe_ggtt_get_pte,
};
static const struct xe_ggtt_pt_ops xelpg_pt_wa_ops = {
- .pte_encode_bo = xelpg_ggtt_pte_encode_bo,
+ .pte_encode_flags = xelpg_ggtt_pte_flags,
.ggtt_set_pte = xe_ggtt_set_pte_and_flush,
+ .ggtt_get_pte = xe_ggtt_get_pte,
};
+static void __xe_ggtt_init_early(struct xe_ggtt *ggtt, u32 reserved)
+{
+ drm_mm_init(&ggtt->mm, reserved,
+ ggtt->size - reserved);
+}
+
+int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size)
+{
+ ggtt->size = size;
+ __xe_ggtt_init_early(ggtt, reserved);
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(xe_ggtt_init_kunit);
+
+static void dev_fini_ggtt(void *arg)
+{
+ struct xe_ggtt *ggtt = arg;
+
+ drain_workqueue(ggtt->wq);
+}
+
/**
* xe_ggtt_init_early - Early GGTT initialization
* @ggtt: the &xe_ggtt to be initialized
@@ -219,13 +295,13 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
unsigned int gsm_size;
int err;
- if (IS_SRIOV_VF(xe))
+ if (IS_SRIOV_VF(xe) || GRAPHICS_VERx100(xe) >= 1250)
gsm_size = SZ_8M; /* GGTT is expected to be 4GiB */
else
gsm_size = probe_gsm_size(pdev);
if (gsm_size == 0) {
- drm_err(&xe->drm, "Hardware reported no preallocated GSM\n");
+ xe_tile_err(ggtt->tile, "Hardware reported no preallocated GSM\n");
return -ENOMEM;
}
@@ -239,26 +315,29 @@ int xe_ggtt_init_early(struct xe_ggtt *ggtt)
ggtt->size = GUC_GGTT_TOP;
if (GRAPHICS_VERx100(xe) >= 1270)
- ggtt->pt_ops = (ggtt->tile->media_gt &&
- XE_WA(ggtt->tile->media_gt, 22019338487)) ||
- XE_WA(ggtt->tile->primary_gt, 22019338487) ?
- &xelpg_pt_wa_ops : &xelpg_pt_ops;
+ ggtt->pt_ops =
+ (ggtt->tile->media_gt && XE_GT_WA(ggtt->tile->media_gt, 22019338487)) ||
+ (ggtt->tile->primary_gt && XE_GT_WA(ggtt->tile->primary_gt, 22019338487)) ?
+ &xelpg_pt_wa_ops : &xelpg_pt_ops;
else
ggtt->pt_ops = &xelp_pt_ops;
ggtt->wq = alloc_workqueue("xe-ggtt-wq", 0, WQ_MEM_RECLAIM);
+ if (!ggtt->wq)
+ return -ENOMEM;
- drm_mm_init(&ggtt->mm, xe_wopcm_size(xe),
- ggtt->size - xe_wopcm_size(xe));
- mutex_init(&ggtt->lock);
- primelockdep(ggtt);
+ __xe_ggtt_init_early(ggtt, xe_wopcm_size(xe));
err = drmm_add_action_or_reset(&xe->drm, ggtt_fini_early, ggtt);
if (err)
return err;
+ err = devm_add_action_or_reset(xe->drm.dev, dev_fini_ggtt, ggtt);
+ if (err)
+ return err;
+
if (IS_SRIOV_VF(xe)) {
- err = xe_gt_sriov_vf_prepare_ggtt(xe_tile_get_gt(ggtt->tile, 0));
+ err = xe_tile_sriov_vf_prepare_ggtt(ggtt->tile);
if (err)
return err;
}
@@ -377,7 +456,7 @@ int xe_ggtt_init(struct xe_ggtt *ggtt)
goto err;
}
- xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, ggtt->scratch->size);
+ xe_map_memset(xe, &ggtt->scratch->vmap, 0, 0, xe_bo_size(ggtt->scratch));
xe_ggtt_initial_clear(ggtt);
@@ -394,9 +473,8 @@ static void ggtt_invalidate_gt_tlb(struct xe_gt *gt)
if (!gt)
return;
- err = xe_gt_tlb_invalidation_ggtt(gt);
- if (err)
- drm_warn(&gt_to_xe(gt)->drm, "xe_gt_tlb_invalidation_ggtt error=%d", err);
+ err = xe_tlb_inval_ggtt(&gt->tlb_inval);
+ xe_gt_WARN(gt, err, "Failed to invalidate GGTT (%pe)", ERR_PTR(err));
}
static void xe_ggtt_invalidate(struct xe_ggtt *ggtt)
@@ -423,22 +501,23 @@ static void xe_ggtt_dump_node(struct xe_ggtt *ggtt,
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
string_get_size(node->size, 1, STRING_UNITS_2, buf, sizeof(buf));
- xe_gt_dbg(ggtt->tile->primary_gt, "GGTT %#llx-%#llx (%s) %s\n",
- node->start, node->start + node->size, buf, description);
+ xe_tile_dbg(ggtt->tile, "GGTT %#llx-%#llx (%s) %s\n",
+ node->start, node->start + node->size, buf, description);
}
}
/**
- * xe_ggtt_node_insert_balloon - prevent allocation of specified GGTT addresses
+ * xe_ggtt_node_insert_balloon_locked - prevent allocation of specified GGTT addresses
* @node: the &xe_ggtt_node to hold reserved GGTT node
* @start: the starting GGTT address of the reserved region
 * @end: the end GGTT address of the reserved region
*
- * Use xe_ggtt_node_remove_balloon() to release a reserved GGTT node.
+ * To be used in cases where ggtt->lock is already taken.
+ * Use xe_ggtt_node_remove_balloon_locked() to release a reserved GGTT node.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end)
+int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node, u64 start, u64 end)
{
struct xe_ggtt *ggtt = node->ggtt;
int err;
@@ -447,18 +526,16 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end)
xe_tile_assert(ggtt->tile, IS_ALIGNED(start, XE_PAGE_SIZE));
xe_tile_assert(ggtt->tile, IS_ALIGNED(end, XE_PAGE_SIZE));
xe_tile_assert(ggtt->tile, !drm_mm_node_allocated(&node->base));
+ lockdep_assert_held(&ggtt->lock);
node->base.color = 0;
node->base.start = start;
node->base.size = end - start;
- mutex_lock(&ggtt->lock);
err = drm_mm_reserve_node(&ggtt->mm, &node->base);
- mutex_unlock(&ggtt->lock);
- if (xe_gt_WARN(ggtt->tile->primary_gt, err,
- "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
- node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
+ if (xe_tile_WARN(ggtt->tile, err, "Failed to balloon GGTT %#llx-%#llx (%pe)\n",
+ node->base.start, node->base.start + node->base.size, ERR_PTR(err)))
return err;
xe_ggtt_dump_node(ggtt, &node->base, "balloon");
@@ -466,27 +543,72 @@ int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node, u64 start, u64 end)
}
/**
- * xe_ggtt_node_remove_balloon - release a reserved GGTT region
+ * xe_ggtt_node_remove_balloon_locked - release a reserved GGTT region
* @node: the &xe_ggtt_node with reserved GGTT region
*
- * See xe_ggtt_node_insert_balloon() for details.
+ * To be used in cases where ggtt->lock is already taken.
+ * See xe_ggtt_node_insert_balloon_locked() for details.
*/
-void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node)
+void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node)
{
- if (!node || !node->ggtt)
+ if (!xe_ggtt_node_allocated(node))
return;
- if (!drm_mm_node_allocated(&node->base))
- goto free_node;
+ lockdep_assert_held(&node->ggtt->lock);
xe_ggtt_dump_node(node->ggtt, &node->base, "remove-balloon");
- mutex_lock(&node->ggtt->lock);
drm_mm_remove_node(&node->base);
- mutex_unlock(&node->ggtt->lock);
+}
-free_node:
- xe_ggtt_node_fini(node);
+static void xe_ggtt_assert_fit(struct xe_ggtt *ggtt, u64 start, u64 size)
+{
+ struct xe_tile *tile = ggtt->tile;
+ struct xe_device *xe = tile_to_xe(tile);
+ u64 __maybe_unused wopcm = xe_wopcm_size(xe);
+
+ xe_tile_assert(tile, start >= wopcm);
+ xe_tile_assert(tile, start + size < ggtt->size - wopcm);
+}
+
+/**
+ * xe_ggtt_shift_nodes_locked - Shift GGTT nodes to adjust for a change in usable address range.
+ * @ggtt: the &xe_ggtt struct instance
+ * @shift: change to the location of area provisioned for current VF
+ *
+ * This function moves all nodes from the GGTT VM to a temporary list. These nodes are
+ * expected to represent allocations in the range formerly assigned to the current VF,
+ * before the range changed. Once the GGTT VM is completely clear of nodes, they are
+ * re-added with shifted offsets.
+ *
+ * The function cannot fail - it only shifts existing nodes, without any additional
+ * processing. If the nodes fit at the old address range, they will also fit at the
+ * new one. A failure inside this function would indicate that the list of nodes was
+ * either already damaged, or that the shift moves the address range outside of valid
+ * bounds. Both cases justify an assert rather than an error code.
+ */
+void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift)
+{
+ struct xe_tile *tile __maybe_unused = ggtt->tile;
+ struct drm_mm_node *node, *tmpn;
+ LIST_HEAD(temp_list_head);
+
+ lockdep_assert_held(&ggtt->lock);
+
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm)
+ xe_ggtt_assert_fit(ggtt, node->start + shift, node->size);
+
+ drm_mm_for_each_node_safe(node, tmpn, &ggtt->mm) {
+ drm_mm_remove_node(node);
+ list_add(&node->node_list, &temp_list_head);
+ }
+
+ list_for_each_entry_safe(node, tmpn, &temp_list_head, node_list) {
+ list_del(&node->node_list);
+ node->start += shift;
+ drm_mm_reserve_node(&ggtt->mm, node);
+ xe_tile_assert(tile, drm_mm_node_allocated(node));
+ }
}
/**
@@ -537,12 +659,12 @@ int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align)
* xe_ggtt_node_init - Initialize %xe_ggtt_node struct
* @ggtt: the &xe_ggtt where the new node will later be inserted/reserved.
*
- * This function will allocated the struct %xe_ggtt_node and return it's pointer.
+ * This function will allocate the struct %xe_ggtt_node and return its pointer.
* This struct will then be freed after the node removal upon xe_ggtt_node_remove()
- * or xe_ggtt_node_remove_balloon().
+ * or xe_ggtt_node_remove_balloon_locked().
* Having %xe_ggtt_node struct allocated doesn't mean that the node is already allocated
* in GGTT. Only the xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(),
- * xe_ggtt_node_insert_balloon() will ensure the node is inserted or reserved in GGTT.
+ * xe_ggtt_node_insert_balloon_locked() will ensure the node is inserted or reserved in GGTT.
*
* Return: A pointer to %xe_ggtt_node struct on success. An ERR_PTR otherwise.
**/
@@ -564,7 +686,7 @@ struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt)
* @node: the &xe_ggtt_node to be freed
*
* If anything went wrong with either xe_ggtt_node_insert(), xe_ggtt_node_insert_locked(),
- * or xe_ggtt_node_insert_balloon(); and this @node is not going to be reused, then,
+ * or xe_ggtt_node_insert_balloon_locked(); and this @node is not going to be reused, then,
* this function needs to be called to free the %xe_ggtt_node struct
**/
void xe_ggtt_node_fini(struct xe_ggtt_node *node)
@@ -587,30 +709,77 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
}
/**
+ * xe_ggtt_node_pt_size() - Get the size of page table entries needed to map a GGTT node.
+ * @node: the &xe_ggtt_node
+ *
+ * Return: GGTT node page table entries size in bytes.
+ */
+size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node)
+{
+ if (!node)
+ return 0;
+
+ return node->base.size / XE_PAGE_SIZE * sizeof(u64);
+}
+
+/**
* xe_ggtt_map_bo - Map the BO into GGTT
* @ggtt: the &xe_ggtt where node will be mapped
+ * @node: the &xe_ggtt_node where this BO is mapped
* @bo: the &xe_bo to be mapped
+ * @pat_index: Which pat_index to use.
*/
-void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
+ struct xe_bo *bo, u16 pat_index)
{
- u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
- u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
- u64 start;
- u64 offset, pte;
- if (XE_WARN_ON(!bo->ggtt_node[ggtt->tile->id]))
+ u64 start, pte, end;
+ struct xe_res_cursor cur;
+
+ if (XE_WARN_ON(!node))
return;
- start = bo->ggtt_node[ggtt->tile->id]->base.start;
+ start = node->base.start;
+ end = start + xe_bo_size(bo);
+
+ pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index);
+ if (!xe_bo_is_vram(bo) && !xe_bo_is_stolen(bo)) {
+ xe_assert(xe_bo_device(bo), bo->ttm.ttm);
+
+ for (xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &cur);
+ cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
+ ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
+ pte | xe_res_dma(&cur));
+ } else {
+ /* Prepend GPU offset */
+ pte |= vram_region_gpu_offset(bo->ttm.resource);
- for (offset = 0; offset < bo->size; offset += XE_PAGE_SIZE) {
- pte = ggtt->pt_ops->pte_encode_bo(bo, offset, pat_index);
- ggtt->pt_ops->ggtt_set_pte(ggtt, start + offset, pte);
+ for (xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
+ cur.remaining; xe_res_next(&cur, XE_PAGE_SIZE))
+ ggtt->pt_ops->ggtt_set_pte(ggtt, end - cur.remaining,
+ pte + cur.start);
}
}
+/**
+ * xe_ggtt_map_bo_unlocked - Restore a mapping of a BO into GGTT
+ * @ggtt: the &xe_ggtt where node will be mapped
+ * @bo: the &xe_bo to be mapped
+ *
+ * This is used to restore a GGTT mapping after suspend.
+ */
+void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo)
+{
+ u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
+ u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
+
+ mutex_lock(&ggtt->lock);
+ xe_ggtt_map_bo(ggtt, bo->ggtt_node[ggtt->tile->id], bo, pat_index);
+ mutex_unlock(&ggtt->lock);
+}
+
static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end)
+ u64 start, u64 end, struct drm_exec *exec)
{
u64 alignment = bo->min_align > 0 ? bo->min_align : XE_PAGE_SIZE;
u8 tile_id = ggtt->tile->id;
@@ -621,11 +790,11 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
if (XE_WARN_ON(bo->ggtt_node[tile_id])) {
/* Someone's already inserted this BO in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));
return 0;
}
- err = xe_bo_validate(bo, NULL, false);
+ err = xe_bo_validate(bo, NULL, false, exec);
if (err)
return err;
@@ -640,12 +809,15 @@ static int __xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
mutex_lock(&ggtt->lock);
err = drm_mm_insert_node_in_range(&ggtt->mm, &bo->ggtt_node[tile_id]->base,
- bo->size, alignment, 0, start, end, 0);
+ xe_bo_size(bo), alignment, 0, start, end, 0);
if (err) {
xe_ggtt_node_fini(bo->ggtt_node[tile_id]);
bo->ggtt_node[tile_id] = NULL;
} else {
- xe_ggtt_map_bo(ggtt, bo);
+ u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
+ u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
+
+ xe_ggtt_map_bo(ggtt, bo->ggtt_node[tile_id], bo, pat_index);
}
mutex_unlock(&ggtt->lock);
@@ -664,25 +836,28 @@ out:
* @bo: the &xe_bo to be inserted
* @start: address where it will be inserted
* @end: end of the range where it will be inserted
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Return: 0 on success or a negative error code on failure.
*/
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end)
+ u64 start, u64 end, struct drm_exec *exec)
{
- return __xe_ggtt_insert_bo_at(ggtt, bo, start, end);
+ return __xe_ggtt_insert_bo_at(ggtt, bo, start, end, exec);
}
/**
* xe_ggtt_insert_bo - Insert BO into GGTT
* @ggtt: the &xe_ggtt where bo will be inserted
* @bo: the &xe_bo to be inserted
+ * @exec: The drm_exec transaction to use for exhaustive eviction.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo,
+ struct drm_exec *exec)
{
- return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX);
+ return __xe_ggtt_insert_bo_at(ggtt, bo, 0, U64_MAX, exec);
}
/**
@@ -698,7 +873,7 @@ void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
return;
/* This BO is not currently in the GGTT */
- xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == bo->size);
+ xe_tile_assert(ggtt->tile, bo->ggtt_node[tile_id]->base.size == xe_bo_size(bo));
xe_ggtt_node_remove(bo->ggtt_node[tile_id],
bo->flags & XE_BO_FLAG_GGTT_INVALIDATE);
@@ -780,6 +955,85 @@ void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid)
xe_ggtt_assign_locked(node->ggtt, &node->base, vfid);
mutex_unlock(&node->ggtt->lock);
}
+
+/**
+ * xe_ggtt_node_save() - Save a &xe_ggtt_node to a buffer.
+ * @node: the &xe_ggtt_node to be saved
+ * @dst: destination buffer
+ * @size: destination buffer size in bytes
+ * @vfid: VF identifier
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid)
+{
+ struct xe_ggtt *ggtt;
+ u64 start, end;
+ u64 *buf = dst;
+ u64 pte;
+
+ if (!node)
+ return -ENOENT;
+
+ guard(mutex)(&node->ggtt->lock);
+
+ if (xe_ggtt_node_pt_size(node) != size)
+ return -EINVAL;
+
+ ggtt = node->ggtt;
+ start = node->base.start;
+ end = start + node->base.size - 1;
+
+ while (start < end) {
+ pte = ggtt->pt_ops->ggtt_get_pte(ggtt, start);
+ if (vfid != u64_get_bits(pte, GGTT_PTE_VFID))
+ return -EPERM;
+
+ *buf++ = u64_replace_bits(pte, 0, GGTT_PTE_VFID);
+ start += XE_PAGE_SIZE;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_ggtt_node_load() - Load a &xe_ggtt_node from a buffer.
+ * @node: the &xe_ggtt_node to be loaded
+ * @src: source buffer
+ * @size: source buffer size in bytes
+ * @vfid: VF identifier
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid)
+{
+ u64 vfid_pte = xe_encode_vfid_pte(vfid);
+ const u64 *buf = src;
+ struct xe_ggtt *ggtt;
+ u64 start, end;
+
+ if (!node)
+ return -ENOENT;
+
+ guard(mutex)(&node->ggtt->lock);
+
+ if (xe_ggtt_node_pt_size(node) != size)
+ return -EINVAL;
+
+ ggtt = node->ggtt;
+ start = node->base.start;
+ end = start + node->base.size - 1;
+
+ while (start < end) {
+ vfid_pte = u64_replace_bits(*buf++, vfid, GGTT_PTE_VFID);
+ ggtt->pt_ops->ggtt_set_pte(ggtt, start, vfid_pte);
+ start += XE_PAGE_SIZE;
+ }
+ xe_ggtt_invalidate(ggtt);
+
+ return 0;
+}
+
#endif
/**
@@ -841,3 +1095,30 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer
return total;
}
+
+/**
+ * xe_ggtt_encode_pte_flags - Get PTE encoding flags for BO
+ * @ggtt: &xe_ggtt
+ * @bo: &xe_bo
+ * @pat_index: The pat_index for the PTE.
+ *
+ * This function returns the PTE flags for a given BO, without the address bits.
+ * It is used by the DPT code to fill a GGTT-mapped BO with a linear lookup table.
+ *
+ * Return: the encoded PTE flags.
+ */
+u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt,
+ struct xe_bo *bo, u16 pat_index)
+{
+ return ggtt->pt_ops->pte_encode_flags(bo, pat_index);
+}
+
+/**
+ * xe_ggtt_read_pte - Read a PTE from the GGTT
+ * @ggtt: &xe_ggtt
+ * @offset: the offset for which the mapping should be read.
+ *
+ * Used by test cases and by the display code when reading out an inherited BIOS FB.
+ *
+ * Return: the raw PTE value at @offset.
+ */
+u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset)
+{
+ return ioread64(ggtt->gsm + (offset / XE_PAGE_SIZE));
+}
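
The PTE save/restore helpers above let a PF snapshot a VF's GGTT PTEs (with the VFID field cleared) and later write them back, re-stamping the VFID and invalidating the GGTT. A minimal sketch of that round trip, assuming the usual driver includes; the wrapper function and the kvmalloc'd buffer are illustrative, only the xe_ggtt_node_* calls come from this code:

static int vf_ggtt_pte_roundtrip(struct xe_ggtt_node *node, u16 vfid)
{
	size_t size = xe_ggtt_node_pt_size(node);	/* one u64 PTE per GGTT page */
	void *buf;
	int err;

	buf = kvmalloc(size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;

	/* Copy out the VF's PTEs; fails with -EPERM if a PTE is not owned by @vfid. */
	err = xe_ggtt_node_save(node, buf, size, vfid);
	if (!err)
		/* Write them back; the VFID is re-applied and the GGTT invalidated. */
		err = xe_ggtt_node_load(node, buf, size, vfid);

	kvfree(buf);
	return err;
}
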
diff --git a/drivers/gpu/drm/xe/xe_ggtt.h b/drivers/gpu/drm/xe/xe_ggtt.h
index 27e7d67de004..93fea4b6079c 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.h
+++ b/drivers/gpu/drm/xe/xe_ggtt.h
@@ -9,25 +9,33 @@
#include "xe_ggtt_types.h"
struct drm_printer;
+struct xe_tile;
+struct drm_exec;
+struct xe_ggtt *xe_ggtt_alloc(struct xe_tile *tile);
int xe_ggtt_init_early(struct xe_ggtt *ggtt);
+int xe_ggtt_init_kunit(struct xe_ggtt *ggtt, u32 reserved, u32 size);
int xe_ggtt_init(struct xe_ggtt *ggtt);
struct xe_ggtt_node *xe_ggtt_node_init(struct xe_ggtt *ggtt);
void xe_ggtt_node_fini(struct xe_ggtt_node *node);
-int xe_ggtt_node_insert_balloon(struct xe_ggtt_node *node,
- u64 start, u64 size);
-void xe_ggtt_node_remove_balloon(struct xe_ggtt_node *node);
+int xe_ggtt_node_insert_balloon_locked(struct xe_ggtt_node *node,
+ u64 start, u64 size);
+void xe_ggtt_node_remove_balloon_locked(struct xe_ggtt_node *node);
+void xe_ggtt_shift_nodes_locked(struct xe_ggtt *ggtt, s64 shift);
int xe_ggtt_node_insert(struct xe_ggtt_node *node, u32 size, u32 align);
int xe_ggtt_node_insert_locked(struct xe_ggtt_node *node,
u32 size, u32 align, u32 mm_flags);
void xe_ggtt_node_remove(struct xe_ggtt_node *node, bool invalidate);
bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node);
-void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
-int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
+size_t xe_ggtt_node_pt_size(const struct xe_ggtt_node *node);
+void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_ggtt_node *node,
+ struct xe_bo *bo, u16 pat_index);
+void xe_ggtt_map_bo_unlocked(struct xe_ggtt *ggtt, struct xe_bo *bo);
+int xe_ggtt_insert_bo(struct xe_ggtt *ggtt, struct xe_bo *bo, struct drm_exec *exec);
int xe_ggtt_insert_bo_at(struct xe_ggtt *ggtt, struct xe_bo *bo,
- u64 start, u64 end);
+ u64 start, u64 end, struct drm_exec *exec);
void xe_ggtt_remove_bo(struct xe_ggtt *ggtt, struct xe_bo *bo);
u64 xe_ggtt_largest_hole(struct xe_ggtt *ggtt, u64 alignment, u64 *spare);
@@ -36,6 +44,18 @@ u64 xe_ggtt_print_holes(struct xe_ggtt *ggtt, u64 alignment, struct drm_printer
#ifdef CONFIG_PCI_IOV
void xe_ggtt_assign(const struct xe_ggtt_node *node, u16 vfid);
+int xe_ggtt_node_save(struct xe_ggtt_node *node, void *dst, size_t size, u16 vfid);
+int xe_ggtt_node_load(struct xe_ggtt_node *node, const void *src, size_t size, u16 vfid);
#endif
+#ifndef CONFIG_LOCKDEP
+static inline void xe_ggtt_might_lock(struct xe_ggtt *ggtt)
+{ }
+#else
+void xe_ggtt_might_lock(struct xe_ggtt *ggtt);
+#endif
+
+u64 xe_ggtt_encode_pte_flags(struct xe_ggtt *ggtt, struct xe_bo *bo, u16 pat_index);
+u64 xe_ggtt_read_pte(struct xe_ggtt *ggtt, u64 offset);
+
#endif
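
xe_ggtt_insert_bo() and xe_ggtt_insert_bo_at() now take a &drm_exec transaction, which is passed down to xe_bo_validate() so insertion can take part in exhaustive eviction. A hedged sketch of a caller, following the generic drm_exec locking pattern; the real call sites in the driver may structure this differently:

#include <drm/drm_exec.h>

static int ggtt_map_bo_example(struct xe_ggtt *ggtt, struct xe_bo *bo)
{
	struct drm_exec exec;
	int err;

	drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
	drm_exec_until_all_locked(&exec) {
		/* Lock the BO's GEM object in the transaction before validating. */
		err = drm_exec_lock_obj(&exec, &bo->ttm.base);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		err = xe_ggtt_insert_bo(ggtt, bo, &exec);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;
	}
	drm_exec_fini(&exec);

	return err;
}
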
diff --git a/drivers/gpu/drm/xe/xe_ggtt_types.h b/drivers/gpu/drm/xe/xe_ggtt_types.h
index cb02b7994a9a..dacd796f8184 100644
--- a/drivers/gpu/drm/xe/xe_ggtt_types.h
+++ b/drivers/gpu/drm/xe/xe_ggtt_types.h
@@ -74,10 +74,12 @@ struct xe_ggtt_node {
* Which can vary from platform to platform.
*/
struct xe_ggtt_pt_ops {
- /** @pte_encode_bo: Encode PTE address for a given BO */
- u64 (*pte_encode_bo)(struct xe_bo *bo, u64 bo_offset, u16 pat_index);
+ /** @pte_encode_flags: Encode PTE flags for a given BO */
+ u64 (*pte_encode_flags)(struct xe_bo *bo, u16 pat_index);
/** @ggtt_set_pte: Directly write into GGTT's PTE */
void (*ggtt_set_pte)(struct xe_ggtt *ggtt, u64 addr, u64 pte);
+ /** @ggtt_get_pte: Directly read from GGTT's PTE */
+ u64 (*ggtt_get_pte)(struct xe_ggtt *ggtt, u64 addr);
};
#endif
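
With the &xe_ggtt_pt_ops split, the cacheability/validity bits are encoded once per BO via pte_encode_flags() and the per-page address is added on top by the mapping loop in xe_ggtt_map_bo(), while ggtt_get_pte() provides the symmetric read access used by the PTE save/restore and readout paths. A small illustrative helper (hypothetical; the hooks are normally only driven from within xe_ggtt.c):

static void ggtt_write_and_check_pte(struct xe_ggtt *ggtt, struct xe_bo *bo,
				     u16 pat_index, u64 ggtt_offset, u64 page_addr)
{
	/* Flags are encoded once per BO; the per-page address is added on top. */
	u64 pte = ggtt->pt_ops->pte_encode_flags(bo, pat_index) + page_addr;

	ggtt->pt_ops->ggtt_set_pte(ggtt, ggtt_offset, pte);

	/* The new read hook makes the write observable, e.g. for save/restore. */
	WARN_ON(ggtt->pt_ops->ggtt_get_pte(ggtt, ggtt_offset) != pte);
}
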
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.c b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
index 869b43a4151d..f91e06d03511 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.c
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.c
@@ -122,3 +122,17 @@ void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched,
list_add_tail(&msg->link, &sched->msgs);
xe_sched_process_msg_queue(sched);
}
+
+/**
+ * xe_sched_add_msg_head() - Add a message to the head of the scheduler's message list
+ * @sched: Xe GPU scheduler
+ * @msg: Message to add
+ *
+ * The caller must hold the scheduler's job_list_lock, e.g. via xe_sched_msg_lock().
+ */
+void xe_sched_add_msg_head(struct xe_gpu_scheduler *sched,
+ struct xe_sched_msg *msg)
+{
+ lockdep_assert_held(&sched->base.job_list_lock);
+
+ list_add(&msg->link, &sched->msgs);
+ xe_sched_process_msg_queue(sched);
+}
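
Unlike xe_sched_add_msg(), xe_sched_add_msg_head() queues at the head of the message list and expects the caller to already hold the scheduler's job_list_lock, which is what the lockdep assert enforces. A minimal caller sketch using the existing lock wrappers from xe_gpu_scheduler.h (message setup omitted):

	xe_sched_msg_lock(sched);		/* takes sched->base.job_list_lock */
	xe_sched_add_msg_head(sched, msg);	/* urgent message jumps the queue */
	xe_sched_msg_unlock(sched);
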
diff --git a/drivers/gpu/drm/xe/xe_gpu_scheduler.h b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
index c250ea773491..c7a77a3a9681 100644
--- a/drivers/gpu/drm/xe/xe_gpu_scheduler.h
+++ b/drivers/gpu/drm/xe/xe_gpu_scheduler.h
@@ -7,7 +7,7 @@
#define _XE_GPU_SCHEDULER_H_
#include "xe_gpu_scheduler_types.h"
-#include "xe_sched_job_types.h"
+#include "xe_sched_job.h"
int xe_sched_init(struct xe_gpu_scheduler *sched,
const struct drm_sched_backend_ops *ops,
@@ -28,6 +28,8 @@ void xe_sched_add_msg(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg);
void xe_sched_add_msg_locked(struct xe_gpu_scheduler *sched,
struct xe_sched_msg *msg);
+void xe_sched_add_msg_head(struct xe_gpu_scheduler *sched,
+ struct xe_sched_msg *msg);
static inline void xe_sched_msg_lock(struct xe_gpu_scheduler *sched)
{
@@ -51,7 +53,17 @@ static inline void xe_sched_tdr_queue_imm(struct xe_gpu_scheduler *sched)
static inline void xe_sched_resubmit_jobs(struct xe_gpu_scheduler *sched)
{
- drm_sched_resubmit_jobs(&sched->base);
+ struct drm_sched_job *s_job;
+ bool restore_replay = false;
+
+ list_for_each_entry(s_job, &sched->base.pending_list, list) {
+ struct drm_sched_fence *s_fence = s_job->s_fence;
+ struct dma_fence *hw_fence = s_fence->parent;
+
+ restore_replay |= to_xe_sched_job(s_job)->restore_replay;
+ if (restore_replay || (hw_fence && !dma_fence_is_signaled(hw_fence)))
+ sched->base.ops->run_job(s_job);
+ }
}
static inline bool
@@ -68,17 +80,30 @@ static inline void xe_sched_add_pending_job(struct xe_gpu_scheduler *sched,
spin_unlock(&sched->base.job_list_lock);
}
+/**
+ * xe_sched_first_pending_job() - Find the first pending job whose hardware fence is unsignaled
+ * @sched: Xe GPU scheduler
+ *
+ * Return: the first unsignaled job in the pending list, or NULL if none
+ */
static inline
struct xe_sched_job *xe_sched_first_pending_job(struct xe_gpu_scheduler *sched)
{
- struct xe_sched_job *job;
+ struct xe_sched_job *job, *r_job = NULL;
spin_lock(&sched->base.job_list_lock);
- job = list_first_entry_or_null(&sched->base.pending_list,
- struct xe_sched_job, drm.list);
+ list_for_each_entry(job, &sched->base.pending_list, drm.list) {
+ struct drm_sched_fence *s_fence = job->drm.s_fence;
+ struct dma_fence *hw_fence = s_fence->parent;
+
+ if (hw_fence && !dma_fence_is_signaled(hw_fence)) {
+ r_job = job;
+ break;
+ }
+ }
spin_unlock(&sched->base.job_list_lock);
- return job;
+ return r_job;
}
static inline int
diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
index 0bcf97063ff6..dd69cb834f8e 100644
--- a/drivers/gpu/drm/xe/xe_gsc.c
+++ b/drivers/gpu/drm/xe/xe_gsc.c
@@ -59,7 +59,8 @@ static int memcpy_fw(struct xe_gsc *gsc)
xe_map_memcpy_from(xe, storage, &gsc->fw.bo->vmap, 0, fw_size);
xe_map_memcpy_to(xe, &gsc->private->vmap, 0, storage, fw_size);
- xe_map_memset(xe, &gsc->private->vmap, fw_size, 0, gsc->private->size - fw_size);
+ xe_map_memset(xe, &gsc->private->vmap, fw_size, 0,
+ xe_bo_size(gsc->private) - fw_size);
kfree(storage);
@@ -82,7 +83,8 @@ static int emit_gsc_upload(struct xe_gsc *gsc)
bb->cs[bb->len++] = GSC_FW_LOAD;
bb->cs[bb->len++] = lower_32_bits(offset);
bb->cs[bb->len++] = upper_32_bits(offset);
- bb->cs[bb->len++] = (gsc->private->size / SZ_4K) | GSC_FW_LOAD_LIMIT_VALID;
+ bb->cs[bb->len++] = (xe_bo_size(gsc->private) / SZ_4K) |
+ GSC_FW_LOAD_LIMIT_VALID;
job = xe_bb_create_job(gsc->q, bb);
if (IS_ERR(job)) {
@@ -134,10 +136,10 @@ static int query_compatibility_version(struct xe_gsc *gsc)
u64 ggtt_offset;
int err;
- bo = xe_bo_create_pin_map(xe, tile, NULL, GSC_VER_PKT_SZ * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, tile, GSC_VER_PKT_SZ * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo)) {
xe_gt_err(gt, "failed to allocate bo for GSC version query\n");
return PTR_ERR(bo);
@@ -264,7 +266,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
unsigned int fw_ref;
int ret;
- if (XE_WA(tile->primary_gt, 14018094691)) {
+ if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 14018094691)) {
fw_ref = xe_force_wake_get(gt_to_fw(tile->primary_gt), XE_FORCEWAKE_ALL);
/*
@@ -279,7 +281,7 @@ static int gsc_upload_and_init(struct xe_gsc *gsc)
ret = gsc_upload(gsc);
- if (XE_WA(tile->primary_gt, 14018094691))
+ if (tile->primary_gt && XE_GT_WA(tile->primary_gt, 14018094691))
xe_force_wake_put(gt_to_fw(tile->primary_gt), fw_ref);
if (ret)
@@ -591,7 +593,7 @@ void xe_gsc_wa_14015076503(struct xe_gt *gt, bool prep)
u32 gs1_clr = prep ? 0 : HECI_H_GS1_ER_PREP;
/* WA only applies if the GSC is loaded */
- if (!XE_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
+ if (!XE_GT_WA(gt, 14015076503) || !gsc_fw_is_loaded(gt))
return;
xe_mmio_rmw32(&gt->mmio, HECI_H_GS1(MTL_GSC_HECI2_BASE), gs1_clr, gs1_set);
diff --git a/drivers/gpu/drm/xe/xe_gsc_proxy.c b/drivers/gpu/drm/xe/xe_gsc_proxy.c
index d0519cd6704a..464282a89eef 100644
--- a/drivers/gpu/drm/xe/xe_gsc_proxy.c
+++ b/drivers/gpu/drm/xe/xe_gsc_proxy.c
@@ -23,6 +23,7 @@
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_pm.h"
+#include "xe_tile.h"
/*
* GSC proxy:
@@ -483,7 +484,7 @@ int xe_gsc_proxy_init(struct xe_gsc *gsc)
}
/* no multi-tile devices with this feature yet */
- if (tile->id > 0) {
+ if (!xe_tile_is_root(tile)) {
xe_gt_err(gt, "unexpected GSC proxy init on tile %u\n", tile->id);
return -EINVAL;
}
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 0e5d243c9451..cdce210e36f2 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -32,15 +32,14 @@
#include "xe_gt_freq.h"
#include "xe_gt_idle.h"
#include "xe_gt_mcr.h"
-#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_pf.h"
#include "xe_gt_sriov_vf.h"
#include "xe_gt_sysfs.h"
-#include "xe_gt_tlb_invalidation.h"
#include "xe_gt_topology.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_pc.h"
+#include "xe_guc_submit.h"
#include "xe_hw_fence.h"
#include "xe_hw_engine_class_sysfs.h"
#include "xe_irq.h"
@@ -49,6 +48,7 @@
#include "xe_map.h"
#include "xe_migrate.h"
#include "xe_mmio.h"
+#include "xe_pagefault.h"
#include "xe_pat.h"
#include "xe_pm.h"
#include "xe_mocs.h"
@@ -57,6 +57,7 @@
#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_sriov.h"
+#include "xe_tlb_inval.h"
#include "xe_tuning.h"
#include "xe_uc.h"
#include "xe_uc_fw.h"
@@ -64,29 +65,29 @@
#include "xe_wa.h"
#include "xe_wopcm.h"
-static void gt_fini(struct drm_device *drm, void *arg)
-{
- struct xe_gt *gt = arg;
-
- destroy_workqueue(gt->ordered_wq);
-}
-
struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_device *drm = &xe->drm;
+ bool shared_wq = xe->info.needs_shared_vf_gt_wq && tile->primary_gt &&
+ IS_SRIOV_VF(xe);
+ struct workqueue_struct *ordered_wq;
struct xe_gt *gt;
- int err;
- gt = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*gt), GFP_KERNEL);
+ gt = drmm_kzalloc(drm, sizeof(*gt), GFP_KERNEL);
if (!gt)
return ERR_PTR(-ENOMEM);
gt->tile = tile;
- gt->ordered_wq = alloc_ordered_workqueue("gt-ordered-wq",
- WQ_MEM_RECLAIM);
+ if (shared_wq && tile->primary_gt->ordered_wq)
+ ordered_wq = tile->primary_gt->ordered_wq;
+ else
+ ordered_wq = drmm_alloc_ordered_workqueue(drm, "gt-ordered-wq",
+ WQ_MEM_RECLAIM);
+ if (IS_ERR(ordered_wq))
+ return ERR_CAST(ordered_wq);
- err = drmm_add_action_or_reset(&gt_to_xe(gt)->drm, gt_fini, gt);
- if (err)
- return ERR_PTR(err);
+ gt->ordered_wq = ordered_wq;
return gt;
}
@@ -97,7 +98,7 @@ void xe_gt_sanitize(struct xe_gt *gt)
* FIXME: if xe_uc_sanitize is called here, on TGL driver will not
* reload
*/
- gt->uc.guc.submission_state.enabled = false;
+ xe_guc_submit_disable(&gt->uc.guc);
}
static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
@@ -105,20 +106,20 @@ static void xe_gt_enable_host_l2_vram(struct xe_gt *gt)
unsigned int fw_ref;
u32 reg;
- if (!XE_WA(gt, 16023588340))
+ if (!XE_GT_WA(gt, 16023588340))
return;
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
return;
- if (!xe_gt_is_media_type(gt)) {
+ if (xe_gt_is_main_type(gt)) {
reg = xe_gt_mcr_unicast_read_any(gt, XE2_GAMREQSTRM_CTRL);
reg |= CG_DIS_CNTLBUS;
xe_gt_mcr_multicast_write(gt, XE2_GAMREQSTRM_CTRL, reg);
}
- xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0x3);
+ xe_gt_mcr_multicast_write(gt, XEHPC_L3CLOS_MASK(3), 0xF);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
@@ -127,7 +128,7 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
unsigned int fw_ref;
u32 reg;
- if (!XE_WA(gt, 16023588340))
+ if (!XE_GT_WA(gt, 16023588340))
return;
if (xe_gt_is_media_type(gt))
@@ -146,30 +147,23 @@ static void xe_gt_disable_host_l2_vram(struct xe_gt *gt)
static void gt_reset_worker(struct work_struct *w);
-static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
+static int emit_job_sync(struct xe_exec_queue *q, struct xe_bb *bb,
+ long timeout_jiffies)
{
struct xe_sched_job *job;
- struct xe_bb *bb;
struct dma_fence *fence;
long timeout;
- bb = xe_bb_new(gt, 4, false);
- if (IS_ERR(bb))
- return PTR_ERR(bb);
-
job = xe_bb_create_job(q, bb);
- if (IS_ERR(job)) {
- xe_bb_free(bb, NULL);
+ if (IS_ERR(job))
return PTR_ERR(job);
- }
xe_sched_job_arm(job);
fence = dma_fence_get(&job->drm.s_fence->finished);
xe_sched_job_push(job);
- timeout = dma_fence_wait_timeout(fence, false, HZ);
+ timeout = dma_fence_wait_timeout(fence, false, timeout_jiffies);
dma_fence_put(fence);
- xe_bb_free(bb, NULL);
if (timeout < 0)
return timeout;
else if (!timeout)
@@ -178,27 +172,30 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
return 0;
}
+static int emit_nop_job(struct xe_gt *gt, struct xe_exec_queue *q)
+{
+ struct xe_bb *bb;
+ int ret;
+
+ bb = xe_bb_new(gt, 4, false);
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
+
+ ret = emit_job_sync(q, bb, HZ);
+ xe_bb_free(bb, NULL);
+
+ return ret;
+}
+
static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
{
struct xe_reg_sr *sr = &q->hwe->reg_lrc;
struct xe_reg_sr_entry *entry;
+ int count_rmw = 0, count = 0, ret;
unsigned long idx;
- struct xe_sched_job *job;
struct xe_bb *bb;
- struct dma_fence *fence;
- long timeout;
- int count_rmw = 0;
- int count = 0;
-
- if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
- /* Big enough to emit all of the context's 3DSTATE */
- bb = xe_bb_new(gt, xe_gt_lrc_size(gt, q->hwe->class), false);
- else
- /* Just pick a large BB size */
- bb = xe_bb_new(gt, SZ_4K, false);
-
- if (IS_ERR(bb))
- return PTR_ERR(bb);
+ size_t bb_len = 0;
+ u32 *cs;
/* count RMW registers as those will be handled separately */
xa_for_each(&sr->xa, idx, entry) {
@@ -208,13 +205,34 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
++count_rmw;
}
- if (count || count_rmw)
- xe_gt_dbg(gt, "LRC WA %s save-restore batch\n", sr->name);
+ if (count)
+ bb_len += count * 2 + 1;
+
+ if (count_rmw)
+ bb_len += count_rmw * 20 + 7;
+
+ if (q->hwe->class == XE_ENGINE_CLASS_RENDER)
+ /*
+ * Big enough to emit all of the context's 3DSTATE via
+ * xe_lrc_emit_hwe_state_instructions()
+ */
+ bb_len += xe_gt_lrc_size(gt, q->hwe->class) / sizeof(u32);
+
+ xe_gt_dbg(gt, "LRC %s WA job: %zu dwords\n", q->hwe->name, bb_len);
+
+ bb = xe_bb_new(gt, bb_len, false);
+ if (IS_ERR(bb))
+ return PTR_ERR(bb);
+
+ cs = bb->cs;
if (count) {
- /* emit single LRI with all non RMW regs */
+ /*
+ * Emit single LRI with all non RMW regs: 1 leading dw + 2dw per
+ * reg + 1
+ */
- bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(count);
xa_for_each(&sr->xa, idx, entry) {
struct xe_reg reg = entry->reg;
@@ -229,79 +247,68 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_exec_queue *q)
val |= entry->set_bits;
- bb->cs[bb->len++] = reg.addr;
- bb->cs[bb->len++] = val;
+ *cs++ = reg.addr;
+ *cs++ = val;
xe_gt_dbg(gt, "REG[0x%x] = 0x%08x", reg.addr, val);
}
}
if (count_rmw) {
- /* emit MI_MATH for each RMW reg */
+ /* Emit MI_MATH for each RMW reg: 20dw per reg + 7 trailing dw */
xa_for_each(&sr->xa, idx, entry) {
if (entry->reg.masked || entry->clr_bits == ~0)
continue;
- bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
- bb->cs[bb->len++] = entry->reg.addr;
- bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
-
- bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
- MI_LRI_LRM_CS_MMIO;
- bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
- bb->cs[bb->len++] = entry->clr_bits;
- bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
- bb->cs[bb->len++] = entry->set_bits;
-
- bb->cs[bb->len++] = MI_MATH(8);
- bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
- bb->cs[bb->len++] = CS_ALU_INSTR_LOADINV(SRCB, REG1);
- bb->cs[bb->len++] = CS_ALU_INSTR_AND;
- bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
- bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCA, REG0);
- bb->cs[bb->len++] = CS_ALU_INSTR_LOAD(SRCB, REG2);
- bb->cs[bb->len++] = CS_ALU_INSTR_OR;
- bb->cs[bb->len++] = CS_ALU_INSTR_STORE(REG0, ACCU);
-
- bb->cs[bb->len++] = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
- bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
- bb->cs[bb->len++] = entry->reg.addr;
+ *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_DST_CS_MMIO;
+ *cs++ = entry->reg.addr;
+ *cs++ = CS_GPR_REG(0, 0).addr;
+
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(2) |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = CS_GPR_REG(0, 1).addr;
+ *cs++ = entry->clr_bits;
+ *cs++ = CS_GPR_REG(0, 2).addr;
+ *cs++ = entry->set_bits;
+
+ *cs++ = MI_MATH(8);
+ *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
+ *cs++ = CS_ALU_INSTR_LOADINV(SRCB, REG1);
+ *cs++ = CS_ALU_INSTR_AND;
+ *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
+ *cs++ = CS_ALU_INSTR_LOAD(SRCA, REG0);
+ *cs++ = CS_ALU_INSTR_LOAD(SRCB, REG2);
+ *cs++ = CS_ALU_INSTR_OR;
+ *cs++ = CS_ALU_INSTR_STORE(REG0, ACCU);
+
+ *cs++ = MI_LOAD_REGISTER_REG | MI_LRR_SRC_CS_MMIO;
+ *cs++ = CS_GPR_REG(0, 0).addr;
+ *cs++ = entry->reg.addr;
xe_gt_dbg(gt, "REG[%#x] = ~%#x|%#x\n",
entry->reg.addr, entry->clr_bits, entry->set_bits);
}
/* reset used GPR */
- bb->cs[bb->len++] = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) | MI_LRI_LRM_CS_MMIO;
- bb->cs[bb->len++] = CS_GPR_REG(0, 0).addr;
- bb->cs[bb->len++] = 0;
- bb->cs[bb->len++] = CS_GPR_REG(0, 1).addr;
- bb->cs[bb->len++] = 0;
- bb->cs[bb->len++] = CS_GPR_REG(0, 2).addr;
- bb->cs[bb->len++] = 0;
+ *cs++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(3) |
+ MI_LRI_LRM_CS_MMIO;
+ *cs++ = CS_GPR_REG(0, 0).addr;
+ *cs++ = 0;
+ *cs++ = CS_GPR_REG(0, 1).addr;
+ *cs++ = 0;
+ *cs++ = CS_GPR_REG(0, 2).addr;
+ *cs++ = 0;
}
- xe_lrc_emit_hwe_state_instructions(q, bb);
+ cs = xe_lrc_emit_hwe_state_instructions(q, cs);
- job = xe_bb_create_job(q, bb);
- if (IS_ERR(job)) {
- xe_bb_free(bb, NULL);
- return PTR_ERR(job);
- }
+ bb->len = cs - bb->cs;
- xe_sched_job_arm(job);
- fence = dma_fence_get(&job->drm.s_fence->finished);
- xe_sched_job_push(job);
+ ret = emit_job_sync(q, bb, HZ);
- timeout = dma_fence_wait_timeout(fence, false, HZ);
- dma_fence_put(fence);
xe_bb_free(bb, NULL);
- if (timeout < 0)
- return timeout;
- else if (!timeout)
- return -ETIME;
- return 0;
+ return ret;
}
int xe_gt_record_default_lrcs(struct xe_gt *gt)
@@ -363,14 +370,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
goto put_nop_q;
}
- /* Reload golden LRC to record the effect of any indirect W/A */
- err = emit_nop_job(gt, q);
- if (err) {
- xe_gt_err(gt, "hwe %s: emit_nop_job failed (%pe) guc_id=%u\n",
- hwe->name, ERR_PTR(err), q->guc->id);
- goto put_nop_q;
- }
-
xe_map_memcpy_from(xe, default_lrc,
&q->lrc[0]->bo->vmap,
xe_lrc_pphwsp_offset(q->lrc[0]),
@@ -390,6 +389,7 @@ put_exec_queue:
int xe_gt_init_early(struct xe_gt *gt)
{
+ unsigned int fw_ref;
int err;
if (IS_SRIOV_PF(gt_to_xe(gt))) {
@@ -398,9 +398,15 @@ int xe_gt_init_early(struct xe_gt *gt)
return err;
}
+ if (IS_SRIOV_VF(gt_to_xe(gt))) {
+ err = xe_gt_sriov_vf_init_early(gt);
+ if (err)
+ return err;
+ }
+
xe_reg_sr_init(&gt->reg_sr, "GT", gt_to_xe(gt));
- err = xe_wa_init(gt);
+ err = xe_wa_gt_init(gt);
if (err)
return err;
@@ -408,15 +414,36 @@ int xe_gt_init_early(struct xe_gt *gt)
if (err)
return err;
- xe_wa_process_oob(gt);
+ xe_wa_process_gt_oob(gt);
xe_force_wake_init_gt(gt, gt_to_fw(gt));
spin_lock_init(&gt->global_invl_lock);
- err = xe_gt_tlb_invalidation_init_early(gt);
+ err = xe_gt_tlb_inval_init_early(gt);
if (err)
return err;
+ xe_mocs_init_early(gt);
+
+ /*
+ * Only after this point can GT-specific MMIO operations
+ * (including things like communication with the GuC)
+ * be performed.
+ */
+ xe_gt_mmio_init(gt);
+
+ err = xe_uc_init_noalloc(&gt->uc);
+ if (err)
+ return err;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
+
+ xe_gt_mcr_init_early(gt);
+ xe_pat_init(gt);
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
return 0;
}
@@ -431,7 +458,7 @@ static void dump_pat_on_error(struct xe_gt *gt)
xe_pat_dump(gt, &p);
}
-static int gt_fw_domain_init(struct xe_gt *gt)
+static int gt_init_with_gt_forcewake(struct xe_gt *gt)
{
unsigned int fw_ref;
int err;
@@ -440,7 +467,15 @@ static int gt_fw_domain_init(struct xe_gt *gt)
if (!fw_ref)
return -ETIMEDOUT;
- if (!xe_gt_is_media_type(gt)) {
+ err = xe_uc_init(&gt->uc);
+ if (err)
+ goto err_force_wake;
+
+ xe_gt_topology_init(gt);
+ xe_gt_mcr_init(gt);
+ xe_gt_enable_host_l2_vram(gt);
+
+ if (xe_gt_is_main_type(gt)) {
err = xe_ggtt_init(gt_to_tile(gt)->mem.ggtt);
if (err)
goto err_force_wake;
@@ -455,8 +490,10 @@ static int gt_fw_domain_init(struct xe_gt *gt)
xe_gt_mcr_init(gt);
err = xe_hw_engines_init_early(gt);
- if (err)
+ if (err) {
+ dump_pat_on_error(gt);
goto err_force_wake;
+ }
err = xe_hw_engine_class_sysfs_init(gt);
if (err)
@@ -477,13 +514,12 @@ static int gt_fw_domain_init(struct xe_gt *gt)
return 0;
err_force_wake:
- dump_pat_on_error(gt);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return err;
}
-static int all_fw_domain_init(struct xe_gt *gt)
+static int gt_init_with_all_forcewake(struct xe_gt *gt)
{
unsigned int fw_ref;
int err;
@@ -516,7 +552,7 @@ static int all_fw_domain_init(struct xe_gt *gt)
if (err)
goto err_force_wake;
- if (!xe_gt_is_media_type(gt)) {
+ if (xe_gt_is_main_type(gt)) {
/*
* USM has its only SA pool to non-block behind user operations
*/
@@ -532,17 +568,15 @@ static int all_fw_domain_init(struct xe_gt *gt)
}
}
- if (!xe_gt_is_media_type(gt)) {
+ if (xe_gt_is_main_type(gt)) {
struct xe_tile *tile = gt_to_tile(gt);
- tile->migrate = xe_migrate_init(tile);
- if (IS_ERR(tile->migrate)) {
- err = PTR_ERR(tile->migrate);
+ err = xe_migrate_init(tile->migrate);
+ if (err)
goto err_force_wake;
- }
}
- err = xe_uc_init_hw(&gt->uc);
+ err = xe_uc_load_hw(&gt->uc);
if (err)
goto err_force_wake;
@@ -552,13 +586,11 @@ static int all_fw_domain_init(struct xe_gt *gt)
xe_gt_apply_ccs_mode(gt);
}
- if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+ if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
- if (IS_SRIOV_PF(gt_to_xe(gt))) {
- xe_gt_sriov_pf_init(gt);
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
- }
xe_force_wake_put(gt_to_fw(gt), fw_ref);
@@ -570,44 +602,18 @@ err_force_wake:
return err;
}
-/*
- * Initialize enough GT to be able to load GuC in order to obtain hwconfig and
- * enable CTB communication.
- */
-int xe_gt_init_hwconfig(struct xe_gt *gt)
-{
- unsigned int fw_ref;
- int err;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
- return -ETIMEDOUT;
-
- xe_gt_mcr_init_early(gt);
- xe_pat_init(gt);
-
- err = xe_uc_init(&gt->uc);
- if (err)
- goto out_fw;
-
- err = xe_uc_init_hwconfig(&gt->uc);
- if (err)
- goto out_fw;
-
- xe_gt_topology_init(gt);
- xe_gt_mcr_init(gt);
- xe_gt_enable_host_l2_vram(gt);
-
-out_fw:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- return err;
-}
-
static void xe_gt_fini(void *arg)
{
struct xe_gt *gt = arg;
int i;
+ if (disable_work_sync(&gt->reset.worker))
+ /*
+ * If gt_reset_worker was halted from executing, take care of
+ * releasing the rpm reference here.
+ */
+ xe_pm_runtime_put(gt_to_xe(gt));
+
for (i = 0; i < XE_ENGINE_CLASS_MAX; ++i)
xe_hw_fence_irq_finish(&gt->fence_irq[i]);
@@ -630,17 +636,11 @@ int xe_gt_init(struct xe_gt *gt)
if (err)
return err;
- err = xe_gt_pagefault_init(gt);
- if (err)
- return err;
-
- xe_mocs_init_early(gt);
-
err = xe_gt_sysfs_init(gt);
if (err)
return err;
- err = gt_fw_domain_init(gt);
+ err = gt_init_with_gt_forcewake(gt);
if (err)
return err;
@@ -654,7 +654,7 @@ int xe_gt_init(struct xe_gt *gt)
xe_force_wake_init_engines(gt, gt_to_fw(gt));
- err = all_fw_domain_init(gt);
+ err = gt_init_with_all_forcewake(gt);
if (err)
return err;
@@ -664,6 +664,12 @@ int xe_gt_init(struct xe_gt *gt)
if (err)
return err;
+ if (IS_SRIOV_VF(gt_to_xe(gt))) {
+ err = xe_gt_sriov_vf_init(gt);
+ if (err)
+ return err;
+ }
+
return 0;
}
@@ -742,7 +748,7 @@ static int vf_gt_restart(struct xe_gt *gt)
if (err)
return err;
- err = xe_uc_init_hw(&gt->uc);
+ err = xe_uc_load_hw(&gt->uc);
if (err)
return err;
@@ -780,20 +786,17 @@ static int do_gt_restart(struct xe_gt *gt)
if (err)
return err;
- err = xe_uc_init_hw(&gt->uc);
+ err = xe_uc_load_hw(&gt->uc);
if (err)
return err;
- if (IS_SRIOV_PF(gt_to_xe(gt)) && !xe_gt_is_media_type(gt))
+ if (IS_SRIOV_PF(gt_to_xe(gt)) && xe_gt_is_main_type(gt))
xe_lmtt_init_hw(&gt_to_tile(gt)->sriov.pf.lmtt);
if (IS_SRIOV_PF(gt_to_xe(gt)))
xe_gt_sriov_pf_init_hw(gt);
xe_mocs_init(gt);
- err = xe_uc_start(&gt->uc);
- if (err)
- return err;
for_each_hw_engine(hwe, gt, id)
xe_reg_sr_apply_mmio(&hwe->reg_sr, gt);
@@ -801,6 +804,10 @@ static int do_gt_restart(struct xe_gt *gt)
/* Get CCS mode in sync between sw/hw */
xe_gt_apply_ccs_mode(gt);
+ err = xe_uc_start(&gt->uc);
+ if (err)
+ return err;
+
/* Restore GT freq to expected values */
xe_gt_sanitize_freq(gt);
@@ -810,22 +817,21 @@ static int do_gt_restart(struct xe_gt *gt)
return 0;
}
-static int gt_reset(struct xe_gt *gt)
+static void gt_reset_worker(struct work_struct *w)
{
+ struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
unsigned int fw_ref;
int err;
if (xe_device_wedged(gt_to_xe(gt)))
- return -ECANCELED;
+ goto err_pm_put;
/* We only support GT resets with GuC submission */
if (!xe_device_uc_enabled(gt_to_xe(gt)))
- return -ENODEV;
+ goto err_pm_put;
xe_gt_info(gt, "reset started\n");
- xe_pm_runtime_get(gt_to_xe(gt));
-
if (xe_fault_inject_gt_reset()) {
err = -ECANCELED;
goto err_fail;
@@ -839,13 +845,16 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
}
+ if (IS_SRIOV_PF(gt_to_xe(gt)))
+ xe_gt_sriov_pf_stop_prepare(gt);
+
xe_uc_gucrc_disable(&gt->uc);
xe_uc_stop_prepare(&gt->uc);
- xe_gt_pagefault_reset(gt);
+ xe_pagefault_reset(gt_to_xe(gt), gt);
xe_uc_stop(&gt->uc);
- xe_gt_tlb_invalidation_reset(gt);
+ xe_tlb_inval_reset(&gt->tlb_inval);
err = do_gt_reset(gt);
if (err)
@@ -856,29 +865,23 @@ static int gt_reset(struct xe_gt *gt)
goto err_out;
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ /* Pair with get while enqueueing the work in xe_gt_reset_async() */
xe_pm_runtime_put(gt_to_xe(gt));
xe_gt_info(gt, "reset done\n");
- return 0;
+ return;
err_out:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
XE_WARN_ON(xe_uc_start(&gt->uc));
+
err_fail:
xe_gt_err(gt, "reset failed (%pe)\n", ERR_PTR(err));
-
xe_device_declare_wedged(gt_to_xe(gt));
+err_pm_put:
xe_pm_runtime_put(gt_to_xe(gt));
-
- return err;
-}
-
-static void gt_reset_worker(struct work_struct *w)
-{
- struct xe_gt *gt = container_of(w, typeof(*gt), reset.worker);
-
- gt_reset(gt);
}
void xe_gt_reset_async(struct xe_gt *gt)
@@ -890,7 +893,11 @@ void xe_gt_reset_async(struct xe_gt *gt)
return;
xe_gt_info(gt, "reset queued\n");
- queue_work(gt->ordered_wq, &gt->reset.worker);
+
+ /* Pair with put in gt_reset_worker() if work is enqueued */
+ xe_pm_runtime_get_noresume(gt_to_xe(gt));
+ if (!queue_work(gt->ordered_wq, &gt->reset.worker))
+ xe_pm_runtime_put(gt_to_xe(gt));
}
void xe_gt_suspend_prepare(struct xe_gt *gt)
@@ -961,7 +968,7 @@ int xe_gt_sanitize_freq(struct xe_gt *gt)
if ((!xe_uc_fw_is_available(&gt->uc.gsc.fw) ||
xe_uc_fw_is_loaded(&gt->uc.gsc.fw) ||
xe_uc_fw_is_in_error_state(&gt->uc.gsc.fw)) &&
- XE_WA(gt, 22019338487))
+ XE_GT_WA(gt, 22019338487))
ret = xe_guc_pc_restore_stashed_freq(&gt->uc.guc.pc);
return ret;
@@ -1059,5 +1066,5 @@ void xe_gt_declare_wedged(struct xe_gt *gt)
xe_gt_assert(gt, gt_to_xe(gt)->wedged.mode);
xe_uc_declare_wedged(&gt->uc);
- xe_gt_tlb_invalidation_reset(gt);
+ xe_tlb_inval_reset(&gt->tlb_inval);
}
diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
index 187fa6490eaf..9d710049da45 100644
--- a/drivers/gpu/drm/xe/xe_gt.h
+++ b/drivers/gpu/drm/xe/xe_gt.h
@@ -12,6 +12,7 @@
#include "xe_device.h"
#include "xe_device_types.h"
+#include "xe_gt_sriov_vf.h"
#include "xe_hw_engine.h"
#define for_each_hw_engine(hwe__, gt__, id__) \
@@ -21,14 +22,19 @@
#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
+#define GT_VER(gt) ({ \
+ typeof(gt) gt_ = (gt); \
+ struct xe_device *xe = gt_to_xe(gt_); \
+ xe_gt_is_media_type(gt_) ? MEDIA_VER(xe) : GRAPHICS_VER(xe); \
+})
+
extern struct fault_attr gt_reset_failure;
static inline bool xe_fault_inject_gt_reset(void)
{
- return should_fail(&gt_reset_failure, 1);
+ return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&gt_reset_failure, 1);
}
struct xe_gt *xe_gt_alloc(struct xe_tile *tile);
-int xe_gt_init_hwconfig(struct xe_gt *gt);
int xe_gt_init_early(struct xe_gt *gt);
int xe_gt_init(struct xe_gt *gt);
void xe_gt_mmio_init(struct xe_gt *gt);
@@ -107,6 +113,11 @@ static inline bool xe_gt_has_indirect_ring_state(struct xe_gt *gt)
xe_device_uc_enabled(gt_to_xe(gt));
}
+static inline bool xe_gt_is_main_type(struct xe_gt *gt)
+{
+ return gt->info.type == XE_GT_TYPE_MAIN;
+}
+
static inline bool xe_gt_is_media_type(struct xe_gt *gt)
{
return gt->info.type == XE_GT_TYPE_MEDIA;
@@ -120,4 +131,16 @@ static inline bool xe_gt_is_usm_hwe(struct xe_gt *gt, struct xe_hw_engine *hwe)
hwe->instance == gt->usm.reserved_bcs_instance;
}
+/**
+ * xe_gt_recovery_pending() - Check if GT recovery is pending
+ * @gt: the &xe_gt
+ *
+ * Return: true if GT recovery is pending, false otherwise
+ */
+static inline bool xe_gt_recovery_pending(struct xe_gt *gt)
+{
+ return IS_SRIOV_VF(gt_to_xe(gt)) &&
+ xe_gt_sriov_vf_recovery_pending(gt);
+}
+
#endif
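
GT_VER() resolves to the media IP version on media GTs and to the graphics IP version otherwise, so version checks no longer need to branch on the GT type by hand. A hedged usage fragment; the threshold and helpers are made up for illustration:

	if (GT_VER(gt) >= 20)			/* Xe2-or-newer IP on this GT, media or main */
		gt_enable_new_path(gt);		/* hypothetical helper */
	else
		gt_enable_legacy_path(gt);	/* hypothetical helper */
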
diff --git a/drivers/gpu/drm/xe/xe_gt_clock.c b/drivers/gpu/drm/xe/xe_gt_clock.c
index 4f011d1573c6..bfc25c46f798 100644
--- a/drivers/gpu/drm/xe/xe_gt_clock.c
+++ b/drivers/gpu/drm/xe/xe_gt_clock.c
@@ -55,30 +55,11 @@ static void read_crystal_clock(struct xe_gt *gt, u32 rpm_config_reg, u32 *freq,
}
}
-static void check_ctc_mode(struct xe_gt *gt)
-{
- /*
- * CTC_MODE[0] = 1 is definitely not supported for Xe2 and later
- * platforms. In theory it could be a valid setting for pre-Xe2
- * platforms, but there's no documentation on how to properly handle
- * this case. Reading TIMESTAMP_OVERRIDE, as the driver attempted in
- * the past has been confirmed as incorrect by the hardware architects.
- *
- * For now just warn if we ever encounter hardware in the wild that
- * has this setting and move on as if it hadn't been set.
- */
- if (xe_mmio_read32(&gt->mmio, CTC_MODE) & CTC_SOURCE_DIVIDE_LOGIC)
- xe_gt_warn(gt, "CTC_MODE[0] is set; this is unexpected and undocumented\n");
-}
-
int xe_gt_clock_init(struct xe_gt *gt)
{
u32 freq;
u32 c0;
- if (!IS_SRIOV_VF(gt_to_xe(gt)))
- check_ctc_mode(gt);
-
c0 = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
read_crystal_clock(gt, c0, &freq, &gt->info.timestamp_base);
@@ -93,11 +74,6 @@ int xe_gt_clock_init(struct xe_gt *gt)
return 0;
}
-static u64 div_u64_roundup(u64 n, u32 d)
-{
- return div_u64(n + d - 1, d);
-}
-
/**
* xe_gt_clock_interval_to_ms - Convert sampled GT clock ticks to msec
*
@@ -108,5 +84,5 @@ static u64 div_u64_roundup(u64 n, u32 d)
*/
u64 xe_gt_clock_interval_to_ms(struct xe_gt *gt, u64 count)
{
- return div_u64_roundup(count * MSEC_PER_SEC, gt->info.reference_clock);
+ return mul_u64_u32_div(count, MSEC_PER_SEC, gt->info.reference_clock);
}
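
The tick-to-millisecond conversion now uses mul_u64_u32_div(), which keeps a wide intermediate for count * MSEC_PER_SEC / reference_clock instead of the open-coded round-up division (note the rounding also changes from round-up to truncation). A quick worked example, assuming a 19.2 MHz reference clock:

	/* 38,400,000 GT clock ticks at 19.2 MHz: */
	u64 ms = mul_u64_u32_div(38400000ull, MSEC_PER_SEC, 19200000);	/* 2000 ms */
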
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.c b/drivers/gpu/drm/xe/xe_gt_debugfs.c
index 119a55bb7580..e4fd632f43cf 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.c
@@ -12,7 +12,6 @@
#include "xe_device.h"
#include "xe_force_wake.h"
-#include "xe_ggtt.h"
#include "xe_gt.h"
#include "xe_gt_mcr.h"
#include "xe_gt_idle.h"
@@ -29,11 +28,18 @@
#include "xe_pm.h"
#include "xe_reg_sr.h"
#include "xe_reg_whitelist.h"
+#include "xe_sa.h"
#include "xe_sriov.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_tuning.h"
#include "xe_uc_debugfs.h"
#include "xe_wa.h"
+static struct xe_gt *node_to_gt(struct drm_info_node *node)
+{
+ return node->dent->d_parent->d_inode->i_private;
+}
+
/**
* xe_gt_debugfs_simple_show - A show callback for struct drm_info_list
* @m: the &seq_file
@@ -76,8 +82,7 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data)
{
struct drm_printer p = drm_seq_file_printer(m);
struct drm_info_node *node = m->private;
- struct dentry *parent = node->dent->d_parent;
- struct xe_gt *gt = parent->d_inode->i_private;
+ struct xe_gt *gt = node_to_gt(node);
int (*print)(struct xe_gt *, struct drm_printer *) = node->info_ent->data;
if (WARN_ON(!print))
@@ -86,15 +91,36 @@ int xe_gt_debugfs_simple_show(struct seq_file *m, void *data)
return print(gt, &p);
}
-static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_gt_debugfs_show_with_rpm - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * Similar to xe_gt_debugfs_simple_show(), but implicitly takes an RPM reference.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data)
{
+ struct drm_info_node *node = m->private;
+ struct xe_gt *gt = node_to_gt(node);
struct xe_device *xe = gt_to_xe(gt);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_gt_debugfs_simple_show(m, data);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
+{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
unsigned int fw_ref;
int ret = 0;
- xe_pm_runtime_get(xe);
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
ret = -ETIMEDOUT;
@@ -106,88 +132,21 @@ static int hw_engines(struct xe_gt *gt, struct drm_printer *p)
fw_put:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
- xe_pm_runtime_put(xe);
-
- return ret;
-}
-
-static int powergate_info(struct xe_gt *gt, struct drm_printer *p)
-{
- int ret;
-
- xe_pm_runtime_get(gt_to_xe(gt));
- ret = xe_gt_idle_pg_print(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
return ret;
}
-static int force_reset(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_gt_reset_async(gt);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int force_reset_sync(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_gt_reset(gt);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int sa_info(struct xe_gt *gt, struct drm_printer *p)
-{
- struct xe_tile *tile = gt_to_tile(gt);
-
- xe_pm_runtime_get(gt_to_xe(gt));
- drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
- tile->mem.kernel_bb_pool->gpu_addr);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int topology(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_gt_topology_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
static int steering(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_gt_mcr_steering_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
-static int ggtt(struct xe_gt *gt, struct drm_printer *p)
-{
- int ret;
-
- xe_pm_runtime_get(gt_to_xe(gt));
- ret = xe_ggtt_dump(gt_to_tile(gt)->mem.ggtt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return ret;
-}
-
static int register_save_restore(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_hw_engine *hwe;
enum xe_hw_engine_id id;
- xe_pm_runtime_get(gt_to_xe(gt));
-
xe_reg_sr_dump(&gt->reg_sr, p);
drm_printf(p, "\n");
@@ -205,98 +164,42 @@ static int register_save_restore(struct xe_gt *gt, struct drm_printer *p)
for_each_hw_engine(hwe, gt, id)
xe_reg_whitelist_dump(&hwe->reg_whitelist, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int workarounds(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_wa_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int tunings(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_tuning_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int pat(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_pat_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return 0;
-}
-
-static int mocs(struct xe_gt *gt, struct drm_printer *p)
-{
- xe_pm_runtime_get(gt_to_xe(gt));
- xe_mocs_dump(gt, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int rcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_RENDER);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int ccs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COMPUTE);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int bcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_COPY);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int vcs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_DECODE);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int vecs_default_lrc(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_lrc_dump_default(p, gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
{
- xe_pm_runtime_get(gt_to_xe(gt));
xe_guc_hwconfig_dump(&gt->uc.guc, p);
- xe_pm_runtime_put(gt_to_xe(gt));
-
return 0;
}
@@ -306,43 +209,134 @@ static int hwconfig(struct xe_gt *gt, struct drm_printer *p)
* - without access to the PF specific data
*/
static const struct drm_info_list vf_safe_debugfs_list[] = {
- {"force_reset", .show = xe_gt_debugfs_simple_show, .data = force_reset},
- {"force_reset_sync", .show = xe_gt_debugfs_simple_show, .data = force_reset_sync},
- {"sa_info", .show = xe_gt_debugfs_simple_show, .data = sa_info},
- {"topology", .show = xe_gt_debugfs_simple_show, .data = topology},
- {"ggtt", .show = xe_gt_debugfs_simple_show, .data = ggtt},
- {"register-save-restore", .show = xe_gt_debugfs_simple_show, .data = register_save_restore},
- {"workarounds", .show = xe_gt_debugfs_simple_show, .data = workarounds},
- {"tunings", .show = xe_gt_debugfs_simple_show, .data = tunings},
- {"default_lrc_rcs", .show = xe_gt_debugfs_simple_show, .data = rcs_default_lrc},
- {"default_lrc_ccs", .show = xe_gt_debugfs_simple_show, .data = ccs_default_lrc},
- {"default_lrc_bcs", .show = xe_gt_debugfs_simple_show, .data = bcs_default_lrc},
- {"default_lrc_vcs", .show = xe_gt_debugfs_simple_show, .data = vcs_default_lrc},
- {"default_lrc_vecs", .show = xe_gt_debugfs_simple_show, .data = vecs_default_lrc},
- {"stats", .show = xe_gt_debugfs_simple_show, .data = xe_gt_stats_print_info},
- {"hwconfig", .show = xe_gt_debugfs_simple_show, .data = hwconfig},
+ { "topology", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_topology_dump },
+ { "register-save-restore",
+ .show = xe_gt_debugfs_show_with_rpm, .data = register_save_restore },
+ { "workarounds", .show = xe_gt_debugfs_show_with_rpm, .data = xe_wa_gt_dump },
+ { "tunings", .show = xe_gt_debugfs_show_with_rpm, .data = xe_tuning_dump },
+ { "default_lrc_rcs", .show = xe_gt_debugfs_show_with_rpm, .data = rcs_default_lrc },
+ { "default_lrc_ccs", .show = xe_gt_debugfs_show_with_rpm, .data = ccs_default_lrc },
+ { "default_lrc_bcs", .show = xe_gt_debugfs_show_with_rpm, .data = bcs_default_lrc },
+ { "default_lrc_vcs", .show = xe_gt_debugfs_show_with_rpm, .data = vcs_default_lrc },
+ { "default_lrc_vecs", .show = xe_gt_debugfs_show_with_rpm, .data = vecs_default_lrc },
+ { "hwconfig", .show = xe_gt_debugfs_show_with_rpm, .data = hwconfig },
};
/* everything else should be added here */
static const struct drm_info_list pf_only_debugfs_list[] = {
- {"hw_engines", .show = xe_gt_debugfs_simple_show, .data = hw_engines},
- {"mocs", .show = xe_gt_debugfs_simple_show, .data = mocs},
- {"pat", .show = xe_gt_debugfs_simple_show, .data = pat},
- {"powergate_info", .show = xe_gt_debugfs_simple_show, .data = powergate_info},
- {"steering", .show = xe_gt_debugfs_simple_show, .data = steering},
+ { "hw_engines", .show = xe_gt_debugfs_show_with_rpm, .data = hw_engines },
+ { "mocs", .show = xe_gt_debugfs_show_with_rpm, .data = xe_mocs_dump },
+ { "pat", .show = xe_gt_debugfs_show_with_rpm, .data = xe_pat_dump },
+ { "powergate_info", .show = xe_gt_debugfs_show_with_rpm, .data = xe_gt_idle_pg_print },
+ { "steering", .show = xe_gt_debugfs_show_with_rpm, .data = steering },
};
+static ssize_t write_to_gt_call(const char __user *userbuf, size_t count, loff_t *ppos,
+ void (*call)(struct xe_gt *), struct xe_gt *gt)
+{
+ bool yes;
+ int ret;
+
+ if (*ppos)
+ return -EINVAL;
+ ret = kstrtobool_from_user(userbuf, count, &yes);
+ if (ret < 0)
+ return ret;
+ if (yes)
+ call(gt);
+ return count;
+}
+
+static ssize_t stats_write(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *s = file->private_data;
+ struct xe_gt *gt = s->private;
+
+ return write_to_gt_call(userbuf, count, ppos, xe_gt_stats_clear, gt);
+}
+
+static int stats_show(struct seq_file *s, void *unused)
+{
+ struct drm_printer p = drm_seq_file_printer(s);
+ struct xe_gt *gt = s->private;
+
+ return xe_gt_stats_print_info(gt, &p);
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(stats);
+
+static void force_reset(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_pm_runtime_get(xe);
+ xe_gt_reset_async(gt);
+ xe_pm_runtime_put(xe);
+}
+
+static ssize_t force_reset_write(struct file *file,
+ const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *s = file->private_data;
+ struct xe_gt *gt = s->private;
+
+ return write_to_gt_call(userbuf, count, ppos, force_reset, gt);
+}
+
+static int force_reset_show(struct seq_file *s, void *unused)
+{
+ struct xe_gt *gt = s->private;
+
+ force_reset(gt); /* to be deprecated! */
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(force_reset);
+
+static void force_reset_sync(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_pm_runtime_get(xe);
+ xe_gt_reset(gt);
+ xe_pm_runtime_put(xe);
+}
+
+static ssize_t force_reset_sync_write(struct file *file,
+ const char __user *userbuf,
+ size_t count, loff_t *ppos)
+{
+ struct seq_file *s = file->private_data;
+ struct xe_gt *gt = s->private;
+
+ return write_to_gt_call(userbuf, count, ppos, force_reset_sync, gt);
+}
+
+static int force_reset_sync_show(struct seq_file *s, void *unused)
+{
+ struct xe_gt *gt = s->private;
+
+ force_reset_sync(gt); /* to be deprecated! */
+ return 0;
+}
+DEFINE_SHOW_STORE_ATTRIBUTE(force_reset_sync);
+
void xe_gt_debugfs_register(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
struct drm_minor *minor = gt_to_xe(gt)->drm.primary;
+ struct dentry *parent = gt->tile->debugfs;
struct dentry *root;
+ char symlink[16];
char name[8];
xe_gt_assert(gt, minor->debugfs_root);
+ if (IS_ERR(parent))
+ return;
+
snprintf(name, sizeof(name), "gt%d", gt->info.id);
- root = debugfs_create_dir(name, minor->debugfs_root);
+ root = debugfs_create_dir(name, parent);
if (IS_ERR(root)) {
drm_warn(&xe->drm, "Create GT directory failed");
return;
@@ -355,6 +349,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
*/
root->d_inode->i_private = gt;
+ /* VF safe */
+ debugfs_create_file("stats", 0600, root, gt, &stats_fops);
+ debugfs_create_file("force_reset", 0600, root, gt, &force_reset_fops);
+ debugfs_create_file("force_reset_sync", 0600, root, gt, &force_reset_sync_fops);
+
drm_debugfs_create_files(vf_safe_debugfs_list,
ARRAY_SIZE(vf_safe_debugfs_list),
root, minor);
@@ -370,4 +369,11 @@ void xe_gt_debugfs_register(struct xe_gt *gt)
xe_gt_sriov_pf_debugfs_register(gt, root);
else if (IS_SRIOV_VF(xe))
xe_gt_sriov_vf_debugfs_register(gt, root);
+
+ /*
+ * Backwards compatibility only: create a link for the legacy clients
+ * who may expect gt/ directory at the root level, not the tile level.
+ */
+ snprintf(symlink, sizeof(symlink), "tile%u/%s", gt->tile->id, name);
+ debugfs_create_symlink(name, minor->debugfs_root, symlink);
}
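
Usage note: stats, force_reset and force_reset_sync are now regular debugfs files under the per-tile GT directory, e.g. /sys/kernel/debug/dri/<minor>/tile0/gt0/force_reset. Writing a boolean true ("1", "y", ...) triggers the action, writing to stats clears the counters, and reading force_reset or force_reset_sync still triggers a reset only for backwards compatibility and is marked for deprecation. The gt<N> symlink left at the DRM debugfs root points at tile<N>/gt<N> for legacy tooling.
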
diff --git a/drivers/gpu/drm/xe/xe_gt_debugfs.h b/drivers/gpu/drm/xe/xe_gt_debugfs.h
index 05a6cc93c78c..32ee3264051b 100644
--- a/drivers/gpu/drm/xe/xe_gt_debugfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_debugfs.h
@@ -11,5 +11,6 @@ struct xe_gt;
void xe_gt_debugfs_register(struct xe_gt *gt);
int xe_gt_debugfs_simple_show(struct seq_file *m, void *data);
+int xe_gt_debugfs_show_with_rpm(struct seq_file *m, void *data);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_freq.c b/drivers/gpu/drm/xe/xe_gt_freq.c
index 868a5d2c1a52..ce3c7810469f 100644
--- a/drivers/gpu/drm/xe/xe_gt_freq.c
+++ b/drivers/gpu/drm/xe/xe_gt_freq.c
@@ -29,19 +29,26 @@
* PCODE is the ultimate decision maker of the actual running frequency, based
* on thermal and other running conditions.
*
- * Xe's Freq provides a sysfs API for frequency management:
+ * Xe's Freq provides a sysfs API for frequency management under the
+ * ``<device>/tile#/gt#/freq0/`` directory.
*
- * device/tile#/gt#/freq0/<item>_freq *read-only* files:
- * - act_freq: The actual resolved frequency decided by PCODE.
- * - cur_freq: The current one requested by GuC PC to the PCODE.
- * - rpn_freq: The Render Performance (RP) N level, which is the minimal one.
- * - rpe_freq: The Render Performance (RP) E level, which is the efficient one.
- * - rp0_freq: The Render Performance (RP) 0 level, which is the maximum one.
+ * **Read-only** attributes:
*
- * device/tile#/gt#/freq0/<item>_freq *read-write* files:
- * - min_freq: Min frequency request.
- * - max_freq: Max frequency request.
- * If max <= min, then freq_min becomes a fixed frequency request.
+ * - ``act_freq``: The actual resolved frequency decided by PCODE.
+ * - ``cur_freq``: The current one requested by GuC PC to the PCODE.
+ * - ``rpn_freq``: The Render Performance (RP) N level, which is the minimal one.
+ * - ``rpa_freq``: The Render Performance (RP) A level, which is the achievable one.
+ * Calculated by PCODE at runtime based on multiple running conditions
+ * - ``rpe_freq``: The Render Performance (RP) E level, which is the efficient one.
+ * Calculated by PCODE at runtime based on multiple running conditions
+ * - ``rp0_freq``: The Render Performance (RP) 0 level, which is the maximum one.
+ *
+ * **Read-write** attributes:
+ *
+ * - ``min_freq``: Min frequency request.
+ * - ``max_freq``: Max frequency request.
+ * If max <= min, then ``min_freq`` becomes a fixed frequency
+ * request.
*/
static struct xe_guc_pc *
@@ -94,13 +101,8 @@ static ssize_t rp0_freq_show(struct kobject *kobj,
{
struct device *dev = kobj_to_dev(kobj);
struct xe_guc_pc *pc = dev_to_pc(dev);
- u32 freq;
-
- xe_pm_runtime_get(dev_to_xe(dev));
- freq = xe_guc_pc_get_rp0_freq(pc);
- xe_pm_runtime_put(dev_to_xe(dev));
- return sysfs_emit(buf, "%d\n", freq);
+ return sysfs_emit(buf, "%d\n", xe_guc_pc_get_rp0_freq(pc));
}
static struct kobj_attribute attr_rp0_freq = __ATTR_RO(rp0_freq);
@@ -222,6 +224,33 @@ static ssize_t max_freq_store(struct kobject *kobj,
}
static struct kobj_attribute attr_max_freq = __ATTR_RW(max_freq);
+static ssize_t power_profile_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buff)
+{
+ struct device *dev = kobj_to_dev(kobj);
+
+ xe_guc_pc_get_power_profile(dev_to_pc(dev), buff);
+
+ return strlen(buff);
+}
+
+static ssize_t power_profile_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buff, size_t count)
+{
+ struct device *dev = kobj_to_dev(kobj);
+ struct xe_guc_pc *pc = dev_to_pc(dev);
+ int err;
+
+ xe_pm_runtime_get(dev_to_xe(dev));
+ err = xe_guc_pc_set_power_profile(pc, buff);
+ xe_pm_runtime_put(dev_to_xe(dev));
+
+ return err ?: count;
+}
+static struct kobj_attribute attr_power_profile = __ATTR_RW(power_profile);
+
static const struct attribute *freq_attrs[] = {
&attr_act_freq.attr,
&attr_cur_freq.attr,
@@ -231,6 +260,7 @@ static const struct attribute *freq_attrs[] = {
&attr_rpn_freq.attr,
&attr_min_freq.attr,
&attr_max_freq.attr,
+ &attr_power_profile.attr,
NULL
};
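
The new power_profile attribute rounds out the freq0 group: a read reports the currently selected GuC power profile string via xe_guc_pc_get_power_profile(), while a write hands the string to xe_guc_pc_set_power_profile() under a runtime-PM reference and returns either the byte count or the GuC PC error. The set of accepted profile names is defined by the GuC PC code rather than here.
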
@@ -263,8 +293,10 @@ int xe_gt_freq_init(struct xe_gt *gt)
return -ENOMEM;
err = sysfs_create_files(gt->freq, freq_attrs);
- if (err)
+ if (err) {
+ kobject_put(gt->freq);
return err;
+ }
err = devm_add_action_or_reset(xe->drm.dev, freq_fini, gt->freq);
if (err)
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.c b/drivers/gpu/drm/xe/xe_gt_idle.c
index c11206410a4d..3e3d1d52f630 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.c
+++ b/drivers/gpu/drm/xe/xe_gt_idle.c
@@ -5,6 +5,7 @@
#include <drm/drm_managed.h>
+#include <generated/xe_wa_oob.h>
#include "xe_force_wake.h"
#include "xe_device.h"
#include "xe_gt.h"
@@ -16,6 +17,7 @@
#include "xe_mmio.h"
#include "xe_pm.h"
#include "xe_sriov.h"
+#include "xe_wa.h"
/**
* DOC: Xe GT Idle
@@ -121,9 +123,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
if (vcs_mask || vecs_mask)
gtidle->powergate_enable = MEDIA_POWERGATE_ENABLE;
- if (!xe_gt_is_media_type(gt))
+ if (xe_gt_is_main_type(gt))
gtidle->powergate_enable |= RENDER_POWERGATE_ENABLE;
+ if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+ gtidle->powergate_enable |= MEDIA_SAMPLERS_POWERGATE_ENABLE;
+
if (xe->info.platform != XE_DG1) {
for (i = XE_HW_ENGINE_VCS0, j = 0; i <= XE_HW_ENGINE_VCS7; ++i, ++j) {
if ((gt->info.engine_mask & BIT(i)))
@@ -142,6 +147,12 @@ void xe_gt_idle_enable_pg(struct xe_gt *gt)
xe_mmio_write32(mmio, RENDER_POWERGATE_IDLE_HYSTERESIS, 25);
}
+ if (XE_GT_WA(gt, 14020316580))
+ gtidle->powergate_enable &= ~(VDN_HCP_POWERGATE_ENABLE(0) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(0) |
+ VDN_HCP_POWERGATE_ENABLE(2) |
+ VDN_MFXVDENC_POWERGATE_ENABLE(2));
+
xe_mmio_write32(mmio, POWERGATE_ENABLE, gtidle->powergate_enable);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
}
@@ -246,6 +257,11 @@ int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "Media Slice%d Power Gate Status: %s\n", n,
str_up_down(pg_status & media_slices[n].status_bit));
}
+
+ if (MEDIA_VERx100(xe) >= 1100 && MEDIA_VERx100(xe) < 1255)
+ drm_printf(p, "Media Samplers Power Gating Enabled: %s\n",
+ str_yes_no(pg_enabled & MEDIA_SAMPLERS_POWERGATE_ENABLE));
+
return 0;
}
@@ -322,15 +338,11 @@ static void gt_idle_fini(void *arg)
{
struct kobject *kobj = arg;
struct xe_gt *gt = kobj_to_gt(kobj->parent);
- unsigned int fw_ref;
xe_gt_idle_disable_pg(gt);
- if (gt_to_xe(gt)->info.skip_guc_pc) {
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (gt_to_xe(gt)->info.skip_guc_pc)
xe_gt_idle_disable_c6(gt);
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- }
sysfs_remove_files(kobj, gt_idle_attrs);
kobject_put(kobj);
@@ -390,14 +402,23 @@ void xe_gt_idle_enable_c6(struct xe_gt *gt)
RC_CTL_HW_ENABLE | RC_CTL_TO_MODE | RC_CTL_RC6_ENABLE);
}
-void xe_gt_idle_disable_c6(struct xe_gt *gt)
+int xe_gt_idle_disable_c6(struct xe_gt *gt)
{
+ unsigned int fw_ref;
+
xe_device_assert_mem_access(gt_to_xe(gt));
- xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
if (IS_SRIOV_VF(gt_to_xe(gt)))
- return;
+ return 0;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
xe_mmio_write32(&gt->mmio, RC_CONTROL, 0);
xe_mmio_write32(&gt->mmio, RC_STATE, 0);
+
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_idle.h b/drivers/gpu/drm/xe/xe_gt_idle.h
index 591a01e181bc..9c34a155e102 100644
--- a/drivers/gpu/drm/xe/xe_gt_idle.h
+++ b/drivers/gpu/drm/xe/xe_gt_idle.h
@@ -13,7 +13,7 @@ struct xe_gt;
int xe_gt_idle_init(struct xe_gt_idle *gtidle);
void xe_gt_idle_enable_c6(struct xe_gt *gt);
-void xe_gt_idle_disable_c6(struct xe_gt *gt);
+int xe_gt_idle_disable_c6(struct xe_gt *gt);
void xe_gt_idle_enable_pg(struct xe_gt *gt);
void xe_gt_idle_disable_pg(struct xe_gt *gt);
int xe_gt_idle_pg_print(struct xe_gt *gt, struct drm_printer *p);
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
index d4d9730f0d2c..164010860664 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.c
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
@@ -46,8 +46,6 @@
* MCR registers are not available on Virtual Function (VF).
*/
-#define STEER_SEMAPHORE XE_REG(0xFD0)
-
static inline struct xe_reg to_xe_reg(struct xe_reg_mcr reg_mcr)
{
return reg_mcr.__reg;
@@ -171,6 +169,15 @@ static const struct xe_mmio_range xelpg_dss_steering_table[] = {
{},
};
+static const struct xe_mmio_range xe3p_xpc_xecore_steering_table[] = {
+ { 0x008140, 0x00817F }, /* SLICE, XeCore, SLICE */
+ { 0x009480, 0x00955F }, /* SLICE, XeCore */
+ { 0x00D800, 0x00D87F }, /* SLICE */
+ { 0x00DC00, 0x00E9FF }, /* SLICE, rsvd, XeCore, rsvd, XeCore, rsvd, XeCore */
+ { 0x013000, 0x0135FF }, /* XeCore, SLICE */
+ {},
+};
+
static const struct xe_mmio_range xelpmp_oaddrm_steering_table[] = {
{ 0x393200, 0x39323F },
{ 0x393400, 0x3934FF },
@@ -238,21 +245,60 @@ static const struct xe_mmio_range xe2lpm_instance0_steering_table[] = {
};
static const struct xe_mmio_range xe3lpm_instance0_steering_table[] = {
- { 0x384000, 0x3847DF }, /* GAM, rsvd, GAM */
+ { 0x384000, 0x3841FF }, /* GAM */
+ { 0x384400, 0x3847DF }, /* GAM */
{ 0x384900, 0x384AFF }, /* GAM */
{ 0x389560, 0x3895FF }, /* MEDIAINF */
{ 0x38B600, 0x38B8FF }, /* L3BANK */
{ 0x38C800, 0x38D07F }, /* GAM, MEDIAINF */
- { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, GAM */
+ { 0x38D0D0, 0x38F0FF }, /* MEDIAINF, rsvd, GAM */
{ 0x393C00, 0x393C7F }, /* MEDIAINF */
{},
};
+/*
+ * Different "GAM" ranges have different rules; GAMWKRS, STLB, and GAMREQSTRM
+ * range subtypes need to be steered to (1,0), while all other GAM subtypes
+ * are steered to (0,0) and are included in the "INSTANCE0" table farther
+ * down.
+ */
+static const struct xe_mmio_range xe3p_xpc_gam_grp1_steering_table[] = {
+ { 0x004000, 0x004AFF }, /* GAMREQSTRM, rsvd, STLB, GAMWKRS, GAMREQSTRM */
+ { 0x00F100, 0x00FFFF }, /* GAMWKRS */
+ {},
+};
+
+static const struct xe_mmio_range xe3p_xpc_node_steering_table[] = {
+ { 0x00B000, 0x00B0FF },
+ { 0x00D880, 0x00D8FF },
+ {},
+};
+
+static const struct xe_mmio_range xe3p_xpc_instance0_steering_table[] = {
+ { 0x00B500, 0x00B6FF }, /* PSMI */
+ { 0x00C800, 0x00CFFF }, /* GAMCTRL */
+ { 0x00F000, 0x00F0FF }, /* GAMCTRL */
+ {},
+};
+
static void init_steering_l3bank(struct xe_gt *gt)
{
+ struct xe_device *xe = gt_to_xe(gt);
struct xe_mmio *mmio = &gt->mmio;
- if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
+ if (GRAPHICS_VER(xe) >= 35) {
+ unsigned int first_bank = xe_l3_bank_mask_ffs(gt->fuse_topo.l3_bank_mask);
+ const int banks_per_node = 4;
+ unsigned int node = first_bank / banks_per_node;
+
+ /* L3BANK ranges place node in grpid, bank in instanceid */
+ gt->steering[L3BANK].group_target = node;
+ gt->steering[L3BANK].instance_target = first_bank % banks_per_node;
+
+ /* NODE ranges split the node across grpid and instanceid */
+ gt->steering[NODE].group_target = node >> 1;
+ gt->steering[NODE].instance_target = node & 1;
+ } else if (GRAPHICS_VERx100(xe) >= 1270) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank_mask = REG_FIELD_GET(GT_L3_EXC_MASK,
@@ -265,7 +311,7 @@ static void init_steering_l3bank(struct xe_gt *gt)
gt->steering[L3BANK].group_target = __ffs(mslice_mask);
gt->steering[L3BANK].instance_target =
bank_mask & BIT(0) ? 0 : 2;
- } else if (gt_to_xe(gt)->info.platform == XE_DG2) {
+ } else if (xe->info.platform == XE_DG2) {
u32 mslice_mask = REG_FIELD_GET(MEML3_EN_MASK,
xe_mmio_read32(mmio, MIRROR_FUSE3));
u32 bank = __ffs(mslice_mask) * 8;
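A standalone worked example of the GRAPHICS_VER >= 35 steering arithmetic added above; the bank number is an invented fuse value and the program only mirrors the division and bit operations, not any driver API.

#include <stdio.h>

int main(void)
{
	/* Assume bank 9 is the first enabled L3 bank in the fuse mask. */
	unsigned int first_bank = 9;
	const unsigned int banks_per_node = 4;
	unsigned int node = first_bank / banks_per_node;	/* 2 */

	/* L3BANK ranges: node in grpid, bank-within-node in instanceid */
	printf("L3BANK -> group %u, instance %u\n",
	       node, first_bank % banks_per_node);		/* 2, 1 */

	/* NODE ranges: node split across grpid and instanceid */
	printf("NODE   -> group %u, instance %u\n",
	       node >> 1, node & 1);				/* 1, 0 */

	return 0;
}
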
@@ -364,7 +410,7 @@ fallback:
* @group: pointer to storage for steering group ID
* @instance: pointer to storage for steering instance ID
*/
-void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
+void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance)
{
xe_gt_assert(gt, dss < XE_MAX_DSS_FUSE_BITS);
@@ -420,10 +466,10 @@ static void init_steering_sqidi_psmi(struct xe_gt *gt)
gt->steering[SQIDI_PSMI].instance_target = select & 0x1;
}
-static void init_steering_inst0(struct xe_gt *gt)
+static void init_steering_gam1(struct xe_gt *gt)
{
- gt->steering[INSTANCE0].group_target = 0; /* unused */
- gt->steering[INSTANCE0].instance_target = 0; /* unused */
+ gt->steering[GAM1].group_target = 1;
+ gt->steering[GAM1].instance_target = 0;
}
static const struct {
@@ -431,12 +477,14 @@ static const struct {
void (*init)(struct xe_gt *gt);
} xe_steering_types[] = {
[L3BANK] = { "L3BANK", init_steering_l3bank },
+ [NODE] = { "NODE", NULL }, /* initialized by l3bank init */
[MSLICE] = { "MSLICE", init_steering_mslice },
[LNCF] = { "LNCF", NULL }, /* initialized by mslice init */
- [DSS] = { "DSS", init_steering_dss },
+ [DSS] = { "DSS / XeCore", init_steering_dss },
[OADDRM] = { "OADDRM / GPMXMT", init_steering_oaddrm },
[SQIDI_PSMI] = { "SQIDI_PSMI", init_steering_sqidi_psmi },
- [INSTANCE0] = { "INSTANCE 0", init_steering_inst0 },
+ [GAM1] = { "GAMWKRS / STLB / GAMREQSTRM", init_steering_gam1 },
+ [INSTANCE0] = { "INSTANCE 0", NULL },
[IMPLICIT_STEERING] = { "IMPLICIT", NULL },
};
@@ -446,25 +494,17 @@ static const struct {
*
* Perform early software only initialization of the MCR lock to allow
* the synchronization on accessing the STEER_SEMAPHORE register and
- * use the xe_gt_mcr_multicast_write() function.
+ * use the xe_gt_mcr_multicast_write() function, plus the minimum
+ * safe MCR registers required for VRAM/CCS probing.
*/
void xe_gt_mcr_init_early(struct xe_gt *gt)
{
+ struct xe_device *xe = gt_to_xe(gt);
+
BUILD_BUG_ON(IMPLICIT_STEERING + 1 != NUM_STEERING_TYPES);
BUILD_BUG_ON(ARRAY_SIZE(xe_steering_types) != NUM_STEERING_TYPES);
spin_lock_init(&gt->mcr_lock);
-}
-
-/**
- * xe_gt_mcr_init - Normal initialization of the MCR support
- * @gt: GT structure
- *
- * Perform normal initialization of the MCR for all usages.
- */
-void xe_gt_mcr_init(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
if (IS_SRIOV_VF(xe))
return;
@@ -482,7 +522,19 @@ void xe_gt_mcr_init(struct xe_gt *gt)
gt->steering[OADDRM].ranges = xelpmp_oaddrm_steering_table;
}
} else {
- if (GRAPHICS_VER(xe) >= 20) {
+ if (GRAPHICS_VERx100(xe) == 3511) {
+ /*
+ * TODO: there are some ranges in bspec with missing
+ * termination: [0x00B000, 0x00B0FF] and
+ * [0x00D880, 0x00D8FF] (NODE); [0x00B100, 0x00B3FF]
+ * (L3BANK). Update them here once bspec is updated.
+ */
+ gt->steering[DSS].ranges = xe3p_xpc_xecore_steering_table;
+ gt->steering[GAM1].ranges = xe3p_xpc_gam_grp1_steering_table;
+ gt->steering[INSTANCE0].ranges = xe3p_xpc_instance0_steering_table;
+ gt->steering[L3BANK].ranges = xelpg_l3bank_steering_table;
+ gt->steering[NODE].ranges = xe3p_xpc_node_steering_table;
+ } else if (GRAPHICS_VER(xe) >= 20) {
gt->steering[DSS].ranges = xe2lpg_dss_steering_table;
gt->steering[SQIDI_PSMI].ranges = xe2lpg_sqidi_psmi_steering_table;
gt->steering[INSTANCE0].ranges = xe2lpg_instance0_steering_table;
@@ -505,10 +557,27 @@ void xe_gt_mcr_init(struct xe_gt *gt)
}
}
+ /* Mark instance 0 as initialized; we need this early for VRAM and CCS probe. */
+ gt->steering[INSTANCE0].initialized = true;
+}
+
+/**
+ * xe_gt_mcr_init - Normal initialization of the MCR support
+ * @gt: GT structure
+ *
+ * Perform normal initialization of the MCR for all usages.
+ */
+void xe_gt_mcr_init(struct xe_gt *gt)
+{
+ if (IS_SRIOV_VF(gt_to_xe(gt)))
+ return;
+
/* Select non-terminated steering target for each type */
- for (int i = 0; i < NUM_STEERING_TYPES; i++)
+ for (int i = 0; i < NUM_STEERING_TYPES; i++) {
+ gt->steering[i].initialized = true;
if (gt->steering[i].ranges && xe_steering_types[i].init)
xe_steering_types[i].init(gt);
+ }
}
/**
@@ -530,7 +599,7 @@ void xe_gt_mcr_set_implicit_defaults(struct xe_gt *gt)
u32 steer_val = REG_FIELD_PREP(MCR_SLICE_MASK, 0) |
REG_FIELD_PREP(MCR_SUBSLICE_MASK, 2);
- xe_mmio_write32(&gt->mmio, MCFG_MCR_SELECTOR, steer_val);
+ xe_mmio_write32(&gt->mmio, STEER_SEMAPHORE, steer_val);
xe_mmio_write32(&gt->mmio, SF_MCR_SELECTOR, steer_val);
/*
* For GAM registers, all reads should be directed to instance 1
@@ -570,6 +639,10 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
for (int i = 0; gt->steering[type].ranges[i].end > 0; i++) {
if (xe_mmio_in_range(&gt->mmio, &gt->steering[type].ranges[i], reg)) {
+ drm_WARN(&gt_to_xe(gt)->drm, !gt->steering[type].initialized,
+ "Uninitialized usage of MCR register %s/%#x\n",
+ xe_steering_types[type].name, reg.addr);
+
*group = gt->steering[type].group_target;
*instance = gt->steering[type].instance_target;
return true;
diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
index bc06520befab..283a1c9770e2 100644
--- a/drivers/gpu/drm/xe/xe_gt_mcr.h
+++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
@@ -31,7 +31,8 @@ bool xe_gt_mcr_get_nonterminated_steering(struct xe_gt *gt,
u8 *group, u8 *instance);
void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
-void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, u16 *group, u16 *instance);
+void xe_gt_mcr_get_dss_steering(const struct xe_gt *gt,
+ unsigned int dss, u16 *group, u16 *instance);
u32 xe_gt_mcr_steering_info_to_dss_id(struct xe_gt *gt, u16 group, u16 instance);
/*
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
deleted file mode 100644
index 10622ca471a2..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ /dev/null
@@ -1,713 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#include "xe_gt_pagefault.h"
-
-#include <linux/bitfield.h>
-#include <linux/circ_buf.h>
-
-#include <drm/drm_exec.h>
-#include <drm/drm_managed.h>
-
-#include "abi/guc_actions_abi.h"
-#include "xe_bo.h"
-#include "xe_gt.h"
-#include "xe_gt_stats.h"
-#include "xe_gt_tlb_invalidation.h"
-#include "xe_guc.h"
-#include "xe_guc_ct.h"
-#include "xe_migrate.h"
-#include "xe_svm.h"
-#include "xe_trace_bo.h"
-#include "xe_vm.h"
-
-struct pagefault {
- u64 page_addr;
- u32 asid;
- u16 pdata;
- u8 vfid;
- u8 access_type;
- u8 fault_type;
- u8 fault_level;
- u8 engine_class;
- u8 engine_instance;
- u8 fault_unsuccessful;
- bool trva_fault;
-};
-
-enum access_type {
- ACCESS_TYPE_READ = 0,
- ACCESS_TYPE_WRITE = 1,
- ACCESS_TYPE_ATOMIC = 2,
- ACCESS_TYPE_RESERVED = 3,
-};
-
-enum fault_type {
- NOT_PRESENT = 0,
- WRITE_ACCESS_VIOLATION = 1,
- ATOMIC_ACCESS_VIOLATION = 2,
-};
-
-struct acc {
- u64 va_range_base;
- u32 asid;
- u32 sub_granularity;
- u8 granularity;
- u8 vfid;
- u8 access_type;
- u8 engine_class;
- u8 engine_instance;
-};
-
-static bool access_is_atomic(enum access_type access_type)
-{
- return access_type == ACCESS_TYPE_ATOMIC;
-}
-
-static bool vma_is_valid(struct xe_tile *tile, struct xe_vma *vma)
-{
- return BIT(tile->id) & vma->tile_present &&
- !(BIT(tile->id) & vma->tile_invalidated);
-}
-
-static bool vma_matches(struct xe_vma *vma, u64 page_addr)
-{
- if (page_addr > xe_vma_end(vma) - 1 ||
- page_addr + SZ_4K - 1 < xe_vma_start(vma))
- return false;
-
- return true;
-}
-
-static struct xe_vma *lookup_vma(struct xe_vm *vm, u64 page_addr)
-{
- struct xe_vma *vma = NULL;
-
- if (vm->usm.last_fault_vma) { /* Fast lookup */
- if (vma_matches(vm->usm.last_fault_vma, page_addr))
- vma = vm->usm.last_fault_vma;
- }
- if (!vma)
- vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
-
- return vma;
-}
-
-static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
- bool atomic, unsigned int id)
-{
- struct xe_bo *bo = xe_vma_bo(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- int err;
-
- err = xe_vm_lock_vma(exec, vma);
- if (err)
- return err;
-
- if (atomic && IS_DGFX(vm->xe)) {
- if (xe_vma_is_userptr(vma)) {
- err = -EACCES;
- return err;
- }
-
- /* Migrate to VRAM, move should invalidate the VMA first */
- err = xe_bo_migrate(bo, XE_PL_VRAM0 + id);
- if (err)
- return err;
- } else if (bo) {
- /* Create backing store if needed */
- err = xe_bo_validate(bo, vm, true);
- if (err)
- return err;
- }
-
- return 0;
-}
-
-static int handle_vma_pagefault(struct xe_gt *gt, struct xe_vma *vma,
- bool atomic)
-{
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_tile *tile = gt_to_tile(gt);
- struct drm_exec exec;
- struct dma_fence *fence;
- ktime_t end = 0;
- int err;
-
- lockdep_assert_held_write(&vm->lock);
-
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB, xe_vma_size(vma) / 1024);
-
- trace_xe_vma_pagefault(vma);
-
- /* Check if VMA is valid */
- if (vma_is_valid(tile, vma) && !atomic)
- return 0;
-
-retry_userptr:
- if (xe_vma_is_userptr(vma) &&
- xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
- struct xe_userptr_vma *uvma = to_userptr_vma(vma);
-
- err = xe_vma_userptr_pin_pages(uvma);
- if (err)
- return err;
- }
-
- /* Lock VM and BOs dma-resv */
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- err = xe_pf_begin(&exec, vma, atomic, tile->id);
- drm_exec_retry_on_contention(&exec);
- if (xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
- if (err)
- goto unlock_dma_resv;
-
- /* Bind VMA only to the GT that has faulted */
- trace_xe_vma_pf_bind(vma);
- fence = xe_vma_rebind(vm, vma, BIT(tile->id));
- if (IS_ERR(fence)) {
- err = PTR_ERR(fence);
- if (xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
- goto unlock_dma_resv;
- }
- }
-
- dma_fence_wait(fence, false);
- dma_fence_put(fence);
- vma->tile_invalidated &= ~BIT(tile->id);
-
-unlock_dma_resv:
- drm_exec_fini(&exec);
- if (err == -EAGAIN)
- goto retry_userptr;
-
- return err;
-}
-
-static struct xe_vm *asid_to_vm(struct xe_device *xe, u32 asid)
-{
- struct xe_vm *vm;
-
- down_read(&xe->usm.lock);
- vm = xa_load(&xe->usm.asid_to_vm, asid);
- if (vm && xe_vm_in_fault_mode(vm))
- xe_vm_get(vm);
- else
- vm = ERR_PTR(-EINVAL);
- up_read(&xe->usm.lock);
-
- return vm;
-}
-
-static int handle_pagefault(struct xe_gt *gt, struct pagefault *pf)
-{
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_vm *vm;
- struct xe_vma *vma = NULL;
- int err;
- bool atomic;
-
- /* SW isn't expected to handle TRTT faults */
- if (pf->trva_fault)
- return -EFAULT;
-
- vm = asid_to_vm(xe, pf->asid);
- if (IS_ERR(vm))
- return PTR_ERR(vm);
-
- /*
- * TODO: Change to read lock? Using write lock for simplicity.
- */
- down_write(&vm->lock);
-
- if (xe_vm_is_closed(vm)) {
- err = -ENOENT;
- goto unlock_vm;
- }
-
- vma = lookup_vma(vm, pf->page_addr);
- if (!vma) {
- err = -EINVAL;
- goto unlock_vm;
- }
-
- atomic = access_is_atomic(pf->access_type);
-
- if (xe_vma_is_cpu_addr_mirror(vma))
- err = xe_svm_handle_pagefault(vm, vma, gt,
- pf->page_addr, atomic);
- else
- err = handle_vma_pagefault(gt, vma, atomic);
-
-unlock_vm:
- if (!err)
- vm->usm.last_fault_vma = vma;
- up_write(&vm->lock);
- xe_vm_put(vm);
-
- return err;
-}
-
-static int send_pagefault_reply(struct xe_guc *guc,
- struct xe_guc_pagefault_reply *reply)
-{
- u32 action[] = {
- XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
- reply->dw0,
- reply->dw1,
- };
-
- return xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
-}
-
-static void print_pagefault(struct xe_device *xe, struct pagefault *pf)
-{
- drm_dbg(&xe->drm, "\n\tASID: %d\n"
- "\tVFID: %d\n"
- "\tPDATA: 0x%04x\n"
- "\tFaulted Address: 0x%08x%08x\n"
- "\tFaultType: %d\n"
- "\tAccessType: %d\n"
- "\tFaultLevel: %d\n"
- "\tEngineClass: %d %s\n"
- "\tEngineInstance: %d\n",
- pf->asid, pf->vfid, pf->pdata, upper_32_bits(pf->page_addr),
- lower_32_bits(pf->page_addr),
- pf->fault_type, pf->access_type, pf->fault_level,
- pf->engine_class, xe_hw_engine_class_to_str(pf->engine_class),
- pf->engine_instance);
-}
-
-#define PF_MSG_LEN_DW 4
-
-static bool get_pagefault(struct pf_queue *pf_queue, struct pagefault *pf)
-{
- const struct xe_guc_pagefault_desc *desc;
- bool ret = false;
-
- spin_lock_irq(&pf_queue->lock);
- if (pf_queue->tail != pf_queue->head) {
- desc = (const struct xe_guc_pagefault_desc *)
- (pf_queue->data + pf_queue->tail);
-
- pf->fault_level = FIELD_GET(PFD_FAULT_LEVEL, desc->dw0);
- pf->trva_fault = FIELD_GET(XE2_PFD_TRVA_FAULT, desc->dw0);
- pf->engine_class = FIELD_GET(PFD_ENG_CLASS, desc->dw0);
- pf->engine_instance = FIELD_GET(PFD_ENG_INSTANCE, desc->dw0);
- pf->pdata = FIELD_GET(PFD_PDATA_HI, desc->dw1) <<
- PFD_PDATA_HI_SHIFT;
- pf->pdata |= FIELD_GET(PFD_PDATA_LO, desc->dw0);
- pf->asid = FIELD_GET(PFD_ASID, desc->dw1);
- pf->vfid = FIELD_GET(PFD_VFID, desc->dw2);
- pf->access_type = FIELD_GET(PFD_ACCESS_TYPE, desc->dw2);
- pf->fault_type = FIELD_GET(PFD_FAULT_TYPE, desc->dw2);
- pf->page_addr = (u64)(FIELD_GET(PFD_VIRTUAL_ADDR_HI, desc->dw3)) <<
- PFD_VIRTUAL_ADDR_HI_SHIFT;
- pf->page_addr |= FIELD_GET(PFD_VIRTUAL_ADDR_LO, desc->dw2) <<
- PFD_VIRTUAL_ADDR_LO_SHIFT;
-
- pf_queue->tail = (pf_queue->tail + PF_MSG_LEN_DW) %
- pf_queue->num_dw;
- ret = true;
- }
- spin_unlock_irq(&pf_queue->lock);
-
- return ret;
-}
-
-static bool pf_queue_full(struct pf_queue *pf_queue)
-{
- lockdep_assert_held(&pf_queue->lock);
-
- return CIRC_SPACE(pf_queue->head, pf_queue->tail,
- pf_queue->num_dw) <=
- PF_MSG_LEN_DW;
-}
-
-int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = gt_to_xe(gt);
- struct pf_queue *pf_queue;
- unsigned long flags;
- u32 asid;
- bool full;
-
- if (unlikely(len != PF_MSG_LEN_DW))
- return -EPROTO;
-
- asid = FIELD_GET(PFD_ASID, msg[1]);
- pf_queue = gt->usm.pf_queue + (asid % NUM_PF_QUEUE);
-
- /*
- * The below logic doesn't work unless PF_QUEUE_NUM_DW % PF_MSG_LEN_DW == 0
- */
- xe_gt_assert(gt, !(pf_queue->num_dw % PF_MSG_LEN_DW));
-
- spin_lock_irqsave(&pf_queue->lock, flags);
- full = pf_queue_full(pf_queue);
- if (!full) {
- memcpy(pf_queue->data + pf_queue->head, msg, len * sizeof(u32));
- pf_queue->head = (pf_queue->head + len) %
- pf_queue->num_dw;
- queue_work(gt->usm.pf_wq, &pf_queue->worker);
- } else {
- drm_warn(&xe->drm, "PF Queue full, shouldn't be possible");
- }
- spin_unlock_irqrestore(&pf_queue->lock, flags);
-
- return full ? -ENOSPC : 0;
-}
-
-#define USM_QUEUE_MAX_RUNTIME_MS 20
-
-static void pf_queue_work_func(struct work_struct *w)
-{
- struct pf_queue *pf_queue = container_of(w, struct pf_queue, worker);
- struct xe_gt *gt = pf_queue->gt;
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_guc_pagefault_reply reply = {};
- struct pagefault pf = {};
- unsigned long threshold;
- int ret;
-
- threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
-
- while (get_pagefault(pf_queue, &pf)) {
- ret = handle_pagefault(gt, &pf);
- if (unlikely(ret)) {
- print_pagefault(xe, &pf);
- pf.fault_unsuccessful = 1;
- drm_dbg(&xe->drm, "Fault response: Unsuccessful %d\n", ret);
- }
-
- reply.dw0 = FIELD_PREP(PFR_VALID, 1) |
- FIELD_PREP(PFR_SUCCESS, pf.fault_unsuccessful) |
- FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
- FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
- FIELD_PREP(PFR_ASID, pf.asid);
-
- reply.dw1 = FIELD_PREP(PFR_VFID, pf.vfid) |
- FIELD_PREP(PFR_ENG_INSTANCE, pf.engine_instance) |
- FIELD_PREP(PFR_ENG_CLASS, pf.engine_class) |
- FIELD_PREP(PFR_PDATA, pf.pdata);
-
- send_pagefault_reply(&gt->uc.guc, &reply);
-
- if (time_after(jiffies, threshold) &&
- pf_queue->tail != pf_queue->head) {
- queue_work(gt->usm.pf_wq, w);
- break;
- }
- }
-}
-
-static void acc_queue_work_func(struct work_struct *w);
-
-static void pagefault_fini(void *arg)
-{
- struct xe_gt *gt = arg;
- struct xe_device *xe = gt_to_xe(gt);
-
- if (!xe->info.has_usm)
- return;
-
- destroy_workqueue(gt->usm.acc_wq);
- destroy_workqueue(gt->usm.pf_wq);
-}
-
-static int xe_alloc_pf_queue(struct xe_gt *gt, struct pf_queue *pf_queue)
-{
- struct xe_device *xe = gt_to_xe(gt);
- xe_dss_mask_t all_dss;
- int num_dss, num_eus;
-
- bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
- XE_MAX_DSS_FUSE_BITS);
-
- num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
- num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
- XE_MAX_EU_FUSE_BITS) * num_dss;
-
- /*
- * user can issue separate page faults per EU and per CS
- *
- * XXX: Multiplier required as compute UMD are getting PF queue errors
- * without it. Follow on why this multiplier is required.
- */
-#define PF_MULTIPLIER 8
- pf_queue->num_dw =
- (num_eus + XE_NUM_HW_ENGINES) * PF_MSG_LEN_DW * PF_MULTIPLIER;
-#undef PF_MULTIPLIER
-
- pf_queue->gt = gt;
- pf_queue->data = devm_kcalloc(xe->drm.dev, pf_queue->num_dw,
- sizeof(u32), GFP_KERNEL);
- if (!pf_queue->data)
- return -ENOMEM;
-
- spin_lock_init(&pf_queue->lock);
- INIT_WORK(&pf_queue->worker, pf_queue_work_func);
-
- return 0;
-}
-
-int xe_gt_pagefault_init(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- int i, ret = 0;
-
- if (!xe->info.has_usm)
- return 0;
-
- for (i = 0; i < NUM_PF_QUEUE; ++i) {
- ret = xe_alloc_pf_queue(gt, &gt->usm.pf_queue[i]);
- if (ret)
- return ret;
- }
- for (i = 0; i < NUM_ACC_QUEUE; ++i) {
- gt->usm.acc_queue[i].gt = gt;
- spin_lock_init(&gt->usm.acc_queue[i].lock);
- INIT_WORK(&gt->usm.acc_queue[i].worker, acc_queue_work_func);
- }
-
- gt->usm.pf_wq = alloc_workqueue("xe_gt_page_fault_work_queue",
- WQ_UNBOUND | WQ_HIGHPRI, NUM_PF_QUEUE);
- if (!gt->usm.pf_wq)
- return -ENOMEM;
-
- gt->usm.acc_wq = alloc_workqueue("xe_gt_access_counter_work_queue",
- WQ_UNBOUND | WQ_HIGHPRI,
- NUM_ACC_QUEUE);
- if (!gt->usm.acc_wq) {
- destroy_workqueue(gt->usm.pf_wq);
- return -ENOMEM;
- }
-
- return devm_add_action_or_reset(xe->drm.dev, pagefault_fini, gt);
-}
-
-void xe_gt_pagefault_reset(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- int i;
-
- if (!xe->info.has_usm)
- return;
-
- for (i = 0; i < NUM_PF_QUEUE; ++i) {
- spin_lock_irq(&gt->usm.pf_queue[i].lock);
- gt->usm.pf_queue[i].head = 0;
- gt->usm.pf_queue[i].tail = 0;
- spin_unlock_irq(&gt->usm.pf_queue[i].lock);
- }
-
- for (i = 0; i < NUM_ACC_QUEUE; ++i) {
- spin_lock(&gt->usm.acc_queue[i].lock);
- gt->usm.acc_queue[i].head = 0;
- gt->usm.acc_queue[i].tail = 0;
- spin_unlock(&gt->usm.acc_queue[i].lock);
- }
-}
-
-static int granularity_in_byte(int val)
-{
- switch (val) {
- case 0:
- return SZ_128K;
- case 1:
- return SZ_2M;
- case 2:
- return SZ_16M;
- case 3:
- return SZ_64M;
- default:
- return 0;
- }
-}
-
-static int sub_granularity_in_byte(int val)
-{
- return (granularity_in_byte(val) / 32);
-}
-
-static void print_acc(struct xe_device *xe, struct acc *acc)
-{
- drm_warn(&xe->drm, "Access counter request:\n"
- "\tType: %s\n"
- "\tASID: %d\n"
- "\tVFID: %d\n"
- "\tEngine: %d:%d\n"
- "\tGranularity: 0x%x KB Region/ %d KB sub-granularity\n"
- "\tSub_Granularity Vector: 0x%08x\n"
- "\tVA Range base: 0x%016llx\n",
- acc->access_type ? "AC_NTFY_VAL" : "AC_TRIG_VAL",
- acc->asid, acc->vfid, acc->engine_class, acc->engine_instance,
- granularity_in_byte(acc->granularity) / SZ_1K,
- sub_granularity_in_byte(acc->granularity) / SZ_1K,
- acc->sub_granularity, acc->va_range_base);
-}
-
-static struct xe_vma *get_acc_vma(struct xe_vm *vm, struct acc *acc)
-{
- u64 page_va = acc->va_range_base + (ffs(acc->sub_granularity) - 1) *
- sub_granularity_in_byte(acc->granularity);
-
- return xe_vm_find_overlapping_vma(vm, page_va, SZ_4K);
-}
-
-static int handle_acc(struct xe_gt *gt, struct acc *acc)
-{
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_tile *tile = gt_to_tile(gt);
- struct drm_exec exec;
- struct xe_vm *vm;
- struct xe_vma *vma;
- int ret = 0;
-
- /* We only support ACC_TRIGGER at the moment */
- if (acc->access_type != ACC_TRIGGER)
- return -EINVAL;
-
- vm = asid_to_vm(xe, acc->asid);
- if (IS_ERR(vm))
- return PTR_ERR(vm);
-
- down_read(&vm->lock);
-
- /* Lookup VMA */
- vma = get_acc_vma(vm, acc);
- if (!vma) {
- ret = -EINVAL;
- goto unlock_vm;
- }
-
- trace_xe_vma_acc(vma);
-
- /* Userptr or null can't be migrated, nothing to do */
- if (xe_vma_has_no_bo(vma))
- goto unlock_vm;
-
- /* Lock VM and BOs dma-resv */
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- ret = xe_pf_begin(&exec, vma, true, tile->id);
- drm_exec_retry_on_contention(&exec);
- if (ret)
- break;
- }
-
- drm_exec_fini(&exec);
-unlock_vm:
- up_read(&vm->lock);
- xe_vm_put(vm);
-
- return ret;
-}
-
-#define make_u64(hi__, low__) ((u64)(hi__) << 32 | (u64)(low__))
-
-#define ACC_MSG_LEN_DW 4
-
-static bool get_acc(struct acc_queue *acc_queue, struct acc *acc)
-{
- const struct xe_guc_acc_desc *desc;
- bool ret = false;
-
- spin_lock(&acc_queue->lock);
- if (acc_queue->tail != acc_queue->head) {
- desc = (const struct xe_guc_acc_desc *)
- (acc_queue->data + acc_queue->tail);
-
- acc->granularity = FIELD_GET(ACC_GRANULARITY, desc->dw2);
- acc->sub_granularity = FIELD_GET(ACC_SUBG_HI, desc->dw1) << 31 |
- FIELD_GET(ACC_SUBG_LO, desc->dw0);
- acc->engine_class = FIELD_GET(ACC_ENG_CLASS, desc->dw1);
- acc->engine_instance = FIELD_GET(ACC_ENG_INSTANCE, desc->dw1);
- acc->asid = FIELD_GET(ACC_ASID, desc->dw1);
- acc->vfid = FIELD_GET(ACC_VFID, desc->dw2);
- acc->access_type = FIELD_GET(ACC_TYPE, desc->dw0);
- acc->va_range_base = make_u64(desc->dw3 & ACC_VIRTUAL_ADDR_RANGE_HI,
- desc->dw2 & ACC_VIRTUAL_ADDR_RANGE_LO);
-
- acc_queue->tail = (acc_queue->tail + ACC_MSG_LEN_DW) %
- ACC_QUEUE_NUM_DW;
- ret = true;
- }
- spin_unlock(&acc_queue->lock);
-
- return ret;
-}
-
-static void acc_queue_work_func(struct work_struct *w)
-{
- struct acc_queue *acc_queue = container_of(w, struct acc_queue, worker);
- struct xe_gt *gt = acc_queue->gt;
- struct xe_device *xe = gt_to_xe(gt);
- struct acc acc = {};
- unsigned long threshold;
- int ret;
-
- threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
-
- while (get_acc(acc_queue, &acc)) {
- ret = handle_acc(gt, &acc);
- if (unlikely(ret)) {
- print_acc(xe, &acc);
- drm_warn(&xe->drm, "ACC: Unsuccessful %d\n", ret);
- }
-
- if (time_after(jiffies, threshold) &&
- acc_queue->tail != acc_queue->head) {
- queue_work(gt->usm.acc_wq, w);
- break;
- }
- }
-}
-
-static bool acc_queue_full(struct acc_queue *acc_queue)
-{
- lockdep_assert_held(&acc_queue->lock);
-
- return CIRC_SPACE(acc_queue->head, acc_queue->tail, ACC_QUEUE_NUM_DW) <=
- ACC_MSG_LEN_DW;
-}
-
-int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct acc_queue *acc_queue;
- u32 asid;
- bool full;
-
- /*
- * The below logic doesn't work unless ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW == 0
- */
- BUILD_BUG_ON(ACC_QUEUE_NUM_DW % ACC_MSG_LEN_DW);
-
- if (unlikely(len != ACC_MSG_LEN_DW))
- return -EPROTO;
-
- asid = FIELD_GET(ACC_ASID, msg[1]);
- acc_queue = &gt->usm.acc_queue[asid % NUM_ACC_QUEUE];
-
- spin_lock(&acc_queue->lock);
- full = acc_queue_full(acc_queue);
- if (!full) {
- memcpy(acc_queue->data + acc_queue->head, msg,
- len * sizeof(u32));
- acc_queue->head = (acc_queue->head + len) % ACC_QUEUE_NUM_DW;
- queue_work(gt->usm.acc_wq, &acc_queue->worker);
- } else {
- drm_warn(&gt_to_xe(gt)->drm, "ACC Queue full, dropping ACC");
- }
- spin_unlock(&acc_queue->lock);
-
- return full ? -ENOSPC : 0;
-}
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.h b/drivers/gpu/drm/xe/xe_gt_pagefault.h
deleted file mode 100644
index 839c065a5e4c..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.h
+++ /dev/null
@@ -1,19 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2022 Intel Corporation
- */
-
-#ifndef _XE_GT_PAGEFAULT_H_
-#define _XE_GT_PAGEFAULT_H_
-
-#include <linux/types.h>
-
-struct xe_gt;
-struct xe_guc;
-
-int xe_gt_pagefault_init(struct xe_gt *gt);
-void xe_gt_pagefault_reset(struct xe_gt *gt);
-int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
-int xe_guc_access_counter_notify_handler(struct xe_guc *guc, u32 *msg, u32 len);
-
-#endif /* _XE_GT_PAGEFAULT_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_printk.h b/drivers/gpu/drm/xe/xe_gt_printk.h
index 11da0228cea7..1313d32862db 100644
--- a/drivers/gpu/drm/xe/xe_gt_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_printk.h
@@ -6,18 +6,22 @@
#ifndef _XE_GT_PRINTK_H_
#define _XE_GT_PRINTK_H_
-#include <drm/drm_print.h>
-
#include "xe_gt_types.h"
+#include "xe_tile_printk.h"
+
+#define __XE_GT_PRINTK_FMT(_gt, _fmt, _args...) "GT%u: " _fmt, (_gt)->info.id, ##_args
#define xe_gt_printk(_gt, _level, _fmt, ...) \
- drm_##_level(&gt_to_xe(_gt)->drm, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_tile_printk((_gt)->tile, _level, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
+
+#define xe_gt_err(_gt, _fmt, ...) \
+ xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
#define xe_gt_err_once(_gt, _fmt, ...) \
xe_gt_printk((_gt), err_once, _fmt, ##__VA_ARGS__)
-#define xe_gt_err(_gt, _fmt, ...) \
- xe_gt_printk((_gt), err, _fmt, ##__VA_ARGS__)
+#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
+ xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
#define xe_gt_warn(_gt, _fmt, ...) \
xe_gt_printk((_gt), warn, _fmt, ##__VA_ARGS__)
@@ -31,20 +35,20 @@
#define xe_gt_dbg(_gt, _fmt, ...) \
xe_gt_printk((_gt), dbg, _fmt, ##__VA_ARGS__)
-#define xe_gt_err_ratelimited(_gt, _fmt, ...) \
- xe_gt_printk((_gt), err_ratelimited, _fmt, ##__VA_ARGS__)
+#define xe_gt_WARN_type(_gt, _type, _condition, _fmt, ...) \
+ xe_tile_WARN##_type((_gt)->tile, _condition, _fmt, ## __VA_ARGS__)
#define xe_gt_WARN(_gt, _condition, _fmt, ...) \
- drm_WARN(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_gt_WARN_type((_gt),, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define xe_gt_WARN_ONCE(_gt, _condition, _fmt, ...) \
- drm_WARN_ONCE(&gt_to_xe(_gt)->drm, _condition, "GT%u: " _fmt, (_gt)->info.id, ##__VA_ARGS__)
+ xe_gt_WARN_type((_gt), _ONCE, _condition, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define xe_gt_WARN_ON(_gt, _condition) \
- xe_gt_WARN((_gt), _condition, "%s(%s)", "gt_WARN_ON", __stringify(_condition))
+ xe_gt_WARN((_gt), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
#define xe_gt_WARN_ON_ONCE(_gt, _condition) \
- xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "gt_WARN_ON_ONCE", __stringify(_condition))
+ xe_gt_WARN_ONCE((_gt), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
static inline void __xe_gt_printfn_err(struct drm_printer *p, struct va_format *vaf)
{
@@ -67,12 +71,12 @@ static inline void __xe_gt_printfn_dbg(struct drm_printer *p, struct va_format *
/*
* The original xe_gt_dbg() callsite annotations are useless here,
- * redirect to the tweaked drm_dbg_printer() instead.
+ * redirect to the tweaked xe_tile_dbg_printer() instead.
*/
- dbg = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, NULL);
+ dbg = xe_tile_dbg_printer((gt)->tile);
dbg.origin = p->origin;
- drm_printf(&dbg, "GT%u: %pV", gt->info.id, vaf);
+ drm_printf(&dbg, __XE_GT_PRINTK_FMT(gt, "%pV", vaf));
}
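To make the reworked macro chain concrete, the comment below expands one hypothetical call through the macros defined in this hunk; xe_tile_printk() lives in xe_tile_printk.h and is assumed here only to forward its arguments.

/*
 * Step-by-step expansion of a hypothetical call:
 *
 *	xe_gt_err(gt, "bad %d\n", val)
 *	-> xe_gt_printk(gt, err, "bad %d\n", val)
 *	-> xe_tile_printk(gt->tile, err,
 *			  __XE_GT_PRINTK_FMT(gt, "bad %d\n", val))
 *	-> xe_tile_printk(gt->tile, err,
 *			  "GT%u: bad %d\n", gt->info.id, val)
 *
 * i.e. the tile-level helper already receives the GT-prefixed format.
 */
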
/**
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
index c08efca6420e..0714c758b9c1 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.c
@@ -16,6 +16,7 @@
#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
+#include "xe_guc_submit.h"
#include "xe_mmio.h"
#include "xe_pm.h"
@@ -47,9 +48,21 @@ static int pf_alloc_metadata(struct xe_gt *gt)
static void pf_init_workers(struct xe_gt *gt)
{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
INIT_WORK(&gt->sriov.pf.workers.restart, pf_worker_restart_func);
}
+static void pf_fini_workers(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ if (disable_work_sync(&gt->sriov.pf.workers.restart)) {
+ xe_gt_sriov_dbg_verbose(gt, "pending restart disabled!\n");
+ /* release an rpm reference taken on the worker's behalf */
+ xe_pm_runtime_put(gt_to_xe(gt));
+ }
+}
+
/**
* xe_gt_sriov_pf_init_early - Prepare SR-IOV PF data structures on PF.
* @gt: the &xe_gt to initialize
@@ -79,6 +92,21 @@ int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
return 0;
}
+static void pf_fini_action(void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ pf_fini_workers(gt);
+}
+
+static int pf_init_late(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_gt_assert(gt, IS_SRIOV_PF(xe));
+ return devm_add_action_or_reset(xe->drm.dev, pf_fini_action, gt);
+}
+
/**
* xe_gt_sriov_pf_init - Prepare SR-IOV PF data structures on PF.
* @gt: the &xe_gt to initialize
@@ -95,7 +123,15 @@ int xe_gt_sriov_pf_init(struct xe_gt *gt)
if (err)
return err;
- return xe_gt_sriov_pf_migration_init(gt);
+ err = xe_gt_sriov_pf_migration_init(gt);
+ if (err)
+ return err;
+
+ err = pf_init_late(gt);
+ if (err)
+ return err;
+
+ return 0;
}
static bool pf_needs_enable_ggtt_guest_update(struct xe_device *xe)
@@ -122,39 +158,19 @@ void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
xe_gt_sriov_pf_service_update(gt);
}
-static u32 pf_get_vf_regs_stride(struct xe_device *xe)
-{
- return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
-}
-
-static struct xe_reg xe_reg_vf_to_pf(struct xe_reg vf_reg, unsigned int vfid, u32 stride)
-{
- struct xe_reg pf_reg = vf_reg;
-
- pf_reg.vf = 0;
- pf_reg.addr += stride * vfid;
-
- return pf_reg;
-}
-
static void pf_clear_vf_scratch_regs(struct xe_gt *gt, unsigned int vfid)
{
- u32 stride = pf_get_vf_regs_stride(gt_to_xe(gt));
- struct xe_reg scratch;
- int n, count;
+ struct xe_mmio mmio;
+ int n;
+
+ xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid);
if (xe_gt_is_media_type(gt)) {
- count = MED_VF_SW_FLAG_COUNT;
- for (n = 0; n < count; n++) {
- scratch = xe_reg_vf_to_pf(MED_VF_SW_FLAG(n), vfid, stride);
- xe_mmio_write32(&gt->mmio, scratch, 0);
- }
+ for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
+ xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), 0);
} else {
- count = VF_SW_FLAG_COUNT;
- for (n = 0; n < count; n++) {
- scratch = xe_reg_vf_to_pf(VF_SW_FLAG(n), vfid, stride);
- xe_mmio_write32(&gt->mmio, scratch, 0);
- }
+ for (n = 0; n < VF_SW_FLAG_COUNT; n++)
+ xe_mmio_write32(&mmio, VF_SW_FLAG(n), 0);
}
}
@@ -172,13 +188,38 @@ void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid)
pf_clear_vf_scratch_regs(gt, vfid);
}
+static void pf_cancel_restart(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+
+ if (cancel_work_sync(&gt->sriov.pf.workers.restart)) {
+ xe_gt_sriov_dbg_verbose(gt, "pending restart canceled!\n");
+ /* release an rpm reference taken on the worker's behalf */
+ xe_pm_runtime_put(gt_to_xe(gt));
+ }
+}
+
+/**
+ * xe_gt_sriov_pf_stop_prepare() - Prepare to stop SR-IOV support.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on the PF.
+ */
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+ pf_cancel_restart(gt);
+}
+
static void pf_restart(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
- xe_pm_runtime_get(xe);
+ xe_gt_assert(gt, !xe_pm_runtime_suspended(xe));
+
xe_gt_sriov_pf_config_restart(gt);
xe_gt_sriov_pf_control_restart(gt);
+
+ /* release an rpm reference taken on our behalf */
xe_pm_runtime_put(xe);
xe_gt_sriov_dbg(gt, "restart completed\n");
@@ -197,8 +238,13 @@ static void pf_queue_restart(struct xe_gt *gt)
xe_gt_assert(gt, IS_SRIOV_PF(xe));
- if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart))
+ /* take an rpm reference on behalf of the worker */
+ xe_pm_runtime_get_noresume(xe);
+
+ if (!queue_work(xe->sriov.wq, &gt->sriov.pf.workers.restart)) {
xe_gt_sriov_dbg(gt, "restart already in queue!\n");
+ xe_pm_runtime_put(xe);
+ }
}
/**
@@ -211,3 +257,27 @@ void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
pf_queue_restart(gt);
}
+
+static void pf_flush_restart(struct xe_gt *gt)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ flush_work(&gt->sriov.pf.workers.restart);
+}
+
+/**
+ * xe_gt_sriov_pf_wait_ready() - Wait until per-GT PF SR-IOV support is ready.
+ * @gt: the &xe_gt
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt)
+{
+ /* don't wait if there is another ongoing reset */
+ if (xe_guc_read_stopped(&gt->uc.guc))
+ return -EBUSY;
+
+ pf_flush_restart(gt);
+ return 0;
+}
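The restart rework above hands a runtime-PM reference from the queuing side to the worker; below is a generic sketch of that handoff pattern under assumed names (example_restart_work, example_queue and example_worker are illustrative, not driver symbols).

#include <linux/workqueue.h>

#include "xe_pm.h"

struct example_restart_work {
	struct work_struct base;
	struct xe_device *xe;
};

static void example_worker(struct work_struct *work)
{
	struct example_restart_work *w =
		container_of(work, struct example_restart_work, base);

	/* ... perform the restart ... */

	xe_pm_runtime_put(w->xe);	/* release the reference taken at queue time */
}

static void example_queue(struct xe_device *xe, struct workqueue_struct *wq,
			  struct example_restart_work *w)
{
	xe_pm_runtime_get_noresume(xe);	/* pin the device on the worker's behalf */

	if (!queue_work(wq, &w->base))
		xe_pm_runtime_put(xe);	/* already queued: drop our reference */
}
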
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
index f474509411c0..e7fde3f9937a 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf.h
@@ -11,8 +11,10 @@ struct xe_gt;
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_init_early(struct xe_gt *gt);
int xe_gt_sriov_pf_init(struct xe_gt *gt);
+int xe_gt_sriov_pf_wait_ready(struct xe_gt *gt);
void xe_gt_sriov_pf_init_hw(struct xe_gt *gt);
void xe_gt_sriov_pf_sanitize_hw(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt);
void xe_gt_sriov_pf_restart(struct xe_gt *gt);
#else
static inline int xe_gt_sriov_pf_init_early(struct xe_gt *gt)
@@ -29,6 +31,10 @@ static inline void xe_gt_sriov_pf_init_hw(struct xe_gt *gt)
{
}
+static inline void xe_gt_sriov_pf_stop_prepare(struct xe_gt *gt)
+{
+}
+
static inline void xe_gt_sriov_pf_restart(struct xe_gt *gt)
{
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
index 2420a548cacc..59c5c6b4d994 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.c
@@ -9,6 +9,7 @@
#include "abi/guc_actions_sriov_abi.h"
#include "abi/guc_klvs_abi.h"
+#include "regs/xe_gtt_defs.h"
#include "regs/xe_guc_regs.h"
#include "xe_bo.h"
@@ -33,6 +34,7 @@
#include "xe_migrate.h"
#include "xe_sriov.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_vram_types.h"
#include "xe_wopcm.h"
#define make_u64_from_u32(hi, lo) ((u64)((u64)(u32)(hi) << 32 | (u32)(lo)))
@@ -104,13 +106,13 @@ static int pf_push_vf_buf_klvs(struct xe_gt *gt, unsigned int vfid, u32 num_klvs
}
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
- struct drm_printer p = xe_gt_info_printer(gt);
+ struct drm_printer p = xe_gt_dbg_printer(gt);
void *klvs = xe_guc_buf_cpu_ptr(buf);
char name[8];
- xe_gt_sriov_info(gt, "pushed %s config with %u KLV%s:\n",
- xe_sriov_function_name(vfid, name, sizeof(name)),
- num_klvs, str_plural(num_klvs));
+ xe_gt_sriov_dbg(gt, "pushed %s config with %u KLV%s:\n",
+ xe_sriov_function_name(vfid, name, sizeof(name)),
+ num_klvs, str_plural(num_klvs));
xe_guc_klv_print(klvs, num_dwords, &p);
}
@@ -238,26 +240,35 @@ static struct xe_gt_sriov_config *pf_pick_vf_config(struct xe_gt *gt, unsigned i
}
/* Return: number of configuration dwords written */
-static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
+static u32 encode_ggtt(u32 *cfg, u64 start, u64 size, bool details)
{
u32 n = 0;
- if (xe_ggtt_node_allocated(config->ggtt_region)) {
- if (details) {
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
- cfg[n++] = lower_32_bits(config->ggtt_region->base.start);
- cfg[n++] = upper_32_bits(config->ggtt_region->base.start);
- }
-
- cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
- cfg[n++] = lower_32_bits(config->ggtt_region->base.size);
- cfg[n++] = upper_32_bits(config->ggtt_region->base.size);
+ if (details) {
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_START);
+ cfg[n++] = lower_32_bits(start);
+ cfg[n++] = upper_32_bits(start);
}
+ cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_GGTT_SIZE);
+ cfg[n++] = lower_32_bits(size);
+ cfg[n++] = upper_32_bits(size);
+
return n;
}
/* Return: number of configuration dwords written */
+static u32 encode_config_ggtt(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
+{
+ struct xe_ggtt_node *node = config->ggtt_region;
+
+ if (!xe_ggtt_node_allocated(node))
+ return 0;
+
+ return encode_ggtt(cfg, node->base.start, node->base.size, details);
+}
+
+/* Return: number of configuration dwords written */
static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool details)
{
u32 n = 0;
@@ -282,8 +293,8 @@ static u32 encode_config(u32 *cfg, const struct xe_gt_sriov_config *config, bool
if (config->lmem_obj) {
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_LMEM_SIZE);
- cfg[n++] = lower_32_bits(config->lmem_obj->size);
- cfg[n++] = upper_32_bits(config->lmem_obj->size);
+ cfg[n++] = lower_32_bits(xe_bo_size(config->lmem_obj));
+ cfg[n++] = upper_32_bits(xe_bo_size(config->lmem_obj));
}
cfg[n++] = PREP_GUC_KLV_TAG(VF_CFG_EXEC_QUANTUM);
@@ -332,6 +343,17 @@ static int pf_push_full_vf_config(struct xe_gt *gt, unsigned int vfid)
}
xe_gt_assert(gt, num_dwords <= max_cfg_dwords);
+ if (vfid == PFID) {
+ u64 ggtt_start = xe_wopcm_size(gt_to_xe(gt));
+ u64 ggtt_size = gt_to_tile(gt)->mem.ggtt->size - ggtt_start;
+
+ /* plain PF config data will never include a real GGTT region */
+ xe_gt_assert(gt, !encode_config_ggtt(cfg + num_dwords, config, true));
+
+ /* fake PF GGTT config covers full GGTT range except reserved WOPCM */
+ num_dwords += encode_ggtt(cfg + num_dwords, ggtt_start, ggtt_size, true);
+ }
+
num_klvs = xe_guc_klv_count(cfg, num_dwords);
err = pf_push_vf_buf_klvs(gt, vfid, num_klvs, buf, num_dwords);
@@ -376,7 +398,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt)
{
u64 spare;
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
@@ -388,7 +410,7 @@ static u64 pf_get_spare_ggtt(struct xe_gt *gt)
static int pf_set_spare_ggtt(struct xe_gt *gt, u64 size)
{
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
@@ -443,7 +465,7 @@ static int pf_provision_vf_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size)
int err;
xe_gt_assert(gt, vfid);
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
size = round_up(size, alignment);
@@ -492,7 +514,7 @@ static u64 pf_get_vf_config_ggtt(struct xe_gt *gt, unsigned int vfid)
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
struct xe_ggtt_node *node = config->ggtt_region;
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
return xe_ggtt_node_allocated(node) ? node->base.size : 0;
}
@@ -560,7 +582,7 @@ int xe_gt_sriov_pf_config_set_ggtt(struct xe_gt *gt, unsigned int vfid, u64 size
{
int err;
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
if (vfid)
@@ -622,7 +644,7 @@ int xe_gt_sriov_pf_config_bulk_set_ggtt(struct xe_gt *gt, unsigned int vfid,
int err = 0;
xe_gt_assert(gt, vfid);
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
if (!num_vfs)
return 0;
@@ -676,6 +698,22 @@ static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs)
return fair;
}
+static u64 pf_profile_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs)
+{
+ bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt));
+ u64 shareable = ALIGN_DOWN(GUC_GGTT_TOP, SZ_512M);
+ u64 alignment = pf_get_ggtt_alignment(gt);
+
+ if (admin_only_pf && num_vfs == 1)
+ return ALIGN_DOWN(shareable, alignment);
+
+ /* need to hardcode due to ~512M of GGTT being reserved */
+ if (num_vfs > 56)
+ return SZ_64M - SZ_8M;
+
+ return rounddown_pow_of_two(div_u64(shareable, num_vfs));
+}
+
/**
* xe_gt_sriov_pf_config_set_fair_ggtt - Provision many VFs with fair GGTT.
* @gt: the &xe_gt (can't be media)
@@ -689,11 +727,12 @@ static u64 pf_estimate_fair_ggtt(struct xe_gt *gt, unsigned int num_vfs)
int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid,
unsigned int num_vfs)
{
+ u64 profile = pf_profile_fair_ggtt(gt, num_vfs);
u64 fair;
xe_gt_assert(gt, vfid);
xe_gt_assert(gt, num_vfs);
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
fair = pf_estimate_fair_ggtt(gt, num_vfs);
@@ -702,9 +741,71 @@ int xe_gt_sriov_pf_config_set_fair_ggtt(struct xe_gt *gt, unsigned int vfid,
if (!fair)
return -ENOSPC;
+ fair = min(fair, profile);
+ if (fair < profile)
+ xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %llu vs %llu)\n",
+ "GGTT", fair, profile);
+
return xe_gt_sriov_pf_config_bulk_set_ggtt(gt, vfid, num_vfs, fair);
}
+/**
+ * xe_gt_sriov_pf_config_ggtt_save() - Save a VF's provisioned GGTT data into a buffer.
+ * @gt: the &xe_gt
+ * @vfid: VF identifier (can't be 0)
+ * @buf: the GGTT data destination buffer (or NULL to query the buf size)
+ * @size: the size of the buffer (or 0 to query the buf size)
+ *
+ * This function can only be called on PF.
+ *
+ * Return: size of the buffer needed to save GGTT data if querying,
+ * 0 on successful save or a negative error code on failure.
+ */
+ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid,
+ void *buf, size_t size)
+{
+ struct xe_ggtt_node *node;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid);
+ xe_gt_assert(gt, !(!buf ^ !size));
+
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ node = pf_pick_vf_config(gt, vfid)->ggtt_region;
+
+ if (!buf)
+ return xe_ggtt_node_pt_size(node);
+
+ return xe_ggtt_node_save(node, buf, size, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_ggtt_restore() - Restore a VF's provisioned GGTT data from a buffer.
+ * @gt: the &xe_gt
+ * @vfid: VF identifier (can't be 0)
+ * @buf: the GGTT data source buffer
+ * @size: the size of the buffer
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size)
+{
+ struct xe_ggtt_node *node;
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid);
+
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ node = pf_pick_vf_config(gt, vfid)->ggtt_region;
+
+ return xe_ggtt_node_load(node, buf, size, vfid);
+}
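Going by the kernel-doc above, a hedged usage sketch of the query-then-save flow for xe_gt_sriov_pf_config_ggtt_save(); example_save_vf_ggtt() and its error handling are illustrative only, and the header name is assumed.

#include <linux/slab.h>

#include "xe_gt_sriov_pf_config.h"

/* Query the required size with a NULL buffer, then save into a freshly
 * allocated buffer; returns the buffer (and its size via @size) or NULL. */
static void *example_save_vf_ggtt(struct xe_gt *gt, unsigned int vfid,
				  size_t *size)
{
	ssize_t ret;
	void *buf;

	ret = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0);
	if (ret <= 0)
		return NULL;

	buf = kvzalloc(ret, GFP_KERNEL);
	if (!buf)
		return NULL;

	if (xe_gt_sriov_pf_config_ggtt_save(gt, vfid, buf, ret)) {
		kvfree(buf);
		return NULL;
	}

	*size = ret;
	return buf;
}
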
+
static u32 pf_get_min_spare_ctxs(struct xe_gt *gt)
{
/* XXX: preliminary */
@@ -903,7 +1004,8 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns
const char *what, const char *(*unit)(u32),
unsigned int last, int err)
{
- xe_gt_assert(gt, first);
+ char name[8];
+
xe_gt_assert(gt, num_vfs);
xe_gt_assert(gt, first <= last);
@@ -911,8 +1013,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns
return pf_config_set_u32_done(gt, first, value, get(gt, first), what, unit, err);
if (unlikely(err)) {
- xe_gt_sriov_notice(gt, "Failed to bulk provision VF%u..VF%u with %s\n",
- first, first + num_vfs - 1, what);
+ xe_gt_sriov_notice(gt, "Failed to bulk provision %s..VF%u with %s\n",
+ xe_sriov_function_name(first, name, sizeof(name)),
+ first + num_vfs - 1, what);
if (last > first)
pf_config_bulk_set_u32_done(gt, first, last - first, value,
get, what, unit, last, 0);
@@ -921,8 +1024,9 @@ static int pf_config_bulk_set_u32_done(struct xe_gt *gt, unsigned int first, uns
/* pick actual value from first VF - bulk provisioning shall be equal across all VFs */
value = get(gt, first);
- xe_gt_sriov_info(gt, "VF%u..VF%u provisioned with %u%s %s\n",
- first, first + num_vfs - 1, value, unit(value), what);
+ xe_gt_sriov_info(gt, "%s..VF%u provisioned with %u%s %s\n",
+ xe_sriov_function_name(first, name, sizeof(name)),
+ first + num_vfs - 1, value, unit(value), what);
return 0;
}
@@ -961,6 +1065,16 @@ int xe_gt_sriov_pf_config_bulk_set_ctxs(struct xe_gt *gt, unsigned int vfid,
"GuC context IDs", no_unit, n, err);
}
+static u32 pf_profile_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs)
+{
+ bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt));
+
+ if (admin_only_pf && num_vfs == 1)
+ return ALIGN_DOWN(GUC_ID_MAX, SZ_1K);
+
+ return rounddown_pow_of_two(GUC_ID_MAX / num_vfs);
+}
+
static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs)
{
struct xe_guc_id_mgr *idm = &gt->uc.guc.submission_state.idm;
@@ -993,6 +1107,7 @@ static u32 pf_estimate_fair_ctxs(struct xe_gt *gt, unsigned int num_vfs)
int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid,
unsigned int num_vfs)
{
+ u32 profile = pf_profile_fair_ctxs(gt, num_vfs);
u32 fair;
xe_gt_assert(gt, vfid);
@@ -1005,6 +1120,11 @@ int xe_gt_sriov_pf_config_set_fair_ctxs(struct xe_gt *gt, unsigned int vfid,
if (!fair)
return -ENOSPC;
+ fair = min(fair, profile);
+ if (fair < profile)
+ xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n",
+ "GuC context IDs", fair, profile);
+
return xe_gt_sriov_pf_config_bulk_set_ctxs(gt, vfid, num_vfs, fair);
}
@@ -1209,6 +1329,17 @@ int xe_gt_sriov_pf_config_bulk_set_dbs(struct xe_gt *gt, unsigned int vfid,
"GuC doorbell IDs", no_unit, n, err);
}
+static u32 pf_profile_fair_dbs(struct xe_gt *gt, unsigned int num_vfs)
+{
+ bool admin_only_pf = xe_sriov_pf_admin_only(gt_to_xe(gt));
+
+ /* XXX: preliminary */
+ if (admin_only_pf && num_vfs == 1)
+ return GUC_NUM_DOORBELLS - SZ_16;
+
+ return rounddown_pow_of_two(GUC_NUM_DOORBELLS / (num_vfs + 1));
+}
+
static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs)
{
struct xe_guc_db_mgr *dbm = &gt->uc.guc.dbm;
@@ -1241,6 +1372,7 @@ static u32 pf_estimate_fair_dbs(struct xe_gt *gt, unsigned int num_vfs)
int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid,
unsigned int num_vfs)
{
+ u32 profile = pf_profile_fair_dbs(gt, num_vfs);
u32 fair;
xe_gt_assert(gt, vfid);
@@ -1253,6 +1385,11 @@ int xe_gt_sriov_pf_config_set_fair_dbs(struct xe_gt *gt, unsigned int vfid,
if (!fair)
return -ENOSPC;
+ fair = min(fair, profile);
+ if (fair < profile)
+ xe_gt_sriov_info(gt, "Using non-profile provisioning (%s %u vs %u)\n",
+ "GuC doorbell IDs", fair, profile);
+
return xe_gt_sriov_pf_config_bulk_set_dbs(gt, vfid, num_vfs, fair);
}
@@ -1299,7 +1436,7 @@ static u64 pf_get_vf_config_lmem(struct xe_gt *gt, unsigned int vfid)
struct xe_bo *bo;
bo = config->lmem_obj;
- return bo ? bo->size : 0;
+ return bo ? xe_bo_size(bo) : 0;
}
static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
@@ -1327,7 +1464,17 @@ static int pf_distribute_config_lmem(struct xe_gt *gt, unsigned int vfid, u64 si
static void pf_force_lmtt_invalidate(struct xe_device *xe)
{
- /* TODO */
+ struct xe_lmtt *lmtt;
+ struct xe_tile *tile;
+ unsigned int tid;
+
+ xe_assert(xe, xe_device_has_lmtt(xe));
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ for_each_tile(tile, xe, tid) {
+ lmtt = &tile->sriov.pf.lmtt;
+ xe_lmtt_invalidate_hw(lmtt);
+ }
}
static void pf_reset_vf_lmtt(struct xe_device *xe, unsigned int vfid)
@@ -1388,7 +1535,7 @@ static int pf_update_vf_lmtt(struct xe_device *xe, unsigned int vfid)
err = xe_lmtt_populate_pages(lmtt, vfid, bo, offset);
if (err)
goto fail;
- offset += bo->size;
+ offset += xe_bo_size(bo);
}
}
@@ -1403,16 +1550,19 @@ fail:
return err;
}
-static void pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config)
+/* Return: %true if there was an LMEM provisioned, %false otherwise */
+static bool pf_release_vf_config_lmem(struct xe_gt *gt, struct xe_gt_sriov_config *config)
{
xe_gt_assert(gt, IS_DGFX(gt_to_xe(gt)));
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
if (config->lmem_obj) {
xe_bo_unpin_map_no_vm(config->lmem_obj);
config->lmem_obj = NULL;
+ return true;
}
+ return false;
}
static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
@@ -1425,7 +1575,7 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
xe_gt_assert(gt, vfid);
xe_gt_assert(gt, IS_DGFX(xe));
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
size = round_up(size, pf_get_lmem_alignment(gt));
@@ -1444,23 +1594,17 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
return 0;
xe_gt_assert(gt, pf_get_lmem_alignment(gt) == SZ_2M);
- bo = xe_bo_create_locked(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_NEEDS_2M |
- XE_BO_FLAG_PINNED |
- XE_BO_FLAG_PINNED_LATE_RESTORE);
+ bo = xe_bo_create_pin_range_novm(xe, tile,
+ ALIGN(size, PAGE_SIZE), 0, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_NEEDS_2M |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_FORCE_USER_VRAM);
if (IS_ERR(bo))
return PTR_ERR(bo);
- err = xe_bo_pin(bo);
- xe_bo_unlock(bo);
- if (unlikely(err)) {
- xe_bo_put(bo);
- return err;
- }
-
config->lmem_obj = bo;
if (xe_device_has_lmtt(xe)) {
@@ -1469,12 +1613,12 @@ static int pf_provision_vf_lmem(struct xe_gt *gt, unsigned int vfid, u64 size)
goto release;
}
- err = pf_push_vf_cfg_lmem(gt, vfid, bo->size);
+ err = pf_push_vf_cfg_lmem(gt, vfid, xe_bo_size(bo));
if (unlikely(err))
goto reset_lmtt;
xe_gt_sriov_dbg_verbose(gt, "VF%u LMEM %zu (%zuM)\n",
- vfid, bo->size, bo->size / SZ_1M);
+ vfid, xe_bo_size(bo), xe_bo_size(bo) / SZ_1M);
return 0;
reset_lmtt:
@@ -1520,6 +1664,9 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size
{
int err;
+ if (!xe_device_has_lmtt(gt_to_xe(gt)))
+ return -EPERM;
+
mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
if (vfid)
err = pf_provision_vf_lmem(gt, vfid, size);
@@ -1550,7 +1697,7 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid,
int err = 0;
xe_gt_assert(gt, vfid);
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
if (!num_vfs)
return 0;
@@ -1568,11 +1715,37 @@ int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid,
"LMEM", n, err);
}
+static struct xe_bo *pf_get_vf_config_lmem_obj(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
+
+ return config->lmem_obj;
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_lmem_obj() - Take a reference to the struct &xe_bo backing VF LMEM.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function can only be called on PF.
+ * The caller is responsible for calling xe_bo_put() on the returned object.
+ *
+ * Return: pointer to struct &xe_bo backing VF LMEM (if any).
+ */
+struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, vfid);
+
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ return xe_bo_get(pf_get_vf_config_lmem_obj(gt, vfid));
+}
+
static u64 pf_query_free_lmem(struct xe_gt *gt)
{
struct xe_tile *tile = gt->tile;
- return xe_ttm_vram_get_avail(&tile->mem.vram.ttm.manager);
+ return xe_ttm_vram_get_avail(&tile->mem.vram->ttm.manager);
}
static u64 pf_query_max_lmem(struct xe_gt *gt)
@@ -1600,7 +1773,6 @@ static u64 pf_estimate_fair_lmem(struct xe_gt *gt, unsigned int num_vfs)
u64 fair;
fair = div_u64(available, num_vfs);
- fair = rounddown_pow_of_two(fair); /* XXX: ttm_vram_mgr & drm_buddy limitation */
fair = ALIGN_DOWN(fair, alignment);
#ifdef MAX_FAIR_LMEM
fair = min_t(u64, MAX_FAIR_LMEM, fair);
@@ -1627,9 +1799,9 @@ int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid,
xe_gt_assert(gt, vfid);
xe_gt_assert(gt, num_vfs);
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
+ xe_gt_assert(gt, xe_gt_is_main_type(gt));
- if (!IS_DGFX(gt_to_xe(gt)))
+ if (!xe_device_has_lmtt(gt_to_xe(gt)))
return 0;
mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
@@ -1661,7 +1833,7 @@ int xe_gt_sriov_pf_config_set_fair(struct xe_gt *gt, unsigned int vfid,
xe_gt_assert(gt, vfid);
xe_gt_assert(gt, num_vfs);
- if (!xe_gt_is_media_type(gt)) {
+ if (xe_gt_is_main_type(gt)) {
err = xe_gt_sriov_pf_config_set_fair_ggtt(gt, vfid, num_vfs);
result = result ?: err;
err = xe_gt_sriov_pf_config_set_fair_lmem(gt, vfid, num_vfs);
@@ -1694,7 +1866,7 @@ static int pf_provision_exec_quantum(struct xe_gt *gt, unsigned int vfid,
return 0;
}
-static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
+static u32 pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
@@ -1702,47 +1874,107 @@ static int pf_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
}
/**
- * xe_gt_sriov_pf_config_set_exec_quantum - Configure execution quantum for the VF.
+ * xe_gt_sriov_pf_config_set_exec_quantum_locked() - Configure PF/VF execution quantum.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
* @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
*
- * This function can only be called on PF.
+ * This function can only be called on PF with the master mutex held.
+ * It will log the provisioned value or an error in case of failure.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid,
- u32 exec_quantum)
+int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum)
{
int err;
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
err = pf_provision_exec_quantum(gt, vfid, exec_quantum);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
return pf_config_set_u32_done(gt, vfid, exec_quantum,
- xe_gt_sriov_pf_config_get_exec_quantum(gt, vfid),
+ pf_get_exec_quantum(gt, vfid),
"execution quantum", exec_quantum_unit, err);
}
/**
- * xe_gt_sriov_pf_config_get_exec_quantum - Get VF's execution quantum.
+ * xe_gt_sriov_pf_config_set_exec_quantum() - Configure PF/VF execution quantum.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
+ * @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
*
* This function can only be called on PF.
+ * It will log the provisioned value or an error in case of failure.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum)
+{
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, exec_quantum);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_exec_quantum_locked() - Get PF/VF execution quantum.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
*
- * Return: VF's (or PF's) execution quantum in milliseconds.
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: execution quantum in milliseconds (or 0 if infinity).
+ */
+u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_get_exec_quantum(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_exec_quantum() - Get PF/VF execution quantum.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: execution quantum in milliseconds (or 0 if infinity).
*/
u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid)
{
- u32 exec_quantum;
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
- exec_quantum = pf_get_exec_quantum(gt, vfid);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return pf_get_exec_quantum(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked() - Configure EQ for PF and VFs.
+ * @gt: the &xe_gt to configure
+ * @exec_quantum: requested execution quantum in milliseconds (0 is infinity)
+ *
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum)
+{
+ unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int n;
+ int err = 0;
+
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
- return exec_quantum;
+ for (n = 0; n <= totalvfs; n++) {
+ err = pf_provision_exec_quantum(gt, VFID(n), exec_quantum);
+ if (err)
+ break;
+ }
+
+ return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, exec_quantum,
+ pf_get_exec_quantum, "execution quantum",
+ exec_quantum_unit, n, err);
}
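A short illustration of why the API is split into plain and _locked variants (a minimal sketch, not part of this patch; the caller name is hypothetical): holding the master mutex across a read and a conditional update keeps the two steps atomic with respect to other provisioning paths, which the unlocked wrappers alone cannot guarantee.

	static int example_raise_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 min_ms)
	{
		/* take the same master mutex that the _locked helpers assert on */
		guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));

		/* read-modify-write within a single critical section */
		if (xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid) >= min_ms)
			return 0;

		return xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, min_ms);
	}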
static const char *preempt_timeout_unit(u32 preempt_timeout)
@@ -1765,7 +1997,7 @@ static int pf_provision_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
return 0;
}
-static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
+static u32 pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
@@ -1773,47 +2005,106 @@ static int pf_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
}
/**
- * xe_gt_sriov_pf_config_set_preempt_timeout - Configure preemption timeout for the VF.
+ * xe_gt_sriov_pf_config_set_preempt_timeout_locked() - Configure PF/VF preemption timeout.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
* @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
*
- * This function can only be called on PF.
+ * This function can only be called on PF with the master mutex held.
+ * It will log the provisioned value or an error in case of a failure.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
- u32 preempt_timeout)
+int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout)
{
int err;
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
err = pf_provision_preempt_timeout(gt, vfid, preempt_timeout);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
return pf_config_set_u32_done(gt, vfid, preempt_timeout,
- xe_gt_sriov_pf_config_get_preempt_timeout(gt, vfid),
+ pf_get_preempt_timeout(gt, vfid),
"preemption timeout", preempt_timeout_unit, err);
}
/**
- * xe_gt_sriov_pf_config_get_preempt_timeout - Get VF's preemption timeout.
+ * xe_gt_sriov_pf_config_set_preempt_timeout() - Configure PF/VF preemption timeout.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the PF or VF identifier
+ * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
*
* This function can only be called on PF.
*
- * Return: VF's (or PF's) preemption timeout in microseconds.
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout)
+{
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
+
+ return xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, preempt_timeout);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_preempt_timeout_locked() - Get PF/VF preemption timeout.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ *
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: preemption timeout in microseconds (or 0 if infinity).
+ */
+u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid)
+{
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ return pf_get_preempt_timeout(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_config_get_preempt_timeout() - Get PF/VF preemption timeout.
+ * @gt: the &xe_gt
+ * @vfid: the PF or VF identifier
+ *
+ * This function can only be called on PF.
+ *
+ * Return: preemption timeout in microseconds (or 0 if infinity).
*/
u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid)
{
- u32 preempt_timeout;
+ guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));
- mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
- preempt_timeout = pf_get_preempt_timeout(gt, vfid);
- mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+ return pf_get_preempt_timeout(gt, vfid);
+}
- return preempt_timeout;
+/**
+ * xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked() - Configure PT for PF and VFs.
+ * @gt: the &xe_gt to configure
+ * @preempt_timeout: requested preemption timeout in microseconds (0 is infinity)
+ *
+ * This function can only be called on PF with the master mutex held.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout)
+{
+ unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int n;
+ int err = 0;
+
+ lockdep_assert_held(xe_gt_sriov_pf_master_mutex(gt));
+
+ for (n = 0; n <= totalvfs; n++) {
+ err = pf_provision_preempt_timeout(gt, VFID(n), preempt_timeout);
+ if (err)
+ break;
+ }
+
+ return pf_config_bulk_set_u32_done(gt, 0, 1 + totalvfs, preempt_timeout,
+ pf_get_preempt_timeout, "preemption timeout",
+ preempt_timeout_unit, n, err);
}
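The bulk _locked setters also compose under one lock. A minimal sketch, assuming a hypothetical PF-level provisioning helper: the execution quantum and preemption timeout can be applied to the PF and every VF without dropping the master mutex in between.

	static int example_bulk_scheduling(struct xe_gt *gt, u32 eq_ms, u32 pt_us)
	{
		guard(mutex)(xe_gt_sriov_pf_master_mutex(gt));

		/* provision PF and all VFs in a single critical section */
		return xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq_ms) ?:
		       xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt_us);
	}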
static const char *sched_priority_unit(u32 priority)
@@ -1988,12 +2279,13 @@ static void pf_release_vf_config(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, vfid);
struct xe_device *xe = gt_to_xe(gt);
+ bool released;
- if (!xe_gt_is_media_type(gt)) {
+ if (xe_gt_is_main_type(gt)) {
pf_release_vf_config_ggtt(gt, config);
if (IS_DGFX(xe)) {
- pf_release_vf_config_lmem(gt, config);
- if (xe_device_has_lmtt(xe))
+ released = pf_release_vf_config_lmem(gt, config);
+ if (released && xe_device_has_lmtt(xe))
pf_update_vf_lmtt(xe, vfid);
}
}
@@ -2080,7 +2372,7 @@ static int pf_sanitize_vf_resources(struct xe_gt *gt, u32 vfid, long timeout)
 * Only GGTT and LMEM need to be cleared by the PF.
* GuC doorbell IDs and context IDs do not need any clearing.
*/
- if (!xe_gt_is_media_type(gt)) {
+ if (xe_gt_is_main_type(gt)) {
pf_sanitize_ggtt(config->ggtt_region, vfid);
if (IS_DGFX(xe))
err = pf_sanitize_lmem(tile, config->lmem_obj, timeout);
@@ -2147,7 +2439,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
- bool is_primary = !xe_gt_is_media_type(gt);
+ bool is_primary = xe_gt_is_main_type(gt);
bool valid_ggtt, valid_ctxs, valid_dbs;
bool valid_any, valid_all;
@@ -2163,7 +2455,7 @@ static int pf_validate_vf_config(struct xe_gt *gt, unsigned int vfid)
valid_all = valid_all && valid_ggtt;
valid_any = valid_any || (valid_ggtt && is_primary);
- if (IS_DGFX(xe)) {
+ if (xe_device_has_lmtt(xe)) {
bool valid_lmem = pf_get_vf_config_lmem(primary_gt, vfid);
valid_any = valid_any || (valid_lmem && is_primary);
@@ -2347,7 +2639,7 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
return -EINVAL;
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
- struct drm_printer p = xe_gt_info_printer(gt);
+ struct drm_printer p = xe_gt_dbg_printer(gt);
drm_printf(&p, "restoring VF%u config:\n", vfid);
xe_guc_klv_print(buf, size / sizeof(u32), &p);
@@ -2364,6 +2656,35 @@ int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
return err;
}
+static void pf_prepare_self_config(struct xe_gt *gt)
+{
+ struct xe_gt_sriov_config *config = pf_pick_vf_config(gt, PFID);
+
+ /*
+ * We want the PF to be allowed to use all context IDs, doorbell IDs,
+ * and the whole usable GGTT area. While we can store the ctxs/dbs numbers
+ * directly in the config structure, we can't do the same with the GGTT
+ * configuration, so let it be prepared on demand while pushing KLVs.
+ */
+ config->num_ctxs = GUC_ID_MAX;
+ config->num_dbs = GUC_NUM_DOORBELLS;
+}
+
+static int pf_push_self_config(struct xe_gt *gt)
+{
+ int err;
+
+ err = pf_push_full_vf_config(gt, PFID);
+ if (err) {
+ xe_gt_sriov_err(gt, "Failed to push self configuration (%pe)\n",
+ ERR_PTR(err));
+ return err;
+ }
+
+ xe_gt_sriov_dbg_verbose(gt, "self configuration completed\n");
+ return 0;
+}
+
static void fini_config(void *arg)
{
struct xe_gt *gt = arg;
@@ -2387,9 +2708,18 @@ static void fini_config(void *arg)
int xe_gt_sriov_pf_config_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ int err;
xe_gt_assert(gt, IS_SRIOV_PF(xe));
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ pf_prepare_self_config(gt);
+ err = pf_push_self_config(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
+ if (err)
+ return err;
+
return devm_add_action_or_reset(xe->drm.dev, fini_config, gt);
}
@@ -2407,6 +2737,10 @@ void xe_gt_sriov_pf_config_restart(struct xe_gt *gt)
unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(gt_to_xe(gt));
unsigned int fail = 0, skip = 0;
+ mutex_lock(xe_gt_sriov_pf_master_mutex(gt));
+ pf_push_self_config(gt);
+ mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
+
for (n = 1; n <= total_vfs; n++) {
if (xe_gt_sriov_pf_config_is_empty(gt, n))
skip++;
@@ -2550,10 +2884,10 @@ int xe_gt_sriov_pf_config_print_lmem(struct xe_gt *gt, struct drm_printer *p)
if (!config->lmem_obj)
continue;
- string_get_size(config->lmem_obj->size, 1, STRING_UNITS_2,
+ string_get_size(xe_bo_size(config->lmem_obj), 1, STRING_UNITS_2,
buf, sizeof(buf));
drm_printf(p, "VF%u:\t%zu\t(%s)\n",
- n, config->lmem_obj->size, buf);
+ n, xe_bo_size(config->lmem_obj), buf);
}
mutex_unlock(xe_gt_sriov_pf_master_mutex(gt));
@@ -2598,3 +2932,7 @@ int xe_gt_sriov_pf_config_print_available_ggtt(struct xe_gt *gt, struct drm_prin
return 0;
}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_gt_sriov_pf_config_kunit.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
index 513e6512a575..4975730423d7 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_config.h
@@ -36,14 +36,25 @@ int xe_gt_sriov_pf_config_set_lmem(struct xe_gt *gt, unsigned int vfid, u64 size
int xe_gt_sriov_pf_config_set_fair_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs);
int xe_gt_sriov_pf_config_bulk_set_lmem(struct xe_gt *gt, unsigned int vfid, unsigned int num_vfs,
u64 size);
+struct xe_bo *xe_gt_sriov_pf_config_get_lmem_obj(struct xe_gt *gt, unsigned int vfid);
u32 xe_gt_sriov_pf_config_get_exec_quantum(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_exec_quantum(struct xe_gt *gt, unsigned int vfid, u32 exec_quantum);
+u32 xe_gt_sriov_pf_config_get_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_exec_quantum_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 exec_quantum);
+int xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(struct xe_gt *gt, u32 exec_quantum);
+
u32 xe_gt_sriov_pf_config_get_preempt_timeout(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_preempt_timeout(struct xe_gt *gt, unsigned int vfid,
u32 preempt_timeout);
+u32 xe_gt_sriov_pf_config_get_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_config_set_preempt_timeout_locked(struct xe_gt *gt, unsigned int vfid,
+ u32 preempt_timeout);
+int xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(struct xe_gt *gt, u32 preempt_timeout);
+
u32 xe_gt_sriov_pf_config_get_sched_priority(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_set_sched_priority(struct xe_gt *gt, unsigned int vfid, u32 priority);
@@ -61,6 +72,11 @@ ssize_t xe_gt_sriov_pf_config_save(struct xe_gt *gt, unsigned int vfid, void *bu
int xe_gt_sriov_pf_config_restore(struct xe_gt *gt, unsigned int vfid,
const void *buf, size_t size);
+ssize_t xe_gt_sriov_pf_config_ggtt_save(struct xe_gt *gt, unsigned int vfid,
+ void *buf, size_t size);
+int xe_gt_sriov_pf_config_ggtt_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size);
+
bool xe_gt_sriov_pf_config_is_empty(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_config_init(struct xe_gt *gt);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
index 1f50aec3a059..bf48b05797de 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.c
@@ -15,10 +15,15 @@
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_pf_monitor.h"
-#include "xe_gt_sriov_pf_service.h"
#include "xe_gt_sriov_printk.h"
#include "xe_guc_ct.h"
#include "xe_sriov.h"
+#include "xe_sriov_packet.h"
+#include "xe_sriov_packet_types.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_pf_migration.h"
+#include "xe_sriov_pf_service.h"
+#include "xe_tile.h"
static const char *control_cmd_to_string(u32 cmd)
{
@@ -169,6 +174,7 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
CASE2STR(FLR_SEND_START);
CASE2STR(FLR_WAIT_GUC);
CASE2STR(FLR_GUC_DONE);
+ CASE2STR(FLR_SYNC);
CASE2STR(FLR_RESET_CONFIG);
CASE2STR(FLR_RESET_DATA);
CASE2STR(FLR_RESET_MMIO);
@@ -178,9 +184,20 @@ static const char *control_bit_to_string(enum xe_gt_sriov_control_bits bit)
CASE2STR(PAUSE_SEND_PAUSE);
CASE2STR(PAUSE_WAIT_GUC);
CASE2STR(PAUSE_GUC_DONE);
- CASE2STR(PAUSE_SAVE_GUC);
CASE2STR(PAUSE_FAILED);
CASE2STR(PAUSED);
+ CASE2STR(SAVE_WIP);
+ CASE2STR(SAVE_PROCESS_DATA);
+ CASE2STR(SAVE_WAIT_DATA);
+ CASE2STR(SAVE_DATA_DONE);
+ CASE2STR(SAVE_FAILED);
+ CASE2STR(SAVED);
+ CASE2STR(RESTORE_WIP);
+ CASE2STR(RESTORE_PROCESS_DATA);
+ CASE2STR(RESTORE_WAIT_DATA);
+ CASE2STR(RESTORE_DATA_DONE);
+ CASE2STR(RESTORE_FAILED);
+ CASE2STR(RESTORED);
CASE2STR(RESUME_WIP);
CASE2STR(RESUME_SEND_RESUME);
CASE2STR(RESUME_FAILED);
@@ -205,6 +222,8 @@ static unsigned long pf_get_default_timeout(enum xe_gt_sriov_control_bits bit)
case XE_GT_SRIOV_STATE_FLR_WIP:
case XE_GT_SRIOV_STATE_FLR_RESET_CONFIG:
return 5 * HZ;
+ case XE_GT_SRIOV_STATE_RESTORE_WIP:
+ return 20 * HZ;
default:
return HZ;
}
@@ -222,7 +241,7 @@ static unsigned long *pf_peek_vf_state(struct xe_gt *gt, unsigned int vfid)
{
struct xe_gt_sriov_control_state *cs = pf_pick_vf_control(gt, vfid);
- return &cs->state;
+ return cs->state;
}
static bool pf_check_vf_state(struct xe_gt *gt, unsigned int vfid,
@@ -270,12 +289,19 @@ static bool pf_expect_vf_not_state(struct xe_gt *gt, unsigned int vfid,
return result;
}
+static void pf_track_vf_state(struct xe_gt *gt, unsigned int vfid,
+ enum xe_gt_sriov_control_bits bit,
+ const char *what)
+{
+ xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) %s\n",
+ vfid, control_bit_to_string(bit), bit, what);
+}
+
static bool pf_enter_vf_state(struct xe_gt *gt, unsigned int vfid,
enum xe_gt_sriov_control_bits bit)
{
if (!test_and_set_bit(bit, pf_peek_vf_state(gt, vfid))) {
- xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) enter\n",
- vfid, control_bit_to_string(bit), bit);
+ pf_track_vf_state(gt, vfid, bit, "enter");
return true;
}
return false;
@@ -285,8 +311,7 @@ static bool pf_exit_vf_state(struct xe_gt *gt, unsigned int vfid,
enum xe_gt_sriov_control_bits bit)
{
if (test_and_clear_bit(bit, pf_peek_vf_state(gt, vfid))) {
- xe_gt_sriov_dbg_verbose(gt, "VF%u state %s(%d) exit\n",
- vfid, control_bit_to_string(bit), bit);
+ pf_track_vf_state(gt, vfid, bit, "exit");
return true;
}
return false;
@@ -320,6 +345,8 @@ static void pf_exit_vf_mismatch(struct xe_gt *gt, unsigned int vfid)
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_FAILED);
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUME_FAILED);
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
}
#define pf_enter_vf_state_machine_bug(gt, vfid) ({ \
@@ -350,6 +377,8 @@ static void pf_queue_vf(struct xe_gt *gt, unsigned int vfid)
static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_stop_wip(struct xe_gt *gt, unsigned int vfid);
+static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid);
+static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid);
static void pf_exit_vf_resume_wip(struct xe_gt *gt, unsigned int vfid);
@@ -371,6 +400,8 @@ static void pf_exit_vf_wip(struct xe_gt *gt, unsigned int vfid)
pf_exit_vf_flr_wip(gt, vfid);
pf_exit_vf_stop_wip(gt, vfid);
+ pf_exit_vf_save_wip(gt, vfid);
+ pf_exit_vf_restore_wip(gt, vfid);
pf_exit_vf_pause_wip(gt, vfid);
pf_exit_vf_resume_wip(gt, vfid);
@@ -390,6 +421,8 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED);
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
pf_exit_vf_mismatch(gt, vfid);
pf_exit_vf_wip(gt, vfid);
}
@@ -420,8 +453,7 @@ static void pf_enter_vf_ready(struct xe_gt *gt, unsigned int vfid)
* : PAUSE_GUC_DONE o-----restart
* : | :
* : | o---<--busy :
- * : v / / :
- * : PAUSE_SAVE_GUC :
+ * : / :
* : / :
* : / :
* :....o..............o...............o...........:
@@ -441,7 +473,6 @@ static void pf_exit_vf_pause_wip(struct xe_gt *gt, unsigned int vfid)
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE);
- pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC);
}
}
@@ -472,41 +503,12 @@ static void pf_enter_vf_pause_rejected(struct xe_gt *gt, unsigned int vfid)
pf_enter_vf_pause_failed(gt, vfid);
}
-static void pf_enter_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
-{
- if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
- pf_enter_vf_state_machine_bug(gt, vfid);
-}
-
-static bool pf_exit_vf_pause_save_guc(struct xe_gt *gt, unsigned int vfid)
-{
- int err;
-
- if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC))
- return false;
-
- err = xe_gt_sriov_pf_migration_save_guc_state(gt, vfid);
- if (err) {
- /* retry if busy */
- if (err == -EBUSY) {
- pf_enter_vf_pause_save_guc(gt, vfid);
- return true;
- }
- /* give up on error */
- if (err == -EIO)
- pf_enter_vf_mismatch(gt, vfid);
- }
-
- pf_enter_vf_pause_completed(gt, vfid);
- return true;
-}
-
static bool pf_exit_vf_pause_guc_done(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSE_GUC_DONE))
return false;
- pf_enter_vf_pause_save_guc(gt, vfid);
+ pf_enter_vf_pause_completed(gt, vfid);
return true;
}
@@ -615,7 +617,7 @@ int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid)
}
if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
- xe_gt_sriov_info(gt, "VF%u paused!\n", vfid);
+ xe_gt_sriov_dbg(gt, "VF%u paused!\n", vfid);
return 0;
}
@@ -666,6 +668,8 @@ static void pf_enter_vf_resumed(struct xe_gt *gt, unsigned int vfid)
{
pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
pf_exit_vf_mismatch(gt, vfid);
pf_exit_vf_wip(gt, vfid);
}
@@ -744,6 +748,16 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
return -EPERM;
}
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+ xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
+ return -EBUSY;
+ }
+
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+ xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
+ return -EBUSY;
+ }
+
if (!pf_enter_vf_resume_wip(gt, vfid)) {
xe_gt_sriov_dbg(gt, "VF%u resume already in progress!\n", vfid);
return -EALREADY;
@@ -754,7 +768,7 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
return err;
if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED)) {
- xe_gt_sriov_info(gt, "VF%u resumed!\n", vfid);
+ xe_gt_sriov_dbg(gt, "VF%u resumed!\n", vfid);
return 0;
}
@@ -768,6 +782,562 @@ int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid)
}
/**
+ * DOC: The VF SAVE state machine
+ *
+ * SAVE extends the PAUSED state.
+ *
+ * The VF SAVE state machine looks like::
+ *
+ * ....PAUSED....................................................
+ * : :
+ * : (O)<---------o :
+ * : | \ :
+ * : save (SAVED) (SAVE_FAILED) :
+ * : | ^ ^ :
+ * : | | | :
+ * : ....V...............o...........o......SAVE_WIP......... :
+ * : : | | | : :
+ * : : | empty | : :
+ * : : | | | : :
+ * : : | | | : :
+ * : : | DATA_DONE | : :
+ * : : | ^ | : :
+ * : : | | error : :
+ * : : | no_data / : :
+ * : : | / / : :
+ * : : | / / : :
+ * : : | / / : :
+ * : : o---------->PROCESS_DATA<----consume : :
+ * : : \ \ : :
+ * : : \ \ : :
+ * : : \ \ : :
+ * : : ring_full----->WAIT_DATA : :
+ * : : : :
+ * : :......................................................: :
+ * :............................................................:
+ *
+ * For the full state machine view, see `The VF state machine`_.
+ */
+
+static void pf_exit_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
+{
+ if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+ xe_gt_sriov_pf_migration_ring_free(gt, vfid);
+
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
+ }
+}
+
+static void pf_enter_vf_saved(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+
+ xe_gt_sriov_dbg(gt, "VF%u saved!\n", vfid);
+
+ pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+ pf_exit_vf_mismatch(gt, vfid);
+ pf_exit_vf_wip(gt, vfid);
+}
+
+static void pf_enter_vf_save_failed(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+
+ wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid));
+
+ pf_exit_vf_wip(gt, vfid);
+}
+
+static int pf_handle_vf_save_data(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_GUC)) {
+ ret = xe_gt_sriov_pf_migration_guc_save(gt, vfid);
+ if (ret)
+ return ret;
+
+ xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_GUC);
+
+ return -EAGAIN;
+ }
+
+ if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_GGTT)) {
+ ret = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid);
+ if (ret)
+ return ret;
+
+ xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_GGTT);
+
+ return -EAGAIN;
+ }
+
+ if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_MMIO)) {
+ ret = xe_gt_sriov_pf_migration_mmio_save(gt, vfid);
+ if (ret)
+ return ret;
+
+ xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_MMIO);
+
+ return -EAGAIN;
+ }
+
+ if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_VRAM)) {
+ ret = xe_gt_sriov_pf_migration_vram_save(gt, vfid);
+ if (ret == -EAGAIN)
+ return -EAGAIN;
+ else if (ret)
+ return ret;
+
+ xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
+ XE_SRIOV_PACKET_TYPE_VRAM);
+
+ return -EAGAIN;
+ }
+
+ return 0;
+}
+
+static bool pf_handle_vf_save(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA))
+ return false;
+
+ if (xe_gt_sriov_pf_migration_ring_full(gt, vfid)) {
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA);
+ return true;
+ }
+
+ ret = pf_handle_vf_save_data(gt, vfid);
+ if (ret == -EAGAIN)
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
+ else if (ret)
+ pf_enter_vf_save_failed(gt, vfid);
+ else
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
+
+ return true;
+}
+
+static void pf_exit_vf_save_wait_data(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA))
+ return;
+
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
+ pf_queue_vf(gt, vfid);
+}
+
+static bool pf_enter_vf_save_wip(struct xe_gt *gt, unsigned int vfid)
+{
+ if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+ xe_gt_sriov_pf_migration_save_init(gt, vfid);
+ pf_enter_vf_wip(gt, vfid);
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA);
+ pf_queue_vf(gt, vfid);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * xe_gt_sriov_pf_control_check_save_data_done() - Check if all save migration data was produced.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: true if all migration data was produced, false otherwise.
+ */
+bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
+}
+
+/**
+ * xe_gt_sriov_pf_control_check_save_failed() - Check if save processing has failed.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: true if save processing failed, false otherwise.
+ */
+bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED);
+}
+
+/**
+ * xe_gt_sriov_pf_control_process_save_data() - Queue VF save migration data processing.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_FAILED))
+ return -EIO;
+
+ pf_exit_vf_save_wait_data(gt, vfid);
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_trigger_save_vf() - Start an SR-IOV VF migration data save sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
+ xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
+ return -EPERM;
+ }
+
+ if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
+ xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
+ return -EPERM;
+ }
+
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+ xe_gt_sriov_dbg(gt, "VF%u restore is in progress!\n", vfid);
+ return -EBUSY;
+ }
+
+ if (!pf_enter_vf_save_wip(gt, vfid)) {
+ xe_gt_sriov_dbg(gt, "VF%u save already in progress!\n", vfid);
+ return -EALREADY;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_finish_save_vf() - Complete a VF migration data save sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE)) {
+ xe_gt_sriov_err(gt, "VF%u save is still in progress!\n", vfid);
+ return -EIO;
+ }
+
+ pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_DATA_DONE);
+ pf_enter_vf_saved(gt, vfid);
+
+ return 0;
+}
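A hedged sketch of how a PF-level caller might drive the save sequence end to end using only the control API added here. The consumer that actually drains packets from the migration ring is a separate path and is omitted, and the busy-wait is purely illustrative; the helper name is hypothetical.

	static int example_save_vf(struct xe_gt *gt, unsigned int vfid)
	{
		int err;

		err = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
		if (err)
			return err;

		while (!xe_gt_sriov_pf_control_check_save_data_done(gt, vfid)) {
			if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid))
				return -EIO;

			/* once the (omitted) consumer made room in the ring,
			 * kick the state machine out of SAVE_WAIT_DATA */
			err = xe_gt_sriov_pf_control_process_save_data(gt, vfid);
			if (err)
				return err;
		}

		return xe_gt_sriov_pf_control_finish_save_vf(gt, vfid);
	}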
+
+/**
+ * DOC: The VF RESTORE state machine
+ *
+ * RESTORE extends the PAUSED state.
+ *
+ * The VF RESTORE state machine looks like::
+ *
+ * ....PAUSED....................................................
+ * : :
+ * : (O)<---------o :
+ * : | \ :
+ * : restore (RESTORED) (RESTORE_FAILED) :
+ * : | ^ ^ :
+ * : | | | :
+ * : ....V...............o...........o......RESTORE_WIP...... :
+ * : : | | | : :
+ * : : | empty | : :
+ * : : | | | : :
+ * : : | | | : :
+ * : : | DATA_DONE | : :
+ * : : | ^ | : :
+ * : : | | error : :
+ * : : | trailer / : :
+ * : : | / / : :
+ * : : | / / : :
+ * : : | / / : :
+ * : : o---------->PROCESS_DATA<----produce : :
+ * : : \ \ : :
+ * : : \ \ : :
+ * : : \ \ : :
+ * : : ring_empty---->WAIT_DATA : :
+ * : : : :
+ * : :......................................................: :
+ * :............................................................:
+ *
+ * For the full state machine view, see `The VF state machine`_.
+ */
+
+static void pf_exit_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
+{
+ if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+ xe_gt_sriov_pf_migration_ring_free(gt, vfid);
+
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA);
+ pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE);
+ }
+}
+
+static void pf_enter_vf_restored(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+
+ xe_gt_sriov_dbg(gt, "VF%u restored!\n", vfid);
+
+ pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+ pf_exit_vf_mismatch(gt, vfid);
+ pf_exit_vf_wip(gt, vfid);
+}
+
+static void pf_enter_vf_restore_failed(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+
+ wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid));
+
+ pf_exit_vf_wip(gt, vfid);
+}
+
+static int pf_handle_vf_restore_data(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_sriov_packet *data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid);
+ int ret = 0;
+
+ switch (data->hdr.type) {
+ case XE_SRIOV_PACKET_TYPE_GGTT:
+ ret = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data);
+ break;
+ case XE_SRIOV_PACKET_TYPE_MMIO:
+ ret = xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data);
+ break;
+ case XE_SRIOV_PACKET_TYPE_GUC:
+ ret = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data);
+ break;
+ case XE_SRIOV_PACKET_TYPE_VRAM:
+ ret = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data);
+ break;
+ default:
+ xe_gt_sriov_notice(gt, "Skipping VF%u unknown data type: %d\n",
+ vfid, data->hdr.type);
+ break;
+ }
+
+ xe_sriov_packet_free(data);
+
+ return ret;
+}
+
+static bool pf_handle_vf_restore(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA))
+ return false;
+
+ if (xe_gt_sriov_pf_migration_ring_empty(gt, vfid)) {
+ if (pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE))
+ pf_enter_vf_restored(gt, vfid);
+ else
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA);
+
+ return true;
+ }
+
+ ret = pf_handle_vf_restore_data(gt, vfid);
+ if (ret)
+ pf_enter_vf_restore_failed(gt, vfid);
+ else
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
+
+ return true;
+}
+
+static void pf_exit_vf_restore_wait_data(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA))
+ return;
+
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
+ pf_queue_vf(gt, vfid);
+}
+
+static bool pf_enter_vf_restore_wip(struct xe_gt *gt, unsigned int vfid)
+{
+ if (pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WIP)) {
+ pf_enter_vf_wip(gt, vfid);
+ pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA);
+ pf_queue_vf(gt, vfid);
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * xe_gt_sriov_pf_control_check_restore_failed() - Check if restore processing has failed.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: true if restore processing failed, false otherwise.
+ */
+bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid)
+{
+ return pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED);
+}
+
+/**
+ * xe_gt_sriov_pf_control_restore_data_done() - Indicate the end of the VF migration data stream.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_DATA_DONE)) {
+ pf_enter_vf_state_machine_bug(gt, vfid);
+ return -EIO;
+ }
+
+ return xe_gt_sriov_pf_control_process_restore_data(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_control_process_restore_data() - Queue VF restore migration data processing.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED)) {
+ xe_gt_sriov_pf_migration_ring_free(gt, vfid);
+ return -EIO;
+ }
+
+ pf_exit_vf_restore_wait_data(gt, vfid);
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_trigger_restore_vf() - Start an SR-IOV VF migration data restore sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
+ xe_gt_sriov_dbg(gt, "VF%u is stopped!\n", vfid);
+ return -EPERM;
+ }
+
+ if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED)) {
+ xe_gt_sriov_dbg(gt, "VF%u is not paused!\n", vfid);
+ return -EPERM;
+ }
+
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WIP)) {
+ xe_gt_sriov_dbg(gt, "VF%u save is in progress!\n", vfid);
+ return -EBUSY;
+ }
+
+ if (!pf_enter_vf_restore_wip(gt, vfid)) {
+ xe_gt_sriov_dbg(gt, "VF%u restore already in progress!\n", vfid);
+ return -EALREADY;
+ }
+
+ return 0;
+}
+
+static int pf_wait_vf_restore_done(struct xe_gt *gt, unsigned int vfid)
+{
+ unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_RESTORE_WIP);
+ int err;
+
+ err = pf_wait_vf_wip_done(gt, vfid, timeout);
+ if (err) {
+ xe_gt_sriov_notice(gt, "VF%u RESTORE didn't finish in %u ms (%pe)\n",
+ vfid, jiffies_to_msecs(timeout), ERR_PTR(err));
+ return err;
+ }
+
+ if (!pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_FAILED))
+ return -EIO;
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_finish_restore_vf() - Complete a VF migration data restore sequence.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ ret = pf_wait_vf_restore_done(gt, vfid);
+ if (ret)
+ return ret;
+
+ if (!pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED)) {
+ pf_enter_vf_mismatch(gt, vfid);
+ return -EIO;
+ }
+
+ pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+
+ return 0;
+}
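And a matching hedged sketch of the restore side (again, the helper name is hypothetical and the data producer is omitted): the producer pushes &xe_sriov_packet entries into the migration ring and kicks processing after each one, then the caller marks the end of the stream and waits for completion.

	static int example_restore_vf(struct xe_gt *gt, unsigned int vfid)
	{
		int err;

		err = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid);
		if (err)
			return err;

		/*
		 * ... the (omitted) producer fills the migration ring and calls
		 * xe_gt_sriov_pf_control_process_restore_data() after each packet ...
		 */

		err = xe_gt_sriov_pf_control_restore_data_done(gt, vfid);
		if (err)
			return err;

		return xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid);
	}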
+
+/**
* DOC: The VF STOP state machine
*
* The VF STOP state machine looks like::
@@ -808,6 +1378,8 @@ static void pf_enter_vf_stopped(struct xe_gt *gt, unsigned int vfid)
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESUMED);
pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_PAUSED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVED);
+ pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORED);
pf_exit_vf_mismatch(gt, vfid);
pf_exit_vf_wip(gt, vfid);
}
@@ -895,7 +1467,7 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
return err;
if (pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_STOPPED)) {
- xe_gt_sriov_info(gt, "VF%u stopped!\n", vfid);
+ xe_gt_sriov_dbg(gt, "VF%u stopped!\n", vfid);
return 0;
}
@@ -933,6 +1505,10 @@ int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid)
* : v : | |
* : FLR_GUC_DONE : | |
* : | : | |
+ * : | o--<--sync : | |
+ * : |/ / : | |
+ * : FLR_SYNC--o : | |
+ * : | : | |
* : FLR_RESET_CONFIG---failed--->-----------o--------+-----------o
* : | : | |
* : FLR_RESET_DATA : | |
@@ -984,6 +1560,8 @@ static void pf_exit_vf_flr_wip(struct xe_gt *gt, unsigned int vfid)
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WAIT_GUC);
pf_escape_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SEND_START);
+
+ xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
}
}
@@ -1064,7 +1642,9 @@ static bool pf_exit_vf_flr_reset_data(struct xe_gt *gt, unsigned int vfid)
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_RESET_DATA))
return false;
- xe_gt_sriov_pf_service_reset(gt, vfid);
+ if (xe_tile_is_root(gt->tile) && xe_gt_is_main_type(gt))
+ xe_sriov_pf_service_reset_vf(gt_to_xe(gt), vfid);
+
xe_gt_sriov_pf_monitor_flr(gt, vfid);
pf_enter_vf_flr_reset_mmio(gt, vfid);
@@ -1138,12 +1718,38 @@ static bool pf_exit_vf_flr_send_start(struct xe_gt *gt, unsigned int vfid)
return true;
}
+static bool pf_exit_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
+ return false;
+
+ pf_enter_vf_flr_reset_config(gt, vfid);
+ return true;
+}
+
+static void pf_enter_vf_flr_sync(struct xe_gt *gt, unsigned int vfid)
+{
+ int ret;
+
+ if (!pf_enter_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
+ pf_enter_vf_state_machine_bug(gt, vfid);
+
+ ret = xe_sriov_pf_control_sync_flr(gt_to_xe(gt), vfid);
+ if (ret < 0) {
+ xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint %pe\n", ERR_PTR(ret));
+ pf_expect_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
+ } else {
+ xe_gt_sriov_dbg_verbose(gt, "FLR checkpoint pass\n");
+ pf_expect_vf_not_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC);
+ }
+}
+
static bool pf_exit_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
{
if (!pf_exit_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_GUC_DONE))
return false;
- pf_enter_vf_flr_reset_config(gt, vfid);
+ pf_enter_vf_flr_sync(gt, vfid);
return true;
}
@@ -1164,10 +1770,52 @@ static void pf_enter_vf_flr_guc_done(struct xe_gt *gt, unsigned int vfid)
*/
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid)
{
+ pf_enter_vf_flr_wip(gt, vfid);
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_control_sync_flr() - Synchronize on the VF FLR checkpoint.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @sync: if true, allow the FLR to exit the checkpoint
+ *
+ * Return: non-zero if the FLR checkpoint has been reached, zero if there is no
+ * FLR in progress, or a negative error code if the FLR is busy or has failed.
+ */
+int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync)
+{
+ if (sync && pf_exit_vf_flr_sync(gt, vfid))
+ return 1;
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_SYNC))
+ return 1;
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
+ return -EBUSY;
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
+ return -EIO;
+ return 0;
+}
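How the device-level xe_sriov_pf_control_sync_flr() might use this per-GT checkpoint is only sketched below; the real implementation lives outside this file and may differ. The idea: confirm that every GT has reached FLR_SYNC, then release them all with sync=true. for_each_gt() is the existing Xe GT iterator.

	static int example_sync_flr(struct xe_device *xe, unsigned int vfid)
	{
		struct xe_gt *gt;
		unsigned int id;
		int ret;

		/* first pass: bail out unless every GT reached the checkpoint */
		for_each_gt(gt, xe, id) {
			ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, false);
			if (ret <= 0)
				return ret;	/* no FLR, still busy, or failed */
		}

		/* second pass: let every GT exit the checkpoint and continue */
		for_each_gt(gt, xe, id) {
			ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, true);
			if (ret < 0)
				return ret;
		}

		return 1;
	}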
+
+/**
+ * xe_gt_sriov_pf_control_wait_flr() - Wait for a VF FLR to complete.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid)
+{
unsigned long timeout = pf_get_default_timeout(XE_GT_SRIOV_STATE_FLR_WIP);
int err;
- pf_enter_vf_flr_wip(gt, vfid);
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_FAILED))
+ return -EIO;
+
+ if (!pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_FLR_WIP))
+ return 0;
err = pf_wait_vf_wip_done(gt, vfid, timeout);
if (err) {
@@ -1375,7 +2023,22 @@ static bool pf_process_vf_state_machine(struct xe_gt *gt, unsigned int vfid)
if (pf_exit_vf_pause_guc_done(gt, vfid))
return true;
- if (pf_exit_vf_pause_save_guc(gt, vfid))
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_SAVE_WAIT_DATA)) {
+ xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
+ control_bit_to_string(XE_GT_SRIOV_STATE_SAVE_WAIT_DATA));
+ return false;
+ }
+
+ if (pf_handle_vf_save(gt, vfid))
+ return true;
+
+ if (pf_check_vf_state(gt, vfid, XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA)) {
+ xe_gt_sriov_dbg_verbose(gt, "VF%u in %s\n", vfid,
+ control_bit_to_string(XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA));
+ return false;
+ }
+
+ if (pf_handle_vf_restore(gt, vfid))
return true;
if (pf_exit_vf_resume_send_resume(gt, vfid))
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
index c85e64f099cc..c36c8767f3ad 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control.h
@@ -16,8 +16,20 @@ void xe_gt_sriov_pf_control_restart(struct xe_gt *gt);
int xe_gt_sriov_pf_control_pause_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_resume_vf(struct xe_gt *gt, unsigned int vfid);
+bool xe_gt_sriov_pf_control_check_save_data_done(struct xe_gt *gt, unsigned int vfid);
+bool xe_gt_sriov_pf_control_check_save_failed(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_process_save_data(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_trigger_save_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_finish_save_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_restore_data_done(struct xe_gt *gt, unsigned int vfid);
+bool xe_gt_sriov_pf_control_check_restore_failed(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_process_restore_data(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_trigger_restore_vf(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_finish_restore_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_stop_vf(struct xe_gt *gt, unsigned int vfid);
int xe_gt_sriov_pf_control_trigger_flr(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_control_sync_flr(struct xe_gt *gt, unsigned int vfid, bool sync);
+int xe_gt_sriov_pf_control_wait_flr(struct xe_gt *gt, unsigned int vfid);
#ifdef CONFIG_PCI_IOV
int xe_gt_sriov_pf_control_process_guc2pf(struct xe_gt *gt, const u32 *msg, u32 len);
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
index f02f941b4ad2..6027ba05a7f2 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_control_types.h
@@ -18,6 +18,7 @@
* @XE_GT_SRIOV_STATE_FLR_SEND_START: indicates that the PF wants to send a FLR START command.
* @XE_GT_SRIOV_STATE_FLR_WAIT_GUC: indicates that the PF awaits for a response from the GuC.
* @XE_GT_SRIOV_STATE_FLR_GUC_DONE: indicates that the PF has received a response from the GuC.
+ * @XE_GT_SRIOV_STATE_FLR_SYNC: indicates that the PF awaits to synchronize with other GuCs.
* @XE_GT_SRIOV_STATE_FLR_RESET_CONFIG: indicates that the PF needs to clear VF's resources.
* @XE_GT_SRIOV_STATE_FLR_RESET_DATA: indicates that the PF needs to clear VF's data.
* @XE_GT_SRIOV_STATE_FLR_RESET_MMIO: indicates that the PF needs to reset VF's registers.
@@ -27,9 +28,20 @@
* @XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE: indicates that the PF is about to send a PAUSE command.
* @XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC: indicates that the PF awaits for a response from the GuC.
* @XE_GT_SRIOV_STATE_PAUSE_GUC_DONE: indicates that the PF has received a response from the GuC.
- * @XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC: indicates that the PF needs to save the VF GuC state.
* @XE_GT_SRIOV_STATE_PAUSE_FAILED: indicates that a VF pause operation has failed.
* @XE_GT_SRIOV_STATE_PAUSED: indicates that the VF is paused.
+ * @XE_GT_SRIOV_STATE_SAVE_WIP: indicates that VF save operation is in progress.
+ * @XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA: indicates that VF migration data is being produced.
+ * @XE_GT_SRIOV_STATE_SAVE_WAIT_DATA: indicates that PF awaits for space in migration data ring.
+ * @XE_GT_SRIOV_STATE_SAVE_DATA_DONE: indicates that all migration data was produced by Xe.
+ * @XE_GT_SRIOV_STATE_SAVE_FAILED: indicates that VF save operation has failed.
+ * @XE_GT_SRIOV_STATE_SAVED: indicates that VF data is saved.
+ * @XE_GT_SRIOV_STATE_RESTORE_WIP: indicates that VF restore operation is in progress.
+ * @XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA: indicates that VF migration data is being consumed.
+ * @XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA: indicates that PF awaits for data in migration data ring.
+ * @XE_GT_SRIOV_STATE_RESTORE_DATA_DONE: indicates that all migration data was produced by the user.
+ * @XE_GT_SRIOV_STATE_RESTORE_FAILED: indicates that VF restore operation has failed.
+ * @XE_GT_SRIOV_STATE_RESTORED: indicates that VF data is restored.
 * @XE_GT_SRIOV_STATE_RESUME_WIP: indicates that a VF resume operation is in progress.
* @XE_GT_SRIOV_STATE_RESUME_SEND_RESUME: indicates that the PF is about to send RESUME command.
* @XE_GT_SRIOV_STATE_RESUME_FAILED: indicates that a VF resume operation has failed.
@@ -47,6 +59,7 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_FLR_SEND_START,
XE_GT_SRIOV_STATE_FLR_WAIT_GUC,
XE_GT_SRIOV_STATE_FLR_GUC_DONE,
+ XE_GT_SRIOV_STATE_FLR_SYNC,
XE_GT_SRIOV_STATE_FLR_RESET_CONFIG,
XE_GT_SRIOV_STATE_FLR_RESET_DATA,
XE_GT_SRIOV_STATE_FLR_RESET_MMIO,
@@ -57,10 +70,23 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_PAUSE_SEND_PAUSE,
XE_GT_SRIOV_STATE_PAUSE_WAIT_GUC,
XE_GT_SRIOV_STATE_PAUSE_GUC_DONE,
- XE_GT_SRIOV_STATE_PAUSE_SAVE_GUC,
XE_GT_SRIOV_STATE_PAUSE_FAILED,
XE_GT_SRIOV_STATE_PAUSED,
+ XE_GT_SRIOV_STATE_SAVE_WIP,
+ XE_GT_SRIOV_STATE_SAVE_PROCESS_DATA,
+ XE_GT_SRIOV_STATE_SAVE_WAIT_DATA,
+ XE_GT_SRIOV_STATE_SAVE_DATA_DONE,
+ XE_GT_SRIOV_STATE_SAVE_FAILED,
+ XE_GT_SRIOV_STATE_SAVED,
+
+ XE_GT_SRIOV_STATE_RESTORE_WIP,
+ XE_GT_SRIOV_STATE_RESTORE_PROCESS_DATA,
+ XE_GT_SRIOV_STATE_RESTORE_WAIT_DATA,
+ XE_GT_SRIOV_STATE_RESTORE_DATA_DONE,
+ XE_GT_SRIOV_STATE_RESTORE_FAILED,
+ XE_GT_SRIOV_STATE_RESTORED,
+
XE_GT_SRIOV_STATE_RESUME_WIP,
XE_GT_SRIOV_STATE_RESUME_SEND_RESUME,
XE_GT_SRIOV_STATE_RESUME_FAILED,
@@ -71,9 +97,11 @@ enum xe_gt_sriov_control_bits {
XE_GT_SRIOV_STATE_STOP_FAILED,
XE_GT_SRIOV_STATE_STOPPED,
- XE_GT_SRIOV_STATE_MISMATCH = BITS_PER_LONG - 1,
+ XE_GT_SRIOV_STATE_MISMATCH, /* always keep as last */
};
+#define XE_GT_SRIOV_NUM_STATES (XE_GT_SRIOV_STATE_MISMATCH + 1)
+
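A minimal sketch of what the switch to a bitmap buys (the names example_state/example_enter are illustrative only): the test_and_set_bit()/test_and_clear_bit() helpers used by the control code operate on unsigned long arrays, so sizing the storage with DECLARE_BITMAP() keeps them valid even once the number of control bits grows past BITS_PER_LONG.

	struct example_state {
		DECLARE_BITMAP(state, XE_GT_SRIOV_NUM_STATES);
	};

	static bool example_enter(struct example_state *s, enum xe_gt_sriov_control_bits bit)
	{
		/* same pattern as pf_enter_vf_state(); note that pf_peek_vf_state()
		 * now returns the bitmap itself (cs->state) instead of &cs->state */
		return !test_and_set_bit(bit, s->state);
	}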
/**
* struct xe_gt_sriov_control_state - GT-level per-VF control state.
*
@@ -81,7 +109,7 @@ enum xe_gt_sriov_control_bits {
*/
struct xe_gt_sriov_control_state {
/** @state: VF state bits */
- unsigned long state;
+ DECLARE_BITMAP(state, XE_GT_SRIOV_NUM_STATES);
/** @done: completion of async operations */
struct completion done;
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
index 0fe47f41b63c..5278ea4fd655 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.c
@@ -22,14 +22,26 @@
#include "xe_gt_sriov_pf_policy.h"
#include "xe_gt_sriov_pf_service.h"
#include "xe_pm.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_provision.h"
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0 # d_inode->i_private = gt
- * │   ├── pf # d_inode->i_private = gt
- * │   ├── vf1 # d_inode->i_private = VFID(1)
- * :   :
- * │   ├── vfN # d_inode->i_private = VFID(N)
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov # d_inode->i_private = (xe_device*)
+ * │ ├── pf # d_inode->i_private = (xe_device*)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*)
+ * │ │ │ ├── gt1 # d_inode->i_private = (xe_gt*)
+ * │ │ ├── tile1
+ * │ │ │ :
+ * │ ├── vf1 # d_inode->i_private = VFID(1)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ │ ├── gt0 # d_inode->i_private = (xe_gt*)
+ * │ │ │ ├── gt1 # d_inode->i_private = (xe_gt*)
+ * │ │ ├── tile1
+ * │ │ │ :
+ * : :
+ * │ ├── vfN # d_inode->i_private = VFID(N)
*/
static void *extract_priv(struct dentry *d)
@@ -39,26 +51,31 @@ static void *extract_priv(struct dentry *d)
static struct xe_gt *extract_gt(struct dentry *d)
{
- return extract_priv(d->d_parent);
+ return extract_priv(d);
+}
+
+static struct xe_device *extract_xe(struct dentry *d)
+{
+ return extract_priv(d->d_parent->d_parent->d_parent);
}
static unsigned int extract_vfid(struct dentry *d)
{
- return extract_priv(d) == extract_gt(d) ? PFID : (uintptr_t)extract_priv(d);
+ void *priv = extract_priv(d->d_parent->d_parent);
+
+ return priv == extract_xe(d) ? PFID : (uintptr_t)priv;
}
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- * │   │   ├── contexts_provisioned
- * │   │   ├── doorbells_provisioned
- * │   │   ├── runtime_registers
- * │   │   ├── negotiated_versions
- * │   │   ├── adverse_events
- * ├── gt1
- * │   ├── pf
- * │   │   ├── ...
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * :   ├── pf
+ * :       ├── tile0
+ * :           ├── gt0
+ * :               ├── contexts_provisioned
+ *                 ├── doorbells_provisioned
+ *                 ├── runtime_registers
+ *                 ├── adverse_events
*/
static const struct drm_info_list pf_info[] = {
@@ -78,11 +95,6 @@ static const struct drm_info_list pf_info[] = {
.data = xe_gt_sriov_pf_service_print_runtime,
},
{
- "negotiated_versions",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_service_print_version,
- },
- {
"adverse_events",
.show = xe_gt_debugfs_simple_show,
.data = xe_gt_sriov_pf_monitor_print_events,
@@ -90,48 +102,14 @@ static const struct drm_info_list pf_info[] = {
};
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- * │   │   ├── ggtt_available
- * │   │   ├── ggtt_provisioned
- */
-
-static const struct drm_info_list pf_ggtt_info[] = {
- {
- "ggtt_available",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_config_print_available_ggtt,
- },
- {
- "ggtt_provisioned",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_config_print_ggtt,
- },
-};
-
-/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- * │   │   ├── lmem_provisioned
- */
-
-static const struct drm_info_list pf_lmem_info[] = {
- {
- "lmem_provisioned",
- .show = xe_gt_debugfs_simple_show,
- .data = xe_gt_sriov_pf_config_print_lmem,
- },
-};
-
-/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- * │   │   ├── reset_engine
- * │   │   ├── sample_period
- * │   │   ├── sched_if_idle
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * :   ├── pf
+ * :       ├── tile0
+ * :           ├── gt0
+ * :               ├── reset_engine
+ *                 ├── sample_period
+ *                 ├── sched_if_idle
*/
#define DEFINE_SRIOV_GT_POLICY_DEBUGFS_ATTRIBUTE(POLICY, TYPE, FORMAT) \
@@ -147,6 +125,8 @@ static int POLICY##_set(void *data, u64 val) \
\
xe_pm_runtime_get(xe); \
err = xe_gt_sriov_pf_policy_set_##POLICY(gt, val); \
+ if (!err) \
+ xe_sriov_pf_provision_set_custom_mode(xe); \
xe_pm_runtime_put(xe); \
\
return err; \
@@ -177,24 +157,24 @@ static void pf_add_policy_attrs(struct xe_gt *gt, struct dentry *parent)
}
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- * │   │   ├── ggtt_spare
- * │   │   ├── lmem_spare
- * │   │   ├── doorbells_spare
- * │   │   ├── contexts_spare
- * │   │   ├── exec_quantum_ms
- * │   │   ├── preempt_timeout_us
- * │   │   ├── sched_priority
- * │   ├── vf1
- * │   │   ├── ggtt_quota
- * │   │   ├── lmem_quota
- * │   │   ├── doorbells_quota
- * │   │   ├── contexts_quota
- * │   │   ├── exec_quantum_ms
- * │   │   ├── preempt_timeout_us
- * │   │   ├── sched_priority
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * :   ├── pf
+ * │       ├── tile0
+ * │       :   ├── gt0
+ * │       :       ├── doorbells_spare
+ * │               ├── contexts_spare
+ * │               ├── exec_quantum_ms
+ * │               ├── preempt_timeout_us
+ * │               ├── sched_priority
+ * ├── vf1
+ * :   ├── tile0
+ * :       ├── gt0
+ * :           ├── doorbells_quota
+ *             ├── contexts_quota
+ *             ├── exec_quantum_ms
+ *             ├── preempt_timeout_us
+ *             ├── sched_priority
*/
#define DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(CONFIG, TYPE, FORMAT) \
@@ -210,7 +190,10 @@ static int CONFIG##_set(void *data, u64 val) \
return -EOVERFLOW; \
\
xe_pm_runtime_get(xe); \
- err = xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
+ err = xe_sriov_pf_wait_ready(xe) ?: \
+ xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
+ if (!err) \
+ xe_sriov_pf_provision_set_custom_mode(xe); \
xe_pm_runtime_put(xe); \
\
return err; \
@@ -227,8 +210,6 @@ static int CONFIG##_get(void *data, u64 *val) \
\
DEFINE_DEBUGFS_ATTRIBUTE(CONFIG##_fops, CONFIG##_get, CONFIG##_set, FORMAT)
-DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, u64, "%llu\n");
-DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(lmem, u64, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(ctxs, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(dbs, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(exec_quantum, u32, "%llu\n");
@@ -236,22 +217,26 @@ DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(preempt_timeout, u32, "%llu\n");
DEFINE_SRIOV_GT_CONFIG_DEBUGFS_ATTRIBUTE(sched_priority, u32, "%llu\n");
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- * │   │   ├── threshold_cat_error_count
- * │   │   ├── threshold_doorbell_time_us
- * │   │   ├── threshold_engine_reset_count
- * │   │   ├── threshold_guc_time_us
- * │   │   ├── threshold_irq_time_us
- * │   │   ├── threshold_page_fault_count
- * │   ├── vf1
- * │   │   ├── threshold_cat_error_count
- * │   │   ├── threshold_doorbell_time_us
- * │   │   ├── threshold_engine_reset_count
- * │   │   ├── threshold_guc_time_us
- * │   │   ├── threshold_irq_time_us
- * │   │   ├── threshold_page_fault_count
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * :   ├── pf
+ * │       ├── tile0
+ * │       :   ├── gt0
+ * │       :       ├── threshold_cat_error_count
+ * │               ├── threshold_doorbell_time_us
+ * │               ├── threshold_engine_reset_count
+ * │               ├── threshold_guc_time_us
+ * │               ├── threshold_irq_time_us
+ * │               ├── threshold_page_fault_count
+ * ├── vf1
+ * :   ├── tile0
+ * :       ├── gt0
+ * :           ├── threshold_cat_error_count
+ *             ├── threshold_doorbell_time_us
+ *             ├── threshold_engine_reset_count
+ *             ├── threshold_guc_time_us
+ *             ├── threshold_irq_time_us
+ *             ├── threshold_page_fault_count
*/
static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index index)
@@ -266,6 +251,8 @@ static int set_threshold(void *data, u64 val, enum xe_guc_klv_threshold_index in
xe_pm_runtime_get(xe);
err = xe_gt_sriov_pf_config_set_threshold(gt, vfid, index, val);
+ if (!err)
+ xe_sriov_pf_provision_set_custom_mode(xe);
xe_pm_runtime_put(xe);
return err;
@@ -305,13 +292,6 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
xe_gt_assert(gt, gt == extract_gt(parent));
xe_gt_assert(gt, vfid == extract_vfid(parent));
- if (!xe_gt_is_media_type(gt)) {
- debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare",
- 0644, parent, parent, &ggtt_fops);
- if (IS_DGFX(gt_to_xe(gt)))
- debugfs_create_file_unsafe(vfid ? "lmem_quota" : "lmem_spare",
- 0644, parent, parent, &lmem_fops);
- }
debugfs_create_file_unsafe(vfid ? "doorbells_quota" : "doorbells_spare",
0644, parent, parent, &dbs_fops);
debugfs_create_file_unsafe(vfid ? "contexts_quota" : "contexts_spare",
@@ -332,10 +312,12 @@ static void pf_add_config_attrs(struct xe_gt *gt, struct dentry *parent, unsigne
}
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
+ * :   ├── vf1
+ * :       ├── tile0
+ * :           ├── gt0
+ * :               ├── control { stop, pause, resume }
+ * : ├── vf1
+ * : ├── tile0
+ * : ├── gt0
+ * : ├── control { stop, pause, resume }
*/
static const struct {
@@ -345,9 +327,6 @@ static const struct {
{ "stop", xe_gt_sriov_pf_control_stop_vf },
{ "pause", xe_gt_sriov_pf_control_pause_vf },
{ "resume", xe_gt_sriov_pf_control_resume_vf },
-#ifdef CONFIG_DRM_XE_DEBUG_SRIOV
- { "restore!", xe_gt_sriov_pf_migration_restore_guc_state },
-#endif
};
static ssize_t control_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
@@ -412,58 +391,27 @@ static const struct file_operations control_ops = {
};
/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── vf1
- * │   │   ├── guc_state
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * │   ├── vf1
+ * │   │   ├── tile0
+ * │   │   │   ├── gt0
+ * │   │   │   │   ├── config_blob
*/
-static ssize_t guc_state_read(struct file *file, char __user *buf,
- size_t count, loff_t *pos)
-{
- struct dentry *dent = file_dentry(file);
- struct dentry *parent = dent->d_parent;
- struct xe_gt *gt = extract_gt(parent);
- unsigned int vfid = extract_vfid(parent);
-
- return xe_gt_sriov_pf_migration_read_guc_state(gt, vfid, buf, count, pos);
-}
-
-static ssize_t guc_state_write(struct file *file, const char __user *buf,
- size_t count, loff_t *pos)
-{
- struct dentry *dent = file_dentry(file);
- struct dentry *parent = dent->d_parent;
- struct xe_gt *gt = extract_gt(parent);
- unsigned int vfid = extract_vfid(parent);
-
- if (*pos)
- return -EINVAL;
-
- return xe_gt_sriov_pf_migration_write_guc_state(gt, vfid, buf, count);
-}
-static const struct file_operations guc_state_ops = {
- .owner = THIS_MODULE,
- .read = guc_state_read,
- .write = guc_state_write,
- .llseek = default_llseek,
+struct config_blob_data {
+ size_t size;
+ u8 blob[];
};
-/*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── vf1
- * │   │   ├── config_blob
- */
-static ssize_t config_blob_read(struct file *file, char __user *buf,
- size_t count, loff_t *pos)
+static int config_blob_open(struct inode *inode, struct file *file)
{
struct dentry *dent = file_dentry(file);
struct dentry *parent = dent->d_parent;
struct xe_gt *gt = extract_gt(parent);
unsigned int vfid = extract_vfid(parent);
+ struct config_blob_data *cbd;
ssize_t ret;
- void *tmp;
ret = xe_gt_sriov_pf_config_save(gt, vfid, NULL, 0);
if (!ret)
@@ -471,16 +419,27 @@ static ssize_t config_blob_read(struct file *file, char __user *buf,
if (ret < 0)
return ret;
- tmp = kzalloc(ret, GFP_KERNEL);
- if (!tmp)
+ cbd = kzalloc(struct_size(cbd, blob, ret), GFP_KERNEL);
+ if (!cbd)
return -ENOMEM;
- ret = xe_gt_sriov_pf_config_save(gt, vfid, tmp, ret);
- if (ret > 0)
- ret = simple_read_from_buffer(buf, count, pos, tmp, ret);
+ ret = xe_gt_sriov_pf_config_save(gt, vfid, cbd->blob, ret);
+ if (ret < 0) {
+ kfree(cbd);
+ return ret;
+ }
- kfree(tmp);
- return ret;
+ cbd->size = ret;
+ file->private_data = cbd;
+ return nonseekable_open(inode, file);
+}
+
+static ssize_t config_blob_read(struct file *file, char __user *buf,
+ size_t count, loff_t *pos)
+{
+ struct config_blob_data *cbd = file->private_data;
+
+ return simple_read_from_buffer(buf, count, pos, cbd->blob, cbd->size);
}
static ssize_t config_blob_write(struct file *file, const char __user *buf,
@@ -517,80 +476,147 @@ static ssize_t config_blob_write(struct file *file, const char __user *buf,
return ret;
}
+static int config_blob_release(struct inode *inode, struct file *file)
+{
+ kfree(file->private_data);
+ return 0;
+}
+
static const struct file_operations config_blob_ops = {
.owner = THIS_MODULE,
+ .open = config_blob_open,
.read = config_blob_read,
.write = config_blob_write,
- .llseek = default_llseek,
+ .release = config_blob_release,
};
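For illustration only, a userspace reader for the open/read/release scheme above could look like this; the BDF in the path and the buffer size are placeholders, not taken from this patch:

	/* minimal userspace sketch: open() snapshots the config, read() serves
	 * from that snapshot, close() releases it */
	#include <fcntl.h>
	#include <stdio.h>
	#include <stdlib.h>
	#include <unistd.h>

	int main(void)
	{
		const char *path =
			"/sys/kernel/debug/dri/0000:03:00.0/sriov/vf1/tile0/gt0/config_blob";
		char buf[4096];
		ssize_t n;
		int fd;

		fd = open(path, O_RDONLY);
		if (fd < 0) {
			perror("open");
			return EXIT_FAILURE;
		}

		while ((n = read(fd, buf, sizeof(buf))) > 0)
			fwrite(buf, 1, n, stdout);

		close(fd);
		return 0;
	}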
-/**
- * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs.
- * @gt: the &xe_gt to register
- * @root: the &dentry that represents the GT directory
- *
- * Register SR-IOV PF entries that are GT related and must be shown under GT debugfs.
- */
-void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root)
+static void pf_add_compat_attrs(struct xe_gt *gt, struct dentry *dent, unsigned int vfid)
{
struct xe_device *xe = gt_to_xe(gt);
- struct drm_minor *minor = xe->drm.primary;
- int n, totalvfs = xe_sriov_pf_get_totalvfs(xe);
- struct dentry *pfdentry;
- struct dentry *vfdentry;
- char buf[14]; /* should be enough up to "vf%u\0" for 2^32 - 1 */
-
- xe_gt_assert(gt, IS_SRIOV_PF(xe));
- xe_gt_assert(gt, root->d_inode->i_private == gt);
- /*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── pf
- */
- pfdentry = debugfs_create_dir("pf", root);
- if (IS_ERR(pfdentry))
+ if (!xe_gt_is_main_type(gt))
return;
- pfdentry->d_inode->i_private = gt;
-
- drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), pfdentry, minor);
- if (!xe_gt_is_media_type(gt)) {
- drm_debugfs_create_files(pf_ggtt_info,
- ARRAY_SIZE(pf_ggtt_info),
- pfdentry, minor);
- if (IS_DGFX(gt_to_xe(gt)))
- drm_debugfs_create_files(pf_lmem_info,
- ARRAY_SIZE(pf_lmem_info),
- pfdentry, minor);
+
+ if (vfid) {
+ debugfs_create_symlink("ggtt_quota", dent, "../ggtt_quota");
+ if (xe_device_has_lmtt(xe))
+ debugfs_create_symlink("lmem_quota", dent, "../vram_quota");
+ } else {
+ debugfs_create_symlink("ggtt_spare", dent, "../ggtt_spare");
+ debugfs_create_symlink("ggtt_available", dent, "../ggtt_available");
+ debugfs_create_symlink("ggtt_provisioned", dent, "../ggtt_provisioned");
+ if (xe_device_has_lmtt(xe)) {
+ debugfs_create_symlink("lmem_spare", dent, "../vram_spare");
+ debugfs_create_symlink("lmem_provisioned", dent, "../vram_provisioned");
+ }
}
+}
- pf_add_policy_attrs(gt, pfdentry);
- pf_add_config_attrs(gt, pfdentry, PFID);
-
- for (n = 1; n <= totalvfs; n++) {
- /*
- * /sys/kernel/debug/dri/0/
- * ├── gt0
- * │   ├── vf1
- * │   ├── vf2
- */
- snprintf(buf, sizeof(buf), "vf%u", n);
- vfdentry = debugfs_create_dir(buf, root);
- if (IS_ERR(vfdentry))
- break;
- vfdentry->d_inode->i_private = (void *)(uintptr_t)n;
+static void pf_populate_gt(struct xe_gt *gt, struct dentry *dent, unsigned int vfid)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct drm_minor *minor = xe->drm.primary;
- pf_add_config_attrs(gt, vfdentry, VFID(n));
- debugfs_create_file("control", 0600, vfdentry, NULL, &control_ops);
+ if (vfid) {
+ pf_add_config_attrs(gt, dent, vfid);
+
+ debugfs_create_file("control", 0600, dent, NULL, &control_ops);
/* for testing/debugging purposes only! */
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG)) {
- debugfs_create_file("guc_state",
- IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
- vfdentry, NULL, &guc_state_ops);
debugfs_create_file("config_blob",
IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) ? 0600 : 0400,
- vfdentry, NULL, &config_blob_ops);
+ dent, NULL, &config_blob_ops);
}
+
+ } else {
+ pf_add_config_attrs(gt, dent, PFID);
+ pf_add_policy_attrs(gt, dent);
+
+ drm_debugfs_create_files(pf_info, ARRAY_SIZE(pf_info), dent, minor);
}
+
+ /* for backward compatibility only */
+ pf_add_compat_attrs(gt, dent, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_debugfs_populate() - Create SR-IOV GT-level debugfs directories and files.
+ * @gt: the &xe_gt to register
+ * @parent: the parent &dentry that represents a &xe_tile
+ * @vfid: the VF identifier
+ *
+ * Add a new debugfs directory to the @parent directory that will represent a @gt and
+ * populate it with GT files related to the SR-IOV @vfid function.
+ *
+ * This function can only be called on PF.
+ */
+void xe_gt_sriov_pf_debugfs_populate(struct xe_gt *gt, struct dentry *parent, unsigned int vfid)
+{
+ struct dentry *dent;
+ char name[8]; /* should be enough up to "gt%u\0" for 2^8 - 1 */
+
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, extract_priv(parent) == gt->tile);
+ xe_gt_assert(gt, extract_priv(parent->d_parent) == gt_to_xe(gt) ||
+ (uintptr_t)extract_priv(parent->d_parent) == vfid);
+
+ /*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * │   ├── pf
+ * │   │   ├── tile0        # parent
+ * │   │   │   ├── gt0      # d_inode->i_private = (xe_gt*)
+ * │   │   │   ├── gt1
+ * │   │   :   :
+ * │   ├── vf1
+ * │   │   ├── tile0        # parent
+ * │   │   │   ├── gt0      # d_inode->i_private = (xe_gt*)
+ * │   │   │   ├── gt1
+ * │   :   :   :
+ */
+ snprintf(name, sizeof(name), "gt%u", gt->info.id);
+ dent = debugfs_create_dir(name, parent);
+ if (IS_ERR(dent))
+ return;
+ dent->d_inode->i_private = gt;
+
+ xe_gt_assert(gt, extract_gt(dent) == gt);
+ xe_gt_assert(gt, extract_vfid(dent) == vfid);
+
+ pf_populate_gt(gt, dent, vfid);
+}
+
+static void pf_add_links(struct xe_gt *gt, struct dentry *dent)
+{
+ unsigned int totalvfs = xe_gt_sriov_pf_get_totalvfs(gt);
+ unsigned int vfid;
+ char name[16]; /* should be more than enough for "vf%u\0" and VFID(UINT_MAX) */
+ char symlink[64]; /* should be more than enough for "../../sriov/vf%u/tile%u/gt%u\0" */
+
+ for (vfid = 0; vfid <= totalvfs; vfid++) {
+ if (vfid)
+ snprintf(name, sizeof(name), "vf%u", vfid);
+ else
+ snprintf(name, sizeof(name), "pf");
+ snprintf(symlink, sizeof(symlink), "../../sriov/%s/tile%u/gt%u",
+ name, gt->tile->id, gt->info.id);
+ debugfs_create_symlink(name, dent, symlink);
+ }
+}
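For example, with two VFs enabled on tile 0 / GT 0, the loop above ends up creating links inside the GT debugfs directory along these lines (an illustration derived from the snprintf() formats above, not output captured from a device):

	pf  -> ../../sriov/pf/tile0/gt0
	vf1 -> ../../sriov/vf1/tile0/gt0
	vf2 -> ../../sriov/vf2/tile0/gt0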
+
+/**
+ * xe_gt_sriov_pf_debugfs_register - Register SR-IOV PF specific entries in GT debugfs.
+ * @gt: the &xe_gt to register
+ * @dent: the &dentry that represents the GT directory
+ *
+ * Instead of actual files, create symlinks for the PF and each VF to their GT-specific
+ * attributes, which should already be exposed in the dedicated debugfs SR-IOV tree.
+ */
+void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *dent)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, dent->d_inode->i_private == gt);
+
+ pf_add_links(gt, dent);
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h
index 038cc8ddc244..82ff3b7f0532 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_debugfs.h
@@ -11,6 +11,7 @@ struct dentry;
#ifdef CONFIG_PCI_IOV
void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root);
+void xe_gt_sriov_pf_debugfs_populate(struct xe_gt *gt, struct dentry *parent, unsigned int vfid);
#else
static inline void xe_gt_sriov_pf_debugfs_register(struct xe_gt *gt, struct dentry *root) { }
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
index c712111aa30d..3174a8dee779 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.c
@@ -5,14 +5,150 @@
#include <drm/drm_managed.h>
+#include "regs/xe_guc_regs.h"
+
#include "abi/guc_actions_sriov_abi.h"
#include "xe_bo.h"
+#include "xe_ggtt.h"
+#include "xe_gt.h"
+#include "xe_gt_sriov_pf.h"
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_helpers.h"
#include "xe_gt_sriov_pf_migration.h"
#include "xe_gt_sriov_printk.h"
#include "xe_guc.h"
+#include "xe_guc_buf.h"
#include "xe_guc_ct.h"
+#include "xe_migrate.h"
+#include "xe_mmio.h"
#include "xe_sriov.h"
+#include "xe_sriov_packet.h"
+#include "xe_sriov_packet_types.h"
+#include "xe_sriov_pf_migration.h"
+
+#define XE_GT_SRIOV_PF_MIGRATION_RING_SIZE 5
+
+static struct xe_gt_sriov_migration_data *pf_pick_gt_migration(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ return &gt->sriov.pf.vfs[vfid].migration;
+}
+
+static void pf_dump_mig_data(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data,
+ const char *what)
+{
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
+ struct drm_printer p = xe_gt_dbg_printer(gt);
+
+ drm_printf(&p, "VF%u %s (%llu bytes)\n", vfid, what, data->hdr.size);
+ drm_print_hex_dump(&p, "mig_hdr: ", (void *)&data->hdr, sizeof(data->hdr));
+ drm_print_hex_dump(&p, "mig_data: ", data->vaddr, min(SZ_64, data->hdr.size));
+ }
+}
+
+static ssize_t pf_migration_ggtt_size(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!xe_gt_is_main_type(gt))
+ return 0;
+
+ return xe_gt_sriov_pf_config_ggtt_save(gt, vfid, NULL, 0);
+}
+
+static int pf_save_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_sriov_packet *data;
+ size_t size;
+ int ret;
+
+ size = pf_migration_ggtt_size(gt, vfid);
+ xe_gt_assert(gt, size);
+
+ data = xe_sriov_packet_alloc(gt_to_xe(gt));
+ if (!data)
+ return -ENOMEM;
+
+ ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id,
+ XE_SRIOV_PACKET_TYPE_GGTT, 0, size);
+ if (ret)
+ goto fail;
+
+ ret = xe_gt_sriov_pf_config_ggtt_save(gt, vfid, data->vaddr, size);
+ if (ret)
+ goto fail;
+
+ pf_dump_mig_data(gt, vfid, data, "GGTT data save");
+
+ ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data);
+ if (ret)
+ goto fail;
+
+ return 0;
+
+fail:
+ xe_sriov_packet_free(data);
+ xe_gt_sriov_err(gt, "Failed to save VF%u GGTT data (%pe)\n", vfid, ERR_PTR(ret));
+ return ret;
+}
+
+static int pf_restore_vf_ggtt_mig_data(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ int ret;
+
+ pf_dump_mig_data(gt, vfid, data, "GGTT data restore");
+
+ ret = xe_gt_sriov_pf_config_ggtt_restore(gt, vfid, data->vaddr, data->hdr.size);
+ if (ret) {
+ xe_gt_sriov_err(gt, "Failed to restore VF%u GGTT data (%pe)\n",
+ vfid, ERR_PTR(ret));
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_ggtt_save() - Save VF GGTT migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ return pf_save_vf_ggtt_mig_data(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_ggtt_restore() - Restore VF GGTT migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ * @data: the &xe_sriov_packet containing migration data
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+
+ return pf_restore_vf_ggtt_mig_data(gt, vfid, data);
+}
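A minimal sketch of how the save and restore sides of the GGTT data pair up, assuming the packet queued by the save path is handed back by xe_gt_sriov_pf_migration_save_consume() (introduced later in this patch); this is not code from the patch and it skips the control-worker state machine that normally drives the flow:

	static int pf_ggtt_roundtrip_sketch(struct xe_gt *gt, unsigned int vfid)
	{
		struct xe_sriov_packet *data;
		int err;

		/* producer side: queues one GGTT packet on the VF's migration ring */
		err = xe_gt_sriov_pf_migration_ggtt_save(gt, vfid);
		if (err)
			return err;

		/* consumer side: takes the packet back off the ring */
		data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
		if (IS_ERR_OR_NULL(data))
			return data ? PTR_ERR(data) : -ENODATA;

		err = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data);
		xe_sriov_packet_free(data);
		return err;
	}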
/* Return: number of dwords saved/restored/required or a negative error code on failure */
static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode,
@@ -33,7 +169,7 @@ static int guc_action_vf_save_restore(struct xe_guc *guc, u32 vfid, u32 opcode,
}
/* Return: size of the state in dwords or a negative error code on failure */
-static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
+static int pf_send_guc_query_vf_mig_data_size(struct xe_gt *gt, unsigned int vfid)
{
int ret;
@@ -42,353 +178,856 @@ static int pf_send_guc_query_vf_state_size(struct xe_gt *gt, unsigned int vfid)
}
/* Return: number of state dwords saved or a negative error code on failure */
-static int pf_send_guc_save_vf_state(struct xe_gt *gt, unsigned int vfid,
- void *buff, size_t size)
+static int pf_send_guc_save_vf_mig_data(struct xe_gt *gt, unsigned int vfid,
+ void *dst, size_t size)
{
const int ndwords = size / sizeof(u32);
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = tile_to_xe(tile);
struct xe_guc *guc = &gt->uc.guc;
- struct xe_bo *bo;
+ CLASS(xe_guc_buf, buf)(&guc->buf, ndwords);
int ret;
xe_gt_assert(gt, size % sizeof(u32) == 0);
xe_gt_assert(gt, size == ndwords * sizeof(u32));
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(bo))
- return PTR_ERR(bo);
+ if (!xe_guc_buf_is_valid(buf))
+ return -ENOBUFS;
+
+ /* FW expects this buffer to be zero-initialized */
+ memset(xe_guc_buf_cpu_ptr(buf), 0, size);
ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_SAVE,
- xe_bo_ggtt_addr(bo), ndwords);
+ xe_guc_buf_flush(buf), ndwords);
if (!ret)
ret = -ENODATA;
else if (ret > ndwords)
ret = -EPROTO;
else if (ret > 0)
- xe_map_memcpy_from(xe, buff, &bo->vmap, 0, ret * sizeof(u32));
+ memcpy(dst, xe_guc_buf_sync_read(buf), ret * sizeof(u32));
- xe_bo_unpin_map_no_vm(bo);
return ret;
}
/* Return: number of state dwords restored or a negative error code on failure */
-static int pf_send_guc_restore_vf_state(struct xe_gt *gt, unsigned int vfid,
- const void *buff, size_t size)
+static int pf_send_guc_restore_vf_mig_data(struct xe_gt *gt, unsigned int vfid,
+ const void *src, size_t size)
{
const int ndwords = size / sizeof(u32);
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = tile_to_xe(tile);
struct xe_guc *guc = &gt->uc.guc;
- struct xe_bo *bo;
+ CLASS(xe_guc_buf_from_data, buf)(&guc->buf, src, size);
int ret;
xe_gt_assert(gt, size % sizeof(u32) == 0);
xe_gt_assert(gt, size == ndwords * sizeof(u32));
- bo = xe_bo_create_pin_map(xe, tile, NULL,
- ALIGN(size, PAGE_SIZE),
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(bo))
- return PTR_ERR(bo);
-
- xe_map_memcpy_to(xe, &bo->vmap, 0, buff, size);
+ if (!xe_guc_buf_is_valid(buf))
+ return -ENOBUFS;
ret = guc_action_vf_save_restore(guc, vfid, GUC_PF_OPCODE_VF_RESTORE,
- xe_bo_ggtt_addr(bo), ndwords);
+ xe_guc_buf_flush(buf), ndwords);
if (!ret)
ret = -ENODATA;
else if (ret > ndwords)
ret = -EPROTO;
- xe_bo_unpin_map_no_vm(bo);
return ret;
}
static bool pf_migration_supported(struct xe_gt *gt)
{
- xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
- return gt->sriov.pf.migration.supported;
+ return xe_sriov_pf_migration_supported(gt_to_xe(gt));
}
-static struct mutex *pf_migration_mutex(struct xe_gt *gt)
+static int pf_save_vf_guc_mig_data(struct xe_gt *gt, unsigned int vfid)
{
- xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
- return &gt->sriov.pf.migration.snapshot_lock;
+ struct xe_sriov_packet *data;
+ size_t size;
+ int ret;
+
+ ret = pf_send_guc_query_vf_mig_data_size(gt, vfid);
+ if (ret < 0)
+ goto fail;
+
+ size = ret * sizeof(u32);
+
+ data = xe_sriov_packet_alloc(gt_to_xe(gt));
+ if (!data) {
+ ret = -ENOMEM;
+ goto fail;
+ }
+
+ ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id,
+ XE_SRIOV_PACKET_TYPE_GUC, 0, size);
+ if (ret)
+ goto fail_free;
+
+ ret = pf_send_guc_save_vf_mig_data(gt, vfid, data->vaddr, size);
+ if (ret < 0)
+ goto fail_free;
+ size = ret * sizeof(u32);
+ xe_gt_assert(gt, size);
+ xe_gt_assert(gt, size <= data->hdr.size);
+ data->hdr.size = size;
+ data->remaining = size;
+
+ pf_dump_mig_data(gt, vfid, data, "GuC data save");
+
+ ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data);
+ if (ret)
+ goto fail_free;
+
+ return 0;
+
+fail_free:
+ xe_sriov_packet_free(data);
+fail:
+ xe_gt_sriov_err(gt, "Failed to save VF%u GuC data (%pe)\n",
+ vfid, ERR_PTR(ret));
+ return ret;
}
-static struct xe_gt_sriov_state_snapshot *pf_pick_vf_snapshot(struct xe_gt *gt,
- unsigned int vfid)
+static ssize_t pf_migration_guc_size(struct xe_gt *gt, unsigned int vfid)
+{
+ ssize_t size;
+
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ size = pf_send_guc_query_vf_mig_data_size(gt, vfid);
+ if (size >= 0)
+ size *= sizeof(u32);
+
+ return size;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_guc_save() - Save VF GuC migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid)
{
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
- lockdep_assert_held(pf_migration_mutex(gt));
- return &gt->sriov.pf.vfs[vfid].snapshot;
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ return pf_save_vf_guc_mig_data(gt, vfid);
}
-static unsigned int pf_snapshot_index(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
{
- return container_of(snapshot, struct xe_gt_sriov_metadata, snapshot) - gt->sriov.pf.vfs;
+ int ret;
+
+ xe_gt_assert(gt, data->hdr.size);
+
+ pf_dump_mig_data(gt, vfid, data, "GuC data restore");
+
+ ret = pf_send_guc_restore_vf_mig_data(gt, vfid, data->vaddr, data->hdr.size);
+ if (ret < 0)
+ goto fail;
+
+ return 0;
+
+fail:
+ xe_gt_sriov_err(gt, "Failed to restore VF%u GuC data (%pe)\n",
+ vfid, ERR_PTR(ret));
+ return ret;
}
-static void pf_free_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+/**
+ * xe_gt_sriov_pf_migration_guc_restore() - Restore VF GuC migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @data: the &xe_sriov_packet containing migration data
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
{
- struct xe_device *xe = gt_to_xe(gt);
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
- drmm_kfree(&xe->drm, snapshot->guc.buff);
- snapshot->guc.buff = NULL;
- snapshot->guc.size = 0;
+ if (!pf_migration_supported(gt))
+ return -ENOPKG;
+
+ return pf_restore_vf_guc_state(gt, vfid, data);
}
-static int pf_alloc_guc_state(struct xe_gt *gt,
- struct xe_gt_sriov_state_snapshot *snapshot,
- size_t size)
+static ssize_t pf_migration_mmio_size(struct xe_gt *gt, unsigned int vfid)
{
- struct xe_device *xe = gt_to_xe(gt);
- void *p;
-
- pf_free_guc_state(gt, snapshot);
+ if (xe_gt_is_media_type(gt))
+ return MED_VF_SW_FLAG_COUNT * sizeof(u32);
+ else
+ return VF_SW_FLAG_COUNT * sizeof(u32);
+}
- if (!size)
- return -ENODATA;
+static int pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid, void *buf, size_t size)
+{
+ struct xe_mmio mmio;
+ u32 *regs = buf;
+ int n;
- if (size % sizeof(u32))
+ if (size != pf_migration_mmio_size(gt, vfid))
return -EINVAL;
- if (size > SZ_2M)
- return -EFBIG;
+ xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid);
- p = drmm_kzalloc(&xe->drm, size, GFP_KERNEL);
- if (!p)
- return -ENOMEM;
+ if (xe_gt_is_media_type(gt))
+ for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
+ regs[n] = xe_mmio_read32(&mmio, MED_VF_SW_FLAG(n));
+ else
+ for (n = 0; n < VF_SW_FLAG_COUNT; n++)
+ regs[n] = xe_mmio_read32(&mmio, VF_SW_FLAG(n));
- snapshot->guc.buff = p;
- snapshot->guc.size = size;
return 0;
}
-static void pf_dump_guc_state(struct xe_gt *gt, struct xe_gt_sriov_state_snapshot *snapshot)
+static int pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
+ const void *buf, size_t size)
{
- if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
- unsigned int vfid __maybe_unused = pf_snapshot_index(gt, snapshot);
+ const u32 *regs = buf;
+ struct xe_mmio mmio;
+ int n;
- xe_gt_sriov_dbg_verbose(gt, "VF%u GuC state is %zu dwords:\n",
- vfid, snapshot->guc.size / sizeof(u32));
- print_hex_dump_bytes("state: ", DUMP_PREFIX_OFFSET,
- snapshot->guc.buff, min(SZ_64, snapshot->guc.size));
- }
+ if (size != pf_migration_mmio_size(gt, vfid))
+ return -EINVAL;
+
+ xe_mmio_init_vf_view(&mmio, &gt->mmio, vfid);
+
+ if (xe_gt_is_media_type(gt))
+ for (n = 0; n < MED_VF_SW_FLAG_COUNT; n++)
+ xe_mmio_write32(&mmio, MED_VF_SW_FLAG(n), regs[n]);
+ else
+ for (n = 0; n < VF_SW_FLAG_COUNT; n++)
+ xe_mmio_write32(&mmio, VF_SW_FLAG(n), regs[n]);
+
+ return 0;
}
-static int pf_save_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+static int pf_save_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid)
{
- struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ struct xe_sriov_packet *data;
size_t size;
int ret;
- ret = pf_send_guc_query_vf_state_size(gt, vfid);
- if (ret < 0)
+ size = pf_migration_mmio_size(gt, vfid);
+ xe_gt_assert(gt, size);
+
+ data = xe_sriov_packet_alloc(gt_to_xe(gt));
+ if (!data)
+ return -ENOMEM;
+
+ ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id,
+ XE_SRIOV_PACKET_TYPE_MMIO, 0, size);
+ if (ret)
goto fail;
- size = ret * sizeof(u32);
- xe_gt_sriov_dbg_verbose(gt, "VF%u state size is %d dwords (%zu bytes)\n", vfid, ret, size);
- ret = pf_alloc_guc_state(gt, snapshot, size);
- if (ret < 0)
+ ret = pf_migration_mmio_save(gt, vfid, data->vaddr, size);
+ if (ret)
goto fail;
- ret = pf_send_guc_save_vf_state(gt, vfid, snapshot->guc.buff, size);
- if (ret < 0)
+ pf_dump_mig_data(gt, vfid, data, "MMIO data save");
+
+ ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data);
+ if (ret)
goto fail;
- size = ret * sizeof(u32);
- xe_gt_assert(gt, size);
- xe_gt_assert(gt, size <= snapshot->guc.size);
- snapshot->guc.size = size;
- pf_dump_guc_state(gt, snapshot);
return 0;
fail:
- xe_gt_sriov_dbg(gt, "Unable to save VF%u state (%pe)\n", vfid, ERR_PTR(ret));
- pf_free_guc_state(gt, snapshot);
+ xe_sriov_packet_free(data);
+ xe_gt_sriov_err(gt, "Failed to save VF%u MMIO data (%pe)\n", vfid, ERR_PTR(ret));
return ret;
}
+static int pf_restore_vf_mmio_mig_data(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ int ret;
+
+ pf_dump_mig_data(gt, vfid, data, "MMIO data restore");
+
+ ret = pf_migration_mmio_restore(gt, vfid, data->vaddr, data->hdr.size);
+ if (ret) {
+ xe_gt_sriov_err(gt, "Failed to restore VF%u MMIO data (%pe)\n",
+ vfid, ERR_PTR(ret));
+
+ return ret;
+ }
+
+ return 0;
+}
+
/**
- * xe_gt_sriov_pf_migration_save_guc_state() - Take a GuC VF state snapshot.
+ * xe_gt_sriov_pf_migration_mmio_save() - Save VF MMIO migration data.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the VF identifier (can't be 0)
*
* This function is for PF only.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid)
+int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid)
{
- int err;
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+ return pf_save_vf_mmio_mig_data(gt, vfid);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_mmio_restore() - Restore VF MMIO migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ * @data: the &xe_sriov_packet containing migration data
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
xe_gt_assert(gt, vfid != PFID);
xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
- if (!pf_migration_supported(gt))
- return -ENOPKG;
+ return pf_restore_vf_mmio_mig_data(gt, vfid, data);
+}
- mutex_lock(pf_migration_mutex(gt));
- err = pf_save_vf_guc_state(gt, vfid);
- mutex_unlock(pf_migration_mutex(gt));
+static ssize_t pf_migration_vram_size(struct xe_gt *gt, unsigned int vfid)
+{
+ if (!xe_gt_is_main_type(gt))
+ return 0;
- return err;
+ return xe_gt_sriov_pf_config_get_lmem(gt, vfid);
+}
+
+static struct dma_fence *__pf_save_restore_vram(struct xe_gt *gt, unsigned int vfid,
+ struct xe_bo *vram, u64 vram_offset,
+ struct xe_bo *sysmem, u64 sysmem_offset,
+ size_t size, bool save)
+{
+ struct dma_fence *ret = NULL;
+ struct drm_exec exec;
+ int err;
+
+ drm_exec_init(&exec, 0, 0);
+ drm_exec_until_all_locked(&exec) {
+ err = drm_exec_lock_obj(&exec, &vram->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err;
+ }
+
+ err = drm_exec_lock_obj(&exec, &sysmem->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (err) {
+ ret = ERR_PTR(err);
+ goto err;
+ }
+ }
+
+ ret = xe_migrate_vram_copy_chunk(vram, vram_offset, sysmem, sysmem_offset, size,
+ save ? XE_MIGRATE_COPY_TO_SRAM : XE_MIGRATE_COPY_TO_VRAM);
+
+err:
+ drm_exec_fini(&exec);
+
+ return ret;
}
-static int pf_restore_vf_guc_state(struct xe_gt *gt, unsigned int vfid)
+#define PF_VRAM_SAVE_RESTORE_TIMEOUT (5 * HZ)
+static int pf_save_vram_chunk(struct xe_gt *gt, unsigned int vfid,
+ struct xe_bo *src_vram, u64 src_vram_offset,
+ size_t size)
{
- struct xe_gt_sriov_state_snapshot *snapshot = pf_pick_vf_snapshot(gt, vfid);
+ struct xe_sriov_packet *data;
+ struct dma_fence *fence;
int ret;
- if (!snapshot->guc.size)
- return -ENODATA;
+ data = xe_sriov_packet_alloc(gt_to_xe(gt));
+ if (!data)
+ return -ENOMEM;
- xe_gt_sriov_dbg_verbose(gt, "restoring %zu dwords of VF%u GuC state\n",
- snapshot->guc.size / sizeof(u32), vfid);
- ret = pf_send_guc_restore_vf_state(gt, vfid, snapshot->guc.buff, snapshot->guc.size);
- if (ret < 0)
+ ret = xe_sriov_packet_init(data, gt->tile->id, gt->info.id,
+ XE_SRIOV_PACKET_TYPE_VRAM, src_vram_offset,
+ size);
+ if (ret)
+ goto fail;
+
+ fence = __pf_save_restore_vram(gt, vfid,
+ src_vram, src_vram_offset,
+ data->bo, 0, size, true);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ goto fail;
+ }
+
+ ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT);
+ dma_fence_put(fence);
+ if (!ret) {
+ ret = -ETIME;
+ goto fail;
+ }
+
+ pf_dump_mig_data(gt, vfid, data, "VRAM data save");
+
+ ret = xe_gt_sriov_pf_migration_save_produce(gt, vfid, data);
+ if (ret)
goto fail;
- xe_gt_sriov_dbg_verbose(gt, "restored %d dwords of VF%u GuC state\n", ret, vfid);
return 0;
fail:
- xe_gt_sriov_dbg(gt, "Failed to restore VF%u GuC state (%pe)\n", vfid, ERR_PTR(ret));
+ xe_sriov_packet_free(data);
+ return ret;
+}
+
+#define VF_VRAM_STATE_CHUNK_MAX_SIZE SZ_512M
+static int pf_save_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
+ loff_t *offset = &migration->save.vram_offset;
+ struct xe_bo *vram;
+ size_t vram_size, chunk_size;
+ int ret;
+
+ vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid);
+ if (!vram)
+ return -ENXIO;
+
+ vram_size = xe_bo_size(vram);
+
+ xe_gt_assert(gt, *offset < vram_size);
+
+ chunk_size = min(vram_size - *offset, VF_VRAM_STATE_CHUNK_MAX_SIZE);
+
+ ret = pf_save_vram_chunk(gt, vfid, vram, *offset, chunk_size);
+ if (ret)
+ goto fail;
+
+ *offset += chunk_size;
+
+ xe_bo_put(vram);
+
+ if (*offset < vram_size)
+ return -EAGAIN;
+
+ return 0;
+
+fail:
+ xe_bo_put(vram);
+ xe_gt_sriov_err(gt, "Failed to save VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret));
+ return ret;
+}
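The -EAGAIN return above is the chunking contract: the caller is expected to keep calling until the whole VRAM object has been walked. A minimal sketch of such a loop, assuming the real control worker interleaves it with ring consumption so the small fixed-size ring does not overflow:

	static int pf_save_all_vram_sketch(struct xe_gt *gt, unsigned int vfid)
	{
		int err;

		do {
			/* queues at most one chunk and advances save.vram_offset */
			err = xe_gt_sriov_pf_migration_vram_save(gt, vfid);
		} while (err == -EAGAIN);

		return err;	/* 0 once the last chunk was queued */
	}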
+
+static int pf_restore_vf_vram_mig_data(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ u64 end = data->hdr.offset + data->hdr.size;
+ struct dma_fence *fence;
+ struct xe_bo *vram;
+ size_t size;
+ int ret = 0;
+
+ vram = xe_gt_sriov_pf_config_get_lmem_obj(gt, vfid);
+ if (!vram)
+ return -ENXIO;
+
+ size = xe_bo_size(vram);
+
+ if (end > size || end < data->hdr.size) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ pf_dump_mig_data(gt, vfid, data, "VRAM data restore");
+
+ fence = __pf_save_restore_vram(gt, vfid, vram, data->hdr.offset,
+ data->bo, 0, data->hdr.size, false);
+ if (IS_ERR(fence)) {
+ ret = PTR_ERR(fence);
+ goto err;
+ }
+
+ ret = dma_fence_wait_timeout(fence, false, PF_VRAM_SAVE_RESTORE_TIMEOUT);
+ dma_fence_put(fence);
+ if (!ret) {
+ ret = -ETIME;
+ goto err;
+ }
+
+ xe_bo_put(vram);
+
+ return 0;
+err:
+ xe_bo_put(vram);
+ xe_gt_sriov_err(gt, "Failed to restore VF%u VRAM data (%pe)\n", vfid, ERR_PTR(ret));
return ret;
}
/**
- * xe_gt_sriov_pf_migration_restore_guc_state() - Restore a GuC VF state.
+ * xe_gt_sriov_pf_migration_vram_save() - Save VF VRAM migration data.
* @gt: the &xe_gt
- * @vfid: the VF identifier
+ * @vfid: the VF identifier (can't be 0)
*
* This function is for PF only.
*
* Return: 0 on success or a negative error code on failure.
*/
-int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid)
+int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid)
{
- int ret;
-
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
xe_gt_assert(gt, vfid != PFID);
xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
- if (!pf_migration_supported(gt))
- return -ENOPKG;
+ return pf_save_vf_vram_mig_data(gt, vfid);
+}
- mutex_lock(pf_migration_mutex(gt));
- ret = pf_restore_vf_guc_state(gt, vfid);
- mutex_unlock(pf_migration_mutex(gt));
+/**
+ * xe_gt_sriov_pf_migration_vram_restore() - Restore VF VRAM migration data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ * @data: the &xe_sriov_packet containing migration data
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
+ xe_gt_assert(gt, vfid != PFID);
+ xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
- return ret;
+ return pf_restore_vf_vram_mig_data(gt, vfid, data);
}
-#ifdef CONFIG_DEBUG_FS
/**
- * xe_gt_sriov_pf_migration_read_guc_state() - Read a GuC VF state.
+ * xe_gt_sriov_pf_migration_size() - Total size of migration data from all components within a GT.
* @gt: the &xe_gt
- * @vfid: the VF identifier
- * @buf: the user space buffer to read to
- * @count: the maximum number of bytes to read
- * @pos: the current position in the buffer
+ * @vfid: the VF identifier (can't be 0)
*
* This function is for PF only.
*
- * This function reads up to @count bytes from the saved VF GuC state buffer
- * at offset @pos into the user space address starting at @buf.
- *
- * Return: the number of bytes read or a negative error code on failure.
+ * Return: total migration data size in bytes or a negative error code on failure.
*/
-ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
- char __user *buf, size_t count, loff_t *pos)
+ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid)
{
- struct xe_gt_sriov_state_snapshot *snapshot;
- ssize_t ret;
+ ssize_t total = 0;
+ ssize_t size;
xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
xe_gt_assert(gt, vfid != PFID);
xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
- if (!pf_migration_supported(gt))
- return -ENOPKG;
+ size = pf_migration_guc_size(gt, vfid);
+ if (size < 0)
+ return size;
+ if (size > 0)
+ size += sizeof(struct xe_sriov_packet_hdr);
+ total += size;
+
+ size = pf_migration_ggtt_size(gt, vfid);
+ if (size < 0)
+ return size;
+ if (size > 0)
+ size += sizeof(struct xe_sriov_packet_hdr);
+ total += size;
+
+ size = pf_migration_mmio_size(gt, vfid);
+ if (size < 0)
+ return size;
+ if (size > 0)
+ size += sizeof(struct xe_sriov_packet_hdr);
+ total += size;
+
+ size = pf_migration_vram_size(gt, vfid);
+ if (size < 0)
+ return size;
+ if (size > 0)
+ size += sizeof(struct xe_sriov_packet_hdr);
+ total += size;
+
+ return total;
+}
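Put differently (an illustrative restatement, not code from the patch), the reported total is:

	/*
	 * total = (guc payload  + sizeof(struct xe_sriov_packet_hdr)) +
	 *         (ggtt payload + sizeof(struct xe_sriov_packet_hdr)) +  <- primary GT only
	 *         (mmio payload + sizeof(struct xe_sriov_packet_hdr)) +
	 *         (vram payload + sizeof(struct xe_sriov_packet_hdr))    <- primary GT only
	 */

so a media GT, which has no GGTT or VRAM component, reports only the GuC and MMIO payloads plus two headers.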
- mutex_lock(pf_migration_mutex(gt));
- snapshot = pf_pick_vf_snapshot(gt, vfid);
- if (snapshot->guc.size)
- ret = simple_read_from_buffer(buf, count, pos, snapshot->guc.buff,
- snapshot->guc.size);
- else
- ret = -ENODATA;
- mutex_unlock(pf_migration_mutex(gt));
+/**
+ * xe_gt_sriov_pf_migration_ring_empty() - Check if a migration ring is empty.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * Return: true if the ring is empty, otherwise false.
+ */
+bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid)
+{
+ return ptr_ring_empty(&pf_pick_gt_migration(gt, vfid)->ring);
+}
- return ret;
+/**
+ * xe_gt_sriov_pf_migration_ring_full() - Check if a migration ring is full.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * Return: true if the ring is full, otherwise false.
+ */
+bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid)
+{
+ return ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_ring_free() - Consume and free all data in the migration ring.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ */
+void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
+ struct xe_sriov_packet *data;
+
+ if (ptr_ring_empty(&migration->ring))
+ return;
+
+ xe_gt_sriov_notice(gt, "VF%u unprocessed migration data left in the ring!\n", vfid);
+
+ while ((data = ptr_ring_consume(&migration->ring)))
+ xe_sriov_packet_free(data);
+}
+
+static void pf_migration_save_data_todo(struct xe_gt *gt, unsigned int vfid,
+ enum xe_sriov_packet_type type)
+{
+ set_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_save_init() - Initialize per-GT migration related data.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ */
+void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid)
+{
+ struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
+
+ migration->save.data_remaining = 0;
+ migration->save.vram_offset = 0;
+
+ xe_gt_assert(gt, pf_migration_guc_size(gt, vfid) > 0);
+ pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GUC);
+
+ if (pf_migration_ggtt_size(gt, vfid) > 0)
+ pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_GGTT);
+
+ xe_gt_assert(gt, pf_migration_mmio_size(gt, vfid) > 0);
+ pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_MMIO);
+
+ if (pf_migration_vram_size(gt, vfid) > 0)
+ pf_migration_save_data_todo(gt, vfid, XE_SRIOV_PACKET_TYPE_VRAM);
}
/**
- * xe_gt_sriov_pf_migration_write_guc_state() - Write a GuC VF state.
+ * xe_gt_sriov_pf_migration_save_data_pending() - Check if migration data type needs to be saved.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ * @type: the &xe_sriov_packet_type of data to be checked
+ *
+ * Return: true if the data needs saving, otherwise false.
+ */
+bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid,
+ enum xe_sriov_packet_type type)
+{
+ return test_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining);
+}
+
+/**
+ * xe_gt_sriov_pf_migration_save_data_complete() - Complete migration data type save.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier (can't be 0)
+ * @type: the &xe_sriov_packet_type to be marked as completed.
+ */
+void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid,
+ enum xe_sriov_packet_type type)
+{
+ clear_bit(type, &pf_pick_gt_migration(gt, vfid)->save.data_remaining);
+}
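A minimal sketch of how the pending/complete helpers above might be paired with the per-type save calls by the control worker; the real worker also has to deal with ring-full backpressure and error states, which are omitted here:

	static int pf_process_save_sketch(struct xe_gt *gt, unsigned int vfid)
	{
		int err;

		if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
							       XE_SRIOV_PACKET_TYPE_GUC)) {
			err = xe_gt_sriov_pf_migration_guc_save(gt, vfid);
			if (err)
				return err;
			xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
								    XE_SRIOV_PACKET_TYPE_GUC);
		}

		if (xe_gt_sriov_pf_migration_save_data_pending(gt, vfid,
							       XE_SRIOV_PACKET_TYPE_VRAM)) {
			err = xe_gt_sriov_pf_migration_vram_save(gt, vfid);
			if (err == -EAGAIN)
				return -EAGAIN;	/* more VRAM chunks left, keep the bit set */
			if (err)
				return err;
			xe_gt_sriov_pf_migration_save_data_complete(gt, vfid,
								    XE_SRIOV_PACKET_TYPE_VRAM);
		}

		/* GGTT and MMIO would be handled the same way */
		return 0;
	}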
+
+/**
+ * xe_gt_sriov_pf_migration_save_produce() - Add VF save data packet to migration ring.
* @gt: the &xe_gt
* @vfid: the VF identifier
- * @buf: the user space buffer with GuC VF state
- * @size: the size of GuC VF state (in bytes)
+ * @data: the &xe_sriov_packet
*
- * This function is for PF only.
+ * Called by the save migration data producer (PF SR-IOV Control worker) when
+ * processing migration data.
+ * Wakes up the save migration data consumer (userspace), which may be
+ * waiting for data if the ring was empty.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ int ret;
+
+ ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data);
+ if (ret)
+ return ret;
+
+ wake_up_all(xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid));
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_restore_consume() - Get VF restore data packet from migration ring.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
*
- * This function reads @size bytes of the VF GuC state stored at user space
- * address @buf and writes it into a internal VF state buffer.
+ * Called by the restore migration data consumer (PF SR-IOV Control worker) when
+ * processing migration data.
+ * Wakes up the restore migration data producer (userspace), which may be
+ * waiting to add more data if the ring was full.
*
- * Return: the number of bytes used or a negative error code on failure.
+ * Return: Pointer to &xe_sriov_packet on success,
+ * NULL if the ring is empty.
*/
-ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
- const char __user *buf, size_t size)
+struct xe_sriov_packet *
+xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid)
{
- struct xe_gt_sriov_state_snapshot *snapshot;
- loff_t pos = 0;
- ssize_t ret;
+ struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
+ struct wait_queue_head *wq = xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid);
+ struct xe_sriov_packet *data;
- xe_gt_assert(gt, IS_SRIOV_PF(gt_to_xe(gt)));
- xe_gt_assert(gt, vfid != PFID);
- xe_gt_assert(gt, vfid <= xe_sriov_pf_get_totalvfs(gt_to_xe(gt)));
+ data = ptr_ring_consume(&migration->ring);
+ if (data)
+ wake_up_all(wq);
- if (!pf_migration_supported(gt))
- return -ENOPKG;
+ return data;
+}
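A minimal sketch of the restore worker's consumer loop built on the helper above, assuming the packet header records the type passed to xe_sriov_packet_init() in a field spelled hdr.type here (that field name is an assumption, it is not shown in this patch):

	static int pf_process_restore_sketch(struct xe_gt *gt, unsigned int vfid)
	{
		struct xe_sriov_packet *data;
		int err = 0;

		while ((data = xe_gt_sriov_pf_migration_restore_consume(gt, vfid))) {
			switch (data->hdr.type) {	/* assumed field name */
			case XE_SRIOV_PACKET_TYPE_GUC:
				err = xe_gt_sriov_pf_migration_guc_restore(gt, vfid, data);
				break;
			case XE_SRIOV_PACKET_TYPE_GGTT:
				err = xe_gt_sriov_pf_migration_ggtt_restore(gt, vfid, data);
				break;
			case XE_SRIOV_PACKET_TYPE_MMIO:
				err = xe_gt_sriov_pf_migration_mmio_restore(gt, vfid, data);
				break;
			case XE_SRIOV_PACKET_TYPE_VRAM:
				err = xe_gt_sriov_pf_migration_vram_restore(gt, vfid, data);
				break;
			default:
				err = -EINVAL;
				break;
			}
			xe_sriov_packet_free(data);
			if (err)
				break;
		}

		return err;
	}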
- mutex_lock(pf_migration_mutex(gt));
- snapshot = pf_pick_vf_snapshot(gt, vfid);
- ret = pf_alloc_guc_state(gt, snapshot, size);
- if (!ret) {
- ret = simple_write_to_buffer(snapshot->guc.buff, size, &pos, buf, size);
- if (ret < 0)
- pf_free_guc_state(gt, snapshot);
- else
- pf_dump_guc_state(gt, snapshot);
+static bool pf_restore_data_ready(struct xe_gt *gt, unsigned int vfid)
+{
+ if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid) ||
+ !ptr_ring_full(&pf_pick_gt_migration(gt, vfid)->ring))
+ return true;
+
+ return false;
+}
+
+/**
+ * xe_gt_sriov_pf_migration_restore_produce() - Add VF restore data packet to migration ring.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ * @data: the &xe_sriov_packet
+ *
+ * Called by the restore migration data producer (userspace) when processing
+ * migration data.
+ * If the ring is full, waits until there is space.
+ * Queues the restore migration data consumer (PF SR-IOV Control worker), which
+ * may be waiting for data if the ring was empty.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ int ret;
+
+ xe_gt_assert(gt, data->hdr.tile_id == gt->tile->id);
+ xe_gt_assert(gt, data->hdr.gt_id == gt->info.id);
+
+ for (;;) {
+ if (xe_gt_sriov_pf_control_check_restore_failed(gt, vfid))
+ return -EIO;
+
+ ret = ptr_ring_produce(&pf_pick_gt_migration(gt, vfid)->ring, data);
+ if (!ret)
+ break;
+
+ ret = wait_event_interruptible(*xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid),
+ pf_restore_data_ready(gt, vfid));
+ if (ret)
+ return ret;
}
- mutex_unlock(pf_migration_mutex(gt));
- return ret;
+ return xe_gt_sriov_pf_control_process_restore_data(gt, vfid);
}
-#endif /* CONFIG_DEBUG_FS */
-static bool pf_check_migration_support(struct xe_gt *gt)
+/**
+ * xe_gt_sriov_pf_migration_save_consume() - Get VF save data packet from migration ring.
+ * @gt: the &xe_gt
+ * @vfid: the VF identifier
+ *
+ * Called by the save migration data consumer (userspace) when
+ * processing migration data.
+ * Queues the save migration data producer (PF SR-IOV Control worker), which
+ * may be waiting to add more data if the ring is full.
+ *
+ * Return: Pointer to &xe_sriov_packet on success,
+ * NULL if the ring is empty and there is no more data to consume,
+ * ERR_PTR(-EAGAIN) if the ring is empty but more data is still being produced.
+ */
+struct xe_sriov_packet *
+xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid)
{
- /* GuC 70.25 with save/restore v2 is required */
- xe_gt_assert(gt, GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 25, 0));
+ struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, vfid);
+ struct xe_sriov_packet *data;
+ int ret;
+
+ data = ptr_ring_consume(&migration->ring);
+ if (data) {
+ ret = xe_gt_sriov_pf_control_process_save_data(gt, vfid);
+ if (ret) {
+ xe_sriov_packet_free(data);
+ return ERR_PTR(ret);
+ }
- /* XXX: for now this is for feature enabling only */
- return IS_ENABLED(CONFIG_DRM_XE_DEBUG);
+ return data;
+ }
+
+ if (xe_gt_sriov_pf_control_check_save_data_done(gt, vfid))
+ return NULL;
+
+ if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid))
+ return ERR_PTR(-EIO);
+
+ return ERR_PTR(-EAGAIN);
+}
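A minimal sketch of how a device-level save-stream reader might drive the helper above for a single GT, waiting on the migration waitqueue whenever -EAGAIN signals a temporarily empty ring; error handling is reduced to the bare minimum:

	static struct xe_sriov_packet *
	pf_wait_for_save_packet_sketch(struct xe_gt *gt, unsigned int vfid)
	{
		struct wait_queue_head *wq =
			xe_sriov_pf_migration_waitqueue(gt_to_xe(gt), vfid);
		struct xe_sriov_packet *data;
		int err;

		for (;;) {
			data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
			if (data != ERR_PTR(-EAGAIN))
				return data;	/* a packet, NULL (done) or a fatal error */

			/* ring temporarily empty - wait for the producer */
			err = wait_event_interruptible(*wq,
				!xe_gt_sriov_pf_migration_ring_empty(gt, vfid) ||
				xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) ||
				xe_gt_sriov_pf_control_check_save_failed(gt, vfid));
			if (err)
				return ERR_PTR(err);
		}
	}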
+
+static void destroy_pf_packet(void *ptr)
+{
+ struct xe_sriov_packet *data = ptr;
+
+ xe_sriov_packet_free(data);
+}
+
+static void action_ring_cleanup(void *arg)
+{
+ struct ptr_ring *r = arg;
+
+ ptr_ring_cleanup(r, destroy_pf_packet);
+}
+
+static void pf_gt_migration_check_support(struct xe_gt *gt)
+{
+ if (GUC_FIRMWARE_VER(&gt->uc.guc) < MAKE_GUC_VER(70, 54, 0))
+ xe_sriov_pf_migration_disable(gt_to_xe(gt), "requires GuC version >= 70.54.0");
}
/**
@@ -402,18 +1041,29 @@ static bool pf_check_migration_support(struct xe_gt *gt)
int xe_gt_sriov_pf_migration_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int n, totalvfs;
int err;
xe_gt_assert(gt, IS_SRIOV_PF(xe));
- gt->sriov.pf.migration.supported = pf_check_migration_support(gt);
+ pf_gt_migration_check_support(gt);
if (!pf_migration_supported(gt))
return 0;
- err = drmm_mutex_init(&xe->drm, &gt->sriov.pf.migration.snapshot_lock);
- if (err)
- return err;
+ totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ for (n = 1; n <= totalvfs; n++) {
+ struct xe_gt_sriov_migration_data *migration = pf_pick_gt_migration(gt, n);
+
+ err = ptr_ring_init(&migration->ring,
+ XE_GT_SRIOV_PF_MIGRATION_RING_SIZE, GFP_KERNEL);
+ if (err)
+ return err;
+
+ err = devm_add_action_or_reset(xe->drm.dev, action_ring_cleanup, &migration->ring);
+ if (err)
+ return err;
+ }
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
index 09faeae00ddb..181207a637b9 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration.h
@@ -9,16 +9,46 @@
#include <linux/types.h>
struct xe_gt;
+struct xe_sriov_packet;
+enum xe_sriov_packet_type;
+
+/* TODO: get this information by querying GuC in the future */
+#define XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE SZ_8M
int xe_gt_sriov_pf_migration_init(struct xe_gt *gt);
-int xe_gt_sriov_pf_migration_save_guc_state(struct xe_gt *gt, unsigned int vfid);
-int xe_gt_sriov_pf_migration_restore_guc_state(struct xe_gt *gt, unsigned int vfid);
-
-#ifdef CONFIG_DEBUG_FS
-ssize_t xe_gt_sriov_pf_migration_read_guc_state(struct xe_gt *gt, unsigned int vfid,
- char __user *buf, size_t count, loff_t *pos);
-ssize_t xe_gt_sriov_pf_migration_write_guc_state(struct xe_gt *gt, unsigned int vfid,
- const char __user *buf, size_t count);
-#endif
+int xe_gt_sriov_pf_migration_guc_save(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_guc_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data);
+int xe_gt_sriov_pf_migration_ggtt_save(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_ggtt_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data);
+int xe_gt_sriov_pf_migration_mmio_save(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_mmio_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data);
+int xe_gt_sriov_pf_migration_vram_save(struct xe_gt *gt, unsigned int vfid);
+int xe_gt_sriov_pf_migration_vram_restore(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data);
+
+ssize_t xe_gt_sriov_pf_migration_size(struct xe_gt *gt, unsigned int vfid);
+
+bool xe_gt_sriov_pf_migration_ring_empty(struct xe_gt *gt, unsigned int vfid);
+bool xe_gt_sriov_pf_migration_ring_full(struct xe_gt *gt, unsigned int vfid);
+void xe_gt_sriov_pf_migration_ring_free(struct xe_gt *gt, unsigned int vfid);
+
+void xe_gt_sriov_pf_migration_save_init(struct xe_gt *gt, unsigned int vfid);
+bool xe_gt_sriov_pf_migration_save_data_pending(struct xe_gt *gt, unsigned int vfid,
+ enum xe_sriov_packet_type type);
+void xe_gt_sriov_pf_migration_save_data_complete(struct xe_gt *gt, unsigned int vfid,
+ enum xe_sriov_packet_type type);
+
+int xe_gt_sriov_pf_migration_save_produce(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data);
+struct xe_sriov_packet *
+xe_gt_sriov_pf_migration_restore_consume(struct xe_gt *gt, unsigned int vfid);
+
+int xe_gt_sriov_pf_migration_restore_produce(struct xe_gt *gt, unsigned int vfid,
+ struct xe_sriov_packet *data);
+struct xe_sriov_packet *
+xe_gt_sriov_pf_migration_save_consume(struct xe_gt *gt, unsigned int vfid);
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
index 1f3110b6d44f..f50c64241e9c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_migration_types.h
@@ -6,35 +6,23 @@
#ifndef _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
#define _XE_GT_SRIOV_PF_MIGRATION_TYPES_H_
-#include <linux/mutex.h>
-#include <linux/types.h>
+#include <linux/ptr_ring.h>
/**
- * struct xe_gt_sriov_state_snapshot - GT-level per-VF state snapshot data.
+ * struct xe_gt_sriov_migration_data - GT-level per-VF migration data.
*
* Used by the PF driver to maintain per-VF migration data.
*/
-struct xe_gt_sriov_state_snapshot {
- /** @guc: GuC VF state snapshot */
+struct xe_gt_sriov_migration_data {
+ /** @ring: queue containing VF save / restore migration data */
+ struct ptr_ring ring;
+ /** @save: structure for currently processed save migration data */
struct {
- /** @guc.buff: buffer with the VF state */
- u32 *buff;
- /** @guc.size: size of the buffer (must be dwords aligned) */
- u32 size;
- } guc;
-};
-
-/**
- * struct xe_gt_sriov_pf_migration - GT-level data.
- *
- * Used by the PF driver to maintain non-VF specific per-GT data.
- */
-struct xe_gt_sriov_pf_migration {
- /** @supported: indicates whether the feature is supported */
- bool supported;
-
- /** @snapshot_lock: protects all VFs snapshots */
- struct mutex snapshot_lock;
+ /** @save.data_remaining: bitmap of migration types that need to be saved */
+ unsigned long data_remaining;
+ /** @save.vram_offset: last saved offset within VRAM, used for chunked VRAM save */
+ loff_t vram_offset;
+ } save;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
index 821cfcc34e6b..2eb21610e5a0 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.c
@@ -19,91 +19,7 @@
#include "xe_gt_sriov_pf_service_types.h"
#include "xe_guc_ct.h"
#include "xe_guc_hxg_helpers.h"
-
-static void pf_init_versions(struct xe_gt *gt)
-{
- BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR);
- BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR);
-
- /* base versions may differ between platforms */
- gt->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR;
- gt->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR;
-
- /* latest version is same for all platforms */
- gt->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR;
- gt->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR;
-}
-
-/* Return: 0 on success or a negative error code on failure. */
-static int pf_negotiate_version(struct xe_gt *gt,
- u32 wanted_major, u32 wanted_minor,
- u32 *major, u32 *minor)
-{
- struct xe_gt_sriov_pf_service_version base = gt->sriov.pf.service.version.base;
- struct xe_gt_sriov_pf_service_version latest = gt->sriov.pf.service.version.latest;
-
- xe_gt_assert(gt, base.major);
- xe_gt_assert(gt, base.major <= latest.major);
- xe_gt_assert(gt, (base.major < latest.major) || (base.minor <= latest.minor));
-
- /* VF doesn't care - return our latest */
- if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY &&
- wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) {
- *major = latest.major;
- *minor = latest.minor;
- return 0;
- }
-
- /* VF wants newer than our - return our latest */
- if (wanted_major > latest.major) {
- *major = latest.major;
- *minor = latest.minor;
- return 0;
- }
-
- /* VF wants older than min required - reject */
- if (wanted_major < base.major ||
- (wanted_major == base.major && wanted_minor < base.minor)) {
- return -EPERM;
- }
-
- /* previous major - return wanted, as we should still support it */
- if (wanted_major < latest.major) {
- /* XXX: we are not prepared for multi-versions yet */
- xe_gt_assert(gt, base.major == latest.major);
- return -ENOPKG;
- }
-
- /* same major - return common minor */
- *major = wanted_major;
- *minor = min_t(u32, latest.minor, wanted_minor);
- return 0;
-}
-
-static void pf_connect(struct xe_gt *gt, u32 vfid, u32 major, u32 minor)
-{
- xe_gt_sriov_pf_assert_vfid(gt, vfid);
- xe_gt_assert(gt, major || minor);
-
- gt->sriov.pf.vfs[vfid].version.major = major;
- gt->sriov.pf.vfs[vfid].version.minor = minor;
-}
-
-static void pf_disconnect(struct xe_gt *gt, u32 vfid)
-{
- xe_gt_sriov_pf_assert_vfid(gt, vfid);
-
- gt->sriov.pf.vfs[vfid].version.major = 0;
- gt->sriov.pf.vfs[vfid].version.minor = 0;
-}
-
-static bool pf_is_negotiated(struct xe_gt *gt, u32 vfid, u32 major, u32 minor)
-{
- xe_gt_sriov_pf_assert_vfid(gt, vfid);
-
- return major == gt->sriov.pf.vfs[vfid].version.major &&
- minor <= gt->sriov.pf.vfs[vfid].version.minor;
-}
+#include "xe_sriov_pf_service.h"
static const struct xe_reg tgl_runtime_regs[] = {
RPM_CONFIG0, /* _MMIO(0x0d00) */
@@ -183,11 +99,30 @@ static const struct xe_reg ver_3000_runtime_regs[] = {
HUC_KERNEL_LOAD_INFO, /* _MMIO(0xc1dc) */
};
+static const struct xe_reg ver_35_runtime_regs[] = {
+ RPM_CONFIG0, /* _MMIO(0x0d00) */
+ XEHP_FUSE4, /* _MMIO(0x9114) */
+ MIRROR_FUSE3, /* _MMIO(0x9118) */
+ MIRROR_L3BANK_ENABLE, /* _MMIO(0x9130) */
+ XELP_EU_ENABLE, /* _MMIO(0x9134) */
+ XELP_GT_GEOMETRY_DSS_ENABLE, /* _MMIO(0x913c) */
+ GT_VEBOX_VDBOX_DISABLE, /* _MMIO(0x9140) */
+ XEHP_GT_COMPUTE_DSS_ENABLE, /* _MMIO(0x9144) */
+ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,/* _MMIO(0x9148) */
+ XE2_GT_COMPUTE_DSS_2, /* _MMIO(0x914c) */
+ XE2_GT_GEOMETRY_DSS_1, /* _MMIO(0x9150) */
+ XE2_GT_GEOMETRY_DSS_2, /* _MMIO(0x9154) */
+ SERVICE_COPY_ENABLE, /* _MMIO(0x9170) */
+};
+
static const struct xe_reg *pick_runtime_regs(struct xe_device *xe, unsigned int *count)
{
const struct xe_reg *regs;
- if (GRAPHICS_VERx100(xe) >= 3000) {
+ if (GRAPHICS_VER(xe) >= 35) {
+ *count = ARRAY_SIZE(ver_35_runtime_regs);
+ regs = ver_35_runtime_regs;
+ } else if (GRAPHICS_VERx100(xe) >= 3000) {
*count = ARRAY_SIZE(ver_3000_runtime_regs);
regs = ver_3000_runtime_regs;
} else if (GRAPHICS_VERx100(xe) >= 2000) {
@@ -266,7 +201,7 @@ static void pf_prepare_runtime_info(struct xe_gt *gt)
read_many(gt, size, regs, values);
if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV)) {
- struct drm_printer p = xe_gt_info_printer(gt);
+ struct drm_printer p = xe_gt_dbg_printer(gt);
xe_gt_sriov_pf_service_print_runtime(gt, &p);
}
@@ -285,8 +220,6 @@ int xe_gt_sriov_pf_service_init(struct xe_gt *gt)
{
int err;
- pf_init_versions(gt);
-
err = pf_alloc_runtime_info(gt);
if (unlikely(err))
goto failed;
@@ -311,47 +244,6 @@ void xe_gt_sriov_pf_service_update(struct xe_gt *gt)
pf_prepare_runtime_info(gt);
}
-/**
- * xe_gt_sriov_pf_service_reset - Reset a connection with the VF.
- * @gt: the &xe_gt
- * @vfid: the VF identifier
- *
- * Reset a VF driver negotiated VF/PF ABI version.
- * After that point, the VF driver will have to perform new version handshake
- * to continue use of the PF services again.
- *
- * This function can only be called on PF.
- */
-void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid)
-{
- pf_disconnect(gt, vfid);
-}
-
-/* Return: 0 on success or a negative error code on failure. */
-static int pf_process_handshake(struct xe_gt *gt, u32 vfid,
- u32 wanted_major, u32 wanted_minor,
- u32 *major, u32 *minor)
-{
- int err;
-
- xe_gt_sriov_dbg_verbose(gt, "VF%u wants ABI version %u.%u\n",
- vfid, wanted_major, wanted_minor);
-
- err = pf_negotiate_version(gt, wanted_major, wanted_minor, major, minor);
-
- if (err < 0) {
- xe_gt_sriov_notice(gt, "VF%u failed to negotiate ABI %u.%u (%pe)\n",
- vfid, wanted_major, wanted_minor, ERR_PTR(err));
- pf_disconnect(gt, vfid);
- } else {
- xe_gt_sriov_dbg(gt, "VF%u negotiated ABI version %u.%u\n",
- vfid, *major, *minor);
- pf_connect(gt, vfid, *major, *minor);
- }
-
- return 0;
-}
-
/* Return: length of the response message or a negative error code on failure. */
static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin,
const u32 *request, u32 len, u32 *response, u32 size)
@@ -371,7 +263,8 @@ static int pf_process_handshake_msg(struct xe_gt *gt, u32 origin,
wanted_major = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MAJOR, request[1]);
wanted_minor = FIELD_GET(VF2PF_HANDSHAKE_REQUEST_MSG_1_MINOR, request[1]);
- err = pf_process_handshake(gt, origin, wanted_major, wanted_minor, &major, &minor);
+ err = xe_sriov_pf_service_handshake_vf(gt_to_xe(gt), origin, wanted_major, wanted_minor,
+ &major, &minor);
if (err < 0)
return err;
@@ -430,8 +323,10 @@ static int pf_process_runtime_query_msg(struct xe_gt *gt, u32 origin,
u32 remaining = 0;
int ret;
- if (!pf_is_negotiated(gt, origin, 1, 0))
+ /* this action is available from ABI 1.0 */
+ if (!xe_sriov_pf_service_is_negotiated(gt_to_xe(gt), origin, 1, 0))
return -EACCES;
+
if (unlikely(msg_len > VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN))
return -EMSGSIZE;
if (unlikely(msg_len < VF2PF_QUERY_RUNTIME_REQUEST_MSG_LEN))
@@ -528,33 +423,3 @@ int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p
return 0;
}
-
-/**
- * xe_gt_sriov_pf_service_print_version - Print ABI versions negotiated with VFs.
- * @gt: the &xe_gt
- * @p: the &drm_printer
- *
- * This function is for PF use only.
- */
-int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p)
-{
- struct xe_device *xe = gt_to_xe(gt);
- unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe);
- struct xe_gt_sriov_pf_service_version *version;
-
- xe_gt_assert(gt, IS_SRIOV_PF(xe));
-
- for (n = 1; n <= total_vfs; n++) {
- version = &gt->sriov.pf.vfs[n].version;
- if (!version->major && !version->minor)
- continue;
-
- drm_printf(p, "VF%u:\t%u.%u\n", n, version->major, version->minor);
- }
-
- return 0;
-}
-
-#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
-#include "tests/xe_gt_sriov_pf_service_test.c"
-#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
index 56aaadf0360d..10b02c9b651c 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_service.h
@@ -14,9 +14,7 @@ struct xe_gt;
int xe_gt_sriov_pf_service_init(struct xe_gt *gt);
void xe_gt_sriov_pf_service_update(struct xe_gt *gt);
-void xe_gt_sriov_pf_service_reset(struct xe_gt *gt, unsigned int vfid);
-int xe_gt_sriov_pf_service_print_version(struct xe_gt *gt, struct drm_printer *p);
int xe_gt_sriov_pf_service_print_runtime(struct xe_gt *gt, struct drm_printer *p);
#ifdef CONFIG_PCI_IOV
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
index a64a6835ad65..667b8310478d 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_pf_types.h
@@ -31,8 +31,8 @@ struct xe_gt_sriov_metadata {
/** @version: negotiated VF/PF ABI version */
struct xe_gt_sriov_pf_service_version version;
- /** @snapshot: snapshot of the VF state data */
- struct xe_gt_sriov_state_snapshot snapshot;
+ /** @migration: per-VF migration data. */
+ struct xe_gt_sriov_migration_data migration;
};
/**
@@ -58,7 +58,6 @@ struct xe_gt_sriov_pf {
struct xe_gt_sriov_pf_service service;
struct xe_gt_sriov_pf_control control;
struct xe_gt_sriov_pf_policy policy;
- struct xe_gt_sriov_pf_migration migration;
struct xe_gt_sriov_spare_config spare;
struct xe_gt_sriov_metadata *vfs;
};
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
index 17624b16300a..d3457d608db8 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_printk.h
@@ -7,10 +7,13 @@
#define _XE_GT_SRIOV_PRINTK_H_
#include "xe_gt_printk.h"
-#include "xe_sriov_printk.h"
+#include "xe_tile_sriov_printk.h"
+
+#define __XE_GT_SRIOV_PRINTK_FMT(_gt, _fmt, ...) \
+ __XE_TILE_SRIOV_PRINTK_FMT((_gt)->tile, __XE_GT_PRINTK_FMT((_gt), _fmt, ##__VA_ARGS__))
#define __xe_gt_sriov_printk(gt, _level, fmt, ...) \
- xe_gt_printk((gt), _level, "%s" fmt, xe_sriov_printk_prefix(gt_to_xe(gt)), ##__VA_ARGS__)
+ xe_sriov_##_level(gt_to_xe(gt), __XE_GT_SRIOV_PRINTK_FMT((gt), fmt, ##__VA_ARGS__))
#define xe_gt_sriov_err(_gt, _fmt, ...) \
__xe_gt_sriov_printk(_gt, err, _fmt, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
index a439261bf4d7..033eae2d03d3 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.c
@@ -23,11 +23,19 @@
#include "xe_gt_sriov_vf.h"
#include "xe_gt_sriov_vf_types.h"
#include "xe_guc.h"
+#include "xe_guc_ct.h"
#include "xe_guc_hxg_helpers.h"
#include "xe_guc_relay.h"
+#include "xe_guc_submit.h"
+#include "xe_irq.h"
+#include "xe_lrc.h"
+#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
+#include "xe_tile_sriov_vf.h"
+#include "xe_tlb_inval.h"
#include "xe_uc_fw.h"
#include "xe_wopcm.h"
@@ -82,17 +90,17 @@ int xe_gt_sriov_vf_reset(struct xe_gt *gt)
}
static int guc_action_match_version(struct xe_guc *guc,
- u32 wanted_branch, u32 wanted_major, u32 wanted_minor,
- u32 *branch, u32 *major, u32 *minor, u32 *patch)
+ struct xe_uc_fw_version *wanted,
+ struct xe_uc_fw_version *found)
{
u32 request[VF2GUC_MATCH_VERSION_REQUEST_MSG_LEN] = {
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_HOST) |
FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
FIELD_PREP(GUC_HXG_REQUEST_MSG_0_ACTION,
GUC_ACTION_VF2GUC_MATCH_VERSION),
- FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted_branch) |
- FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted_major) |
- FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted_minor),
+ FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_BRANCH, wanted->branch) |
+ FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MAJOR, wanted->major) |
+ FIELD_PREP(VF2GUC_MATCH_VERSION_REQUEST_MSG_1_MINOR, wanted->minor),
};
u32 response[GUC_MAX_MMIO_MSG_LEN];
int ret;
@@ -106,120 +114,138 @@ static int guc_action_match_version(struct xe_guc *guc,
if (unlikely(FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_0_MBZ, response[0])))
return -EPROTO;
- *branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]);
- *major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]);
- *minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]);
- *patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]);
+ memset(found, 0, sizeof(struct xe_uc_fw_version));
+ found->branch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_BRANCH, response[1]);
+ found->major = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MAJOR, response[1]);
+ found->minor = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_MINOR, response[1]);
+ found->patch = FIELD_GET(VF2GUC_MATCH_VERSION_RESPONSE_MSG_1_PATCH, response[1]);
return 0;
}
-static void vf_minimum_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor)
+static int guc_action_match_version_any(struct xe_guc *guc,
+ struct xe_uc_fw_version *found)
+{
+ struct xe_uc_fw_version wanted = {
+ .branch = GUC_VERSION_BRANCH_ANY,
+ .major = GUC_VERSION_MAJOR_ANY,
+ .minor = GUC_VERSION_MINOR_ANY,
+ .patch = 0
+ };
+
+ return guc_action_match_version(guc, &wanted, found);
+}
+
+static void vf_minimum_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver)
{
struct xe_device *xe = gt_to_xe(gt);
+ memset(ver, 0, sizeof(struct xe_uc_fw_version));
+
switch (xe->info.platform) {
case XE_TIGERLAKE ... XE_PVC:
/* 1.1 this is current baseline for Xe driver */
- *branch = 0;
- *major = 1;
- *minor = 1;
+ ver->branch = 0;
+ ver->major = 1;
+ ver->minor = 1;
break;
default:
/* 1.2 has support for the GMD_ID KLV */
- *branch = 0;
- *major = 1;
- *minor = 2;
+ ver->branch = 0;
+ ver->major = 1;
+ ver->minor = 2;
break;
}
}
-static void vf_wanted_guc_version(struct xe_gt *gt, u32 *branch, u32 *major, u32 *minor)
+static void vf_wanted_guc_version(struct xe_gt *gt, struct xe_uc_fw_version *ver)
{
/* for now it's the same as minimum */
- return vf_minimum_guc_version(gt, branch, major, minor);
+ return vf_minimum_guc_version(gt, ver);
}
static int vf_handshake_with_guc(struct xe_gt *gt)
{
- struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version;
+ struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
+ struct xe_uc_fw_version wanted = {0};
struct xe_guc *guc = &gt->uc.guc;
- u32 wanted_branch, wanted_major, wanted_minor;
- u32 branch, major, minor, patch;
+ bool old = false;
int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
/* select wanted version - prefer previous (if any) */
if (guc_version->major || guc_version->minor) {
- wanted_branch = guc_version->branch;
- wanted_major = guc_version->major;
- wanted_minor = guc_version->minor;
+ wanted = *guc_version;
+ old = true;
} else {
- vf_wanted_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor);
- xe_gt_assert(gt, wanted_major != GUC_VERSION_MAJOR_ANY);
+ vf_wanted_guc_version(gt, &wanted);
+ xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY);
+
+ /* First time we handshake, so record the minimum wanted */
+ gt->sriov.vf.wanted_guc_version = wanted;
}
- err = guc_action_match_version(guc, wanted_branch, wanted_major, wanted_minor,
- &branch, &major, &minor, &patch);
+ err = guc_action_match_version(guc, &wanted, guc_version);
if (unlikely(err))
goto fail;
- /* we don't support interface version change */
- if ((guc_version->major || guc_version->minor) &&
- (guc_version->branch != branch || guc_version->major != major ||
- guc_version->minor != minor)) {
- xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n",
- branch, major, minor, patch);
- xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n",
- guc_version->branch, guc_version->major,
- guc_version->minor, guc_version->patch);
- err = -EREMCHG;
- goto fail;
+ if (old) {
+ /* we don't support interface version change */
+ if (MAKE_GUC_VER_STRUCT(*guc_version) != MAKE_GUC_VER_STRUCT(wanted)) {
+ xe_gt_sriov_err(gt, "New GuC interface version detected: %u.%u.%u.%u\n",
+ guc_version->branch, guc_version->major,
+ guc_version->minor, guc_version->patch);
+ xe_gt_sriov_info(gt, "Previously used version was: %u.%u.%u.%u\n",
+ wanted.branch, wanted.major,
+ wanted.minor, wanted.patch);
+ err = -EREMCHG;
+ goto fail;
+ } else {
+ /* version is unchanged, no need to re-verify it */
+ return 0;
+ }
}
/* illegal */
- if (major > wanted_major) {
+ if (guc_version->major > wanted.major) {
err = -EPROTO;
goto unsupported;
}
/* there's no fallback on major version. */
- if (major != wanted_major) {
+ if (guc_version->major != wanted.major) {
err = -ENOPKG;
goto unsupported;
}
/* check against minimum version supported by us */
- vf_minimum_guc_version(gt, &wanted_branch, &wanted_major, &wanted_minor);
- xe_gt_assert(gt, major != GUC_VERSION_MAJOR_ANY);
- if (major < wanted_major || (major == wanted_major && minor < wanted_minor)) {
+ vf_minimum_guc_version(gt, &wanted);
+ xe_gt_assert(gt, wanted.major != GUC_VERSION_MAJOR_ANY);
+ if (MAKE_GUC_VER_STRUCT(*guc_version) < MAKE_GUC_VER_STRUCT(wanted)) {
err = -ENOKEY;
goto unsupported;
}
xe_gt_sriov_dbg(gt, "using GuC interface version %u.%u.%u.%u\n",
- branch, major, minor, patch);
+ guc_version->branch, guc_version->major,
+ guc_version->minor, guc_version->patch);
- guc_version->branch = branch;
- guc_version->major = major;
- guc_version->minor = minor;
- guc_version->patch = patch;
return 0;
unsupported:
xe_gt_sriov_err(gt, "Unsupported GuC version %u.%u.%u.%u (%pe)\n",
- branch, major, minor, patch, ERR_PTR(err));
+ guc_version->branch, guc_version->major,
+ guc_version->minor, guc_version->patch,
+ ERR_PTR(err));
fail:
xe_gt_sriov_err(gt, "Unable to confirm GuC version %u.%u (%pe)\n",
- wanted_major, wanted_minor, ERR_PTR(err));
+ wanted.major, wanted.minor, ERR_PTR(err));
/* try again with *any* just to query which version is supported */
- if (!guc_action_match_version(guc, GUC_VERSION_BRANCH_ANY,
- GUC_VERSION_MAJOR_ANY, GUC_VERSION_MINOR_ANY,
- &branch, &major, &minor, &patch))
+ if (!guc_action_match_version_any(guc, &wanted))
xe_gt_sriov_notice(gt, "GuC reports interface version %u.%u.%u.%u\n",
- branch, major, minor, patch);
+ wanted.branch, wanted.major, wanted.minor, wanted.patch);
return err;
}
@@ -250,6 +276,29 @@ int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt)
return 0;
}
+/**
+ * xe_gt_sriov_vf_guc_versions - Minimum required and found GuC ABI versions
+ * @gt: the &xe_gt
+ * @wanted: pointer to the xe_uc_fw_version to be filled with the wanted version
+ * @found: pointer to the xe_uc_fw_version to be filled with the found version
+ *
+ * This function is for VF use only and it can only be used after successful
+ * version handshake with the GuC.
+ */
+void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt,
+ struct xe_uc_fw_version *wanted,
+ struct xe_uc_fw_version *found)
+{
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
+
+ if (wanted)
+ *wanted = gt->sriov.vf.wanted_guc_version;
+
+ if (found)
+ *found = gt->sriov.vf.guc_version;
+}
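
For reference, a minimal sketch of how a caller could consume this new helper; the caller name and print format below are illustrative assumptions, not something this patch adds:

/* Illustrative sketch only -- hypothetical caller, not part of the patch. */
#include <drm/drm_print.h>

#include "xe_gt_sriov_vf.h"
#include "xe_uc_fw_types.h"

static void example_print_guc_abi(struct xe_gt *gt, struct drm_printer *p)
{
	struct xe_uc_fw_version wanted, found;

	/* Valid only on a VF, after a successful GuC version handshake. */
	xe_gt_sriov_vf_guc_versions(gt, &wanted, &found);

	drm_printf(p, "GuC ABI wanted %u.%u.%u.*, found %u.%u.%u.%u\n",
		   wanted.branch, wanted.major, wanted.minor,
		   found.branch, found.major, found.minor, found.patch);
}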
+
static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
{
u32 request[GUC_HXG_REQUEST_MSG_MIN_LEN] = {
@@ -265,13 +314,13 @@ static int guc_action_vf_notify_resfix_done(struct xe_guc *guc)
}
/**
- * xe_gt_sriov_vf_notify_resfix_done - Notify GuC about resource fixups apply completed.
+ * vf_notify_resfix_done - Notify GuC about resource fixups apply completed.
* @gt: the &xe_gt struct instance linked to target GuC
*
* Returns: 0 if the operation completed successfully, or a negative error
* code otherwise.
*/
-int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt)
+static int vf_notify_resfix_done(struct xe_gt *gt)
{
struct xe_guc *guc = &gt->uc.guc;
int err;
@@ -391,13 +440,17 @@ u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt)
static int vf_get_ggtt_info(struct xe_gt *gt)
{
- struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
struct xe_guc *guc = &gt->uc.guc;
- u64 start, size;
+ u64 start, size, ggtt_size;
+ s64 shift;
int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ guard(mutex)(&ggtt->lock);
+
err = guc_action_query_single_klv64(guc, GUC_KLV_VF_CFG_GGTT_START_KEY, &start);
if (unlikely(err))
return err;
@@ -406,27 +459,44 @@ static int vf_get_ggtt_info(struct xe_gt *gt)
if (unlikely(err))
return err;
- if (config->ggtt_size && config->ggtt_size != size) {
+ if (!size)
+ return -ENODATA;
+
+ ggtt_size = xe_tile_sriov_vf_ggtt(tile);
+ if (ggtt_size && ggtt_size != size) {
xe_gt_sriov_err(gt, "Unexpected GGTT reassignment: %lluK != %lluK\n",
- size / SZ_1K, config->ggtt_size / SZ_1K);
+ size / SZ_1K, ggtt_size / SZ_1K);
return -EREMCHG;
}
xe_gt_sriov_dbg_verbose(gt, "GGTT %#llx-%#llx = %lluK\n",
start, start + size - 1, size / SZ_1K);
- config->ggtt_base = start;
- config->ggtt_size = size;
+ shift = start - (s64)xe_tile_sriov_vf_ggtt_base(tile);
+ xe_tile_sriov_vf_ggtt_base_store(tile, start);
+ xe_tile_sriov_vf_ggtt_store(tile, size);
+
+ if (shift && shift != start) {
+ xe_gt_sriov_info(gt, "Shifting GGTT base by %lld to 0x%016llx\n",
+ shift, start);
+ xe_tile_sriov_vf_fixup_ggtt_nodes_locked(gt_to_tile(gt), shift);
+ }
+
+ if (xe_sriov_vf_migration_supported(gt_to_xe(gt))) {
+ WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
+ smp_wmb(); /* Ensure above write visible before wake */
+ wake_up_all(&gt->sriov.vf.migration.wq);
+ }
- return config->ggtt_size ? 0 : -ENODATA;
+ return 0;
}
static int vf_get_lmem_info(struct xe_gt *gt)
{
- struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
+ struct xe_tile *tile = gt_to_tile(gt);
struct xe_guc *guc = &gt->uc.guc;
char size_str[10];
- u64 size;
+ u64 size, lmem_size;
int err;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
@@ -435,18 +505,19 @@ static int vf_get_lmem_info(struct xe_gt *gt)
if (unlikely(err))
return err;
- if (config->lmem_size && config->lmem_size != size) {
+ lmem_size = xe_tile_sriov_vf_lmem(tile);
+ if (lmem_size && lmem_size != size) {
xe_gt_sriov_err(gt, "Unexpected LMEM reassignment: %lluM != %lluM\n",
- size / SZ_1M, config->lmem_size / SZ_1M);
+ size / SZ_1M, lmem_size / SZ_1M);
return -EREMCHG;
}
string_get_size(size, 1, STRING_UNITS_2, size_str, sizeof(size_str));
xe_gt_sriov_dbg_verbose(gt, "LMEM %lluM %s\n", size / SZ_1M, size_str);
- config->lmem_size = size;
+ xe_tile_sriov_vf_lmem_store(tile, size);
- return config->lmem_size ? 0 : -ENODATA;
+ return size ? 0 : -ENODATA;
}
static int vf_get_submission_cfg(struct xe_gt *gt)
@@ -497,7 +568,9 @@ static void vf_cache_gmdid(struct xe_gt *gt)
* xe_gt_sriov_vf_query_config - Query SR-IOV config data over MMIO.
* @gt: the &xe_gt
*
- * This function is for VF use only.
+ * This function is for VF use only. It may shift the GGTT and it runs under
+ * the GGTT lock, which makes this step visible to all GTs that share the
+ * GGTT.
*
* Return: 0 on success or a negative error code on failure.
*/
@@ -510,7 +583,7 @@ int xe_gt_sriov_vf_query_config(struct xe_gt *gt)
if (unlikely(err))
return err;
- if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) {
+ if (IS_DGFX(xe) && xe_gt_is_main_type(gt)) {
err = vf_get_lmem_info(gt);
if (unlikely(err))
return err;
@@ -543,125 +616,6 @@ u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt)
return gt->sriov.vf.self_config.num_ctxs;
}
-/**
- * xe_gt_sriov_vf_lmem - VF LMEM configuration.
- * @gt: the &xe_gt
- *
- * This function is for VF use only.
- *
- * Return: size of the LMEM assigned to VF.
- */
-u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt)
-{
- xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- xe_gt_assert(gt, gt->sriov.vf.guc_version.major);
- xe_gt_assert(gt, gt->sriov.vf.self_config.lmem_size);
-
- return gt->sriov.vf.self_config.lmem_size;
-}
-
-static struct xe_ggtt_node *
-vf_balloon_ggtt_node(struct xe_ggtt *ggtt, u64 start, u64 end)
-{
- struct xe_ggtt_node *node;
- int err;
-
- node = xe_ggtt_node_init(ggtt);
- if (IS_ERR(node))
- return node;
-
- err = xe_ggtt_node_insert_balloon(node, start, end);
- if (err) {
- xe_ggtt_node_fini(node);
- return ERR_PTR(err);
- }
-
- return node;
-}
-
-static int vf_balloon_ggtt(struct xe_gt *gt)
-{
- struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_ggtt *ggtt = tile->mem.ggtt;
- struct xe_device *xe = gt_to_xe(gt);
- u64 start, end;
-
- xe_gt_assert(gt, IS_SRIOV_VF(xe));
- xe_gt_assert(gt, !xe_gt_is_media_type(gt));
-
- if (!config->ggtt_size)
- return -ENODATA;
-
- /*
- * VF can only use part of the GGTT as allocated by the PF:
- *
- * WOPCM GUC_GGTT_TOP
- * |<------------ Total GGTT size ------------------>|
- *
- * VF GGTT base -->|<- size ->|
- *
- * +--------------------+----------+-----------------+
- * |////////////////////| block |\\\\\\\\\\\\\\\\\|
- * +--------------------+----------+-----------------+
- *
- * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
- */
-
- start = xe_wopcm_size(xe);
- end = config->ggtt_base;
- if (end != start) {
- tile->sriov.vf.ggtt_balloon[0] = vf_balloon_ggtt_node(ggtt, start, end);
- if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
- return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);
- }
-
- start = config->ggtt_base + config->ggtt_size;
- end = GUC_GGTT_TOP;
- if (end != start) {
- tile->sriov.vf.ggtt_balloon[1] = vf_balloon_ggtt_node(ggtt, start, end);
- if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
- xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]);
- return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
- }
- }
-
- return 0;
-}
-
-static void deballoon_ggtt(struct drm_device *drm, void *arg)
-{
- struct xe_tile *tile = arg;
-
- xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
- xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[1]);
- xe_ggtt_node_remove_balloon(tile->sriov.vf.ggtt_balloon[0]);
-}
-
-/**
- * xe_gt_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
- * @gt: the &xe_gt
- *
- * This function is for VF use only.
- *
- * Return: 0 on success or a negative error code on failure.
- */
-int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt)
-{
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_device *xe = tile_to_xe(tile);
- int err;
-
- if (xe_gt_is_media_type(gt))
- return 0;
-
- err = vf_balloon_ggtt(gt);
- if (err)
- return err;
-
- return drmm_add_action_or_reset(&xe->drm, deballoon_ggtt, tile);
-}
-
static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor)
{
u32 request[VF2PF_HANDSHAKE_REQUEST_MSG_LEN] = {
@@ -694,21 +648,22 @@ static int relay_action_handshake(struct xe_gt *gt, u32 *major, u32 *minor)
return 0;
}
-static void vf_connect_pf(struct xe_gt *gt, u16 major, u16 minor)
+static void vf_connect_pf(struct xe_device *xe, u16 major, u16 minor)
{
- xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ xe_assert(xe, IS_SRIOV_VF(xe));
- gt->sriov.vf.pf_version.major = major;
- gt->sriov.vf.pf_version.minor = minor;
+ xe->sriov.vf.pf_version.major = major;
+ xe->sriov.vf.pf_version.minor = minor;
}
-static void vf_disconnect_pf(struct xe_gt *gt)
+static void vf_disconnect_pf(struct xe_device *xe)
{
- vf_connect_pf(gt, 0, 0);
+ vf_connect_pf(xe, 0, 0);
}
static int vf_handshake_with_pf(struct xe_gt *gt)
{
+ struct xe_device *xe = gt_to_xe(gt);
u32 major_wanted = GUC_RELAY_VERSION_LATEST_MAJOR;
u32 minor_wanted = GUC_RELAY_VERSION_LATEST_MINOR;
u32 major = major_wanted, minor = minor_wanted;
@@ -724,13 +679,13 @@ static int vf_handshake_with_pf(struct xe_gt *gt)
}
xe_gt_sriov_dbg(gt, "using VF/PF ABI %u.%u\n", major, minor);
- vf_connect_pf(gt, major, minor);
+ vf_connect_pf(xe, major, minor);
return 0;
failed:
xe_gt_sriov_err(gt, "Unable to confirm VF/PF ABI version %u.%u (%pe)\n",
major, minor, ERR_PTR(err));
- vf_disconnect_pf(gt);
+ vf_disconnect_pf(xe);
return err;
}
@@ -758,6 +713,44 @@ failed:
}
/**
+ * xe_gt_sriov_vf_default_lrcs_hwsp_rebase - Update GGTT references in HWSP of default LRCs.
+ * @gt: the &xe_gt struct instance
+ */
+static void xe_gt_sriov_vf_default_lrcs_hwsp_rebase(struct xe_gt *gt)
+{
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+
+ for_each_hw_engine(hwe, gt, id)
+ xe_default_lrc_update_memirq_regs_with_address(hwe);
+}
+
+static void vf_start_migration_recovery(struct xe_gt *gt)
+{
+ bool started;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ spin_lock(&gt->sriov.vf.migration.lock);
+
+ if (!gt->sriov.vf.migration.recovery_queued &&
+ !gt->sriov.vf.migration.recovery_teardown) {
+ gt->sriov.vf.migration.recovery_queued = true;
+ WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, true);
+ WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, true);
+ smp_wmb(); /* Ensure above writes visible before wake */
+
+ xe_guc_ct_wake_waiters(&gt->uc.guc.ct);
+
+ started = queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
+ xe_gt_sriov_info(gt, "VF migration recovery %s\n", started ?
+ "scheduled" : "already in progress");
+ }
+
+ spin_unlock(&gt->sriov.vf.migration.lock);
+}
+
+/**
* xe_gt_sriov_vf_migrated_event_handler - Start a VF migration recovery,
* or just mark that a GuC is ready for it.
* @gt: the &xe_gt struct instance linked to target GuC
@@ -769,24 +762,25 @@ void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt)
struct xe_device *xe = gt_to_xe(gt);
xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ xe_gt_assert(gt, xe_gt_sriov_vf_recovery_pending(gt));
- set_bit(gt->info.id, &xe->sriov.vf.migration.gt_flags);
- /*
- * We need to be certain that if all flags were set, at least one
- * thread will notice that and schedule the recovery.
- */
- smp_mb__after_atomic();
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_gt_sriov_err(gt, "migration not supported\n");
+ return;
+ }
xe_gt_sriov_info(gt, "ready for recovery after migration\n");
- xe_sriov_vf_start_migration_recovery(xe);
+ vf_start_migration_recovery(gt);
}
static bool vf_is_negotiated(struct xe_gt *gt, u16 major, u16 minor)
{
- xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+ struct xe_device *xe = gt_to_xe(gt);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
- return major == gt->sriov.vf.pf_version.major &&
- minor <= gt->sriov.vf.pf_version.minor;
+ return major == xe->sriov.vf.pf_version.major &&
+ minor <= xe->sriov.vf.pf_version.minor;
}
static int vf_prepare_runtime_info(struct xe_gt *gt, unsigned int num_regs)
@@ -974,7 +968,6 @@ u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg)
struct vf_runtime_reg *rr;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- xe_gt_assert(gt, gt->sriov.vf.pf_version.major);
xe_gt_assert(gt, !reg.vf);
if (reg.addr == GMD_ID.addr) {
@@ -1032,20 +1025,25 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_gt_sriov_vf_selfconfig *config = &gt->sriov.vf.self_config;
struct xe_device *xe = gt_to_xe(gt);
+ u64 lmem_size;
char buf[10];
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
- drm_printf(p, "GGTT range:\t%#llx-%#llx\n",
- config->ggtt_base,
- config->ggtt_base + config->ggtt_size - 1);
+ if (xe_gt_is_main_type(gt)) {
+ u64 ggtt_size = xe_tile_sriov_vf_ggtt(gt_to_tile(gt));
+ u64 ggtt_base = xe_tile_sriov_vf_ggtt_base(gt_to_tile(gt));
- string_get_size(config->ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf));
- drm_printf(p, "GGTT size:\t%llu (%s)\n", config->ggtt_size, buf);
+ drm_printf(p, "GGTT range:\t%#llx-%#llx\n",
+ ggtt_base, ggtt_base + ggtt_size - 1);
+ string_get_size(ggtt_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "GGTT size:\t%llu (%s)\n", ggtt_size, buf);
- if (IS_DGFX(xe) && !xe_gt_is_media_type(gt)) {
- string_get_size(config->lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf));
- drm_printf(p, "LMEM size:\t%llu (%s)\n", config->lmem_size, buf);
+ if (IS_DGFX(xe)) {
+ lmem_size = xe_tile_sriov_vf_lmem(gt_to_tile(gt));
+ string_get_size(lmem_size, 1, STRING_UNITS_2, buf, sizeof(buf));
+ drm_printf(p, "LMEM size:\t%llu (%s)\n", lmem_size, buf);
+ }
}
drm_printf(p, "GuC contexts:\t%u\n", config->num_ctxs);
@@ -1079,19 +1077,21 @@ void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p)
*/
void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
{
- struct xe_gt_sriov_vf_guc_version *guc_version = &gt->sriov.vf.guc_version;
- struct xe_gt_sriov_vf_relay_version *pf_version = &gt->sriov.vf.pf_version;
- u32 branch, major, minor;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_uc_fw_version *guc_version = &gt->sriov.vf.guc_version;
+ struct xe_uc_fw_version *wanted = &gt->sriov.vf.wanted_guc_version;
+ struct xe_sriov_vf_relay_version *pf_version = &xe->sriov.vf.pf_version;
+ struct xe_uc_fw_version ver;
xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
drm_printf(p, "GuC ABI:\n");
- vf_minimum_guc_version(gt, &branch, &major, &minor);
- drm_printf(p, "\tbase:\t%u.%u.%u.*\n", branch, major, minor);
+ vf_minimum_guc_version(gt, &ver);
+ drm_printf(p, "\tbase:\t%u.%u.%u.*\n", ver.branch, ver.major, ver.minor);
- vf_wanted_guc_version(gt, &branch, &major, &minor);
- drm_printf(p, "\twanted:\t%u.%u.%u.*\n", branch, major, minor);
+ drm_printf(p, "\twanted:\t%u.%u.%u.*\n",
+ wanted->branch, wanted->major, wanted->minor);
drm_printf(p, "\thandshake:\t%u.%u.%u.%u\n",
guc_version->branch, guc_version->major,
@@ -1106,3 +1106,272 @@ void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "\thandshake:\t%u.%u\n",
pf_version->major, pf_version->minor);
}
+
+static bool vf_post_migration_shutdown(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ /*
+ * On platforms where CCS must be restored by the primary GT, the media
+ * GT's VF post-migration recovery must run afterward. Detect this case
+ * and re-queue the media GT's restore work item if necessary.
+ */
+ if (xe->info.needs_shared_vf_gt_wq && xe_gt_is_media_type(gt)) {
+ struct xe_gt *primary_gt = gt_to_tile(gt)->primary_gt;
+
+ if (xe_gt_sriov_vf_recovery_pending(primary_gt))
+ return true;
+ }
+
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ gt->sriov.vf.migration.recovery_queued = false;
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ xe_guc_ct_flush_and_stop(&gt->uc.guc.ct);
+ xe_guc_submit_pause(&gt->uc.guc);
+ xe_tlb_inval_reset(&gt->tlb_inval);
+
+ return false;
+}
+
+static size_t post_migration_scratch_size(struct xe_device *xe)
+{
+ return max(xe_lrc_reg_size(xe), LRC_WA_BB_SIZE);
+}
+
+static int vf_post_migration_fixups(struct xe_gt *gt)
+{
+ void *buf = gt->sriov.vf.migration.scratch;
+ int err;
+
+ /* xe_gt_sriov_vf_query_config will fixup the GGTT addresses */
+ err = xe_gt_sriov_vf_query_config(gt);
+ if (err)
+ return err;
+
+ if (xe_gt_is_main_type(gt))
+ xe_sriov_vf_ccs_rebase(gt_to_xe(gt));
+
+ xe_gt_sriov_vf_default_lrcs_hwsp_rebase(gt);
+ err = xe_guc_contexts_hwsp_rebase(&gt->uc.guc, buf);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static void vf_post_migration_rearm(struct xe_gt *gt)
+{
+ xe_guc_ct_restart(&gt->uc.guc.ct);
+ xe_guc_submit_unpause_prepare(&gt->uc.guc);
+}
+
+static void vf_post_migration_kickstart(struct xe_gt *gt)
+{
+ xe_guc_submit_unpause(&gt->uc.guc);
+}
+
+static void vf_post_migration_abort(struct xe_gt *gt)
+{
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
+ WRITE_ONCE(gt->sriov.vf.migration.ggtt_need_fixes, false);
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ wake_up_all(&gt->sriov.vf.migration.wq);
+
+ xe_guc_submit_pause_abort(&gt->uc.guc);
+}
+
+static int vf_post_migration_notify_resfix_done(struct xe_gt *gt)
+{
+ bool skip_resfix = false;
+
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ if (gt->sriov.vf.migration.recovery_queued) {
+ skip_resfix = true;
+ xe_gt_sriov_dbg(gt, "another recovery imminent, resfix skipped\n");
+ } else {
+ WRITE_ONCE(gt->sriov.vf.migration.recovery_inprogress, false);
+ }
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ if (skip_resfix)
+ return -EAGAIN;
+
+ /*
+ * Make sure interrupts on the new HW are properly set. The GuC IRQ
+ * must be working at this point, since the recovery has already started,
+ * but the rest was not yet enabled using the procedure from the spec.
+ */
+ xe_irq_resume(gt_to_xe(gt));
+
+ return vf_notify_resfix_done(gt);
+}
+
+static void vf_post_migration_recovery(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ int err;
+ bool retry;
+
+ xe_gt_sriov_dbg(gt, "migration recovery in progress\n");
+
+ retry = vf_post_migration_shutdown(gt);
+ if (retry)
+ goto queue;
+
+ if (!xe_sriov_vf_migration_supported(xe)) {
+ xe_gt_sriov_err(gt, "migration is not supported\n");
+ err = -ENOTRECOVERABLE;
+ goto fail;
+ }
+
+ err = vf_post_migration_fixups(gt);
+ if (err)
+ goto fail;
+
+ vf_post_migration_rearm(gt);
+
+ err = vf_post_migration_notify_resfix_done(gt);
+ if (err && err != -EAGAIN)
+ goto fail;
+
+ vf_post_migration_kickstart(gt);
+
+ xe_gt_sriov_notice(gt, "migration recovery ended\n");
+ return;
+fail:
+ vf_post_migration_abort(gt);
+ xe_gt_sriov_err(gt, "migration recovery failed (%pe)\n", ERR_PTR(err));
+ xe_device_declare_wedged(xe);
+ return;
+
+queue:
+ xe_gt_sriov_info(gt, "Re-queuing migration recovery\n");
+ queue_work(gt->ordered_wq, &gt->sriov.vf.migration.worker);
+}
+
+static void migration_worker_func(struct work_struct *w)
+{
+ struct xe_gt *gt = container_of(w, struct xe_gt,
+ sriov.vf.migration.worker);
+
+ vf_post_migration_recovery(gt);
+}
+
+static void vf_migration_fini(void *arg)
+{
+ struct xe_gt *gt = arg;
+
+ spin_lock_irq(&gt->sriov.vf.migration.lock);
+ gt->sriov.vf.migration.recovery_teardown = true;
+ spin_unlock_irq(&gt->sriov.vf.migration.lock);
+
+ cancel_work_sync(&gt->sriov.vf.migration.worker);
+}
+
+/**
+ * xe_gt_sriov_vf_init_early() - GT VF init early
+ * @gt: the &xe_gt
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_gt_sriov_vf_init_early(struct xe_gt *gt)
+{
+ void *buf;
+
+ if (!xe_sriov_vf_migration_supported(gt_to_xe(gt)))
+ return 0;
+
+ buf = drmm_kmalloc(&gt_to_xe(gt)->drm,
+ post_migration_scratch_size(gt_to_xe(gt)),
+ GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ gt->sriov.vf.migration.scratch = buf;
+ spin_lock_init(&gt->sriov.vf.migration.lock);
+ INIT_WORK(&gt->sriov.vf.migration.worker, migration_worker_func);
+ init_waitqueue_head(&gt->sriov.vf.migration.wq);
+
+ return 0;
+}
+
+/**
+ * xe_gt_sriov_vf_init() - GT VF init
+ * @gt: the &xe_gt
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_gt_sriov_vf_init(struct xe_gt *gt)
+{
+ if (!xe_sriov_vf_migration_supported(gt_to_xe(gt)))
+ return 0;
+
+ /*
+ * We want the VF post-migration recovery to be torn down early during
+ * driver unload; since devm actions run in reverse registration order, we
+ * add this finalization action late during driver load.
+ */
+ return devm_add_action_or_reset(gt_to_xe(gt)->drm.dev,
+ vf_migration_fini, gt);
+}
+
+/**
+ * xe_gt_sriov_vf_recovery_pending() - VF post migration recovery pending
+ * @gt: the &xe_gt
+ *
+ * The return value of this function must be immediately visible upon vCPU
+ * unhalt and must persist until RESFIX_DONE is issued. This guarantee is
+ * currently implemented only for platforms that support memirq. If non-memirq
+ * platforms begin to support VF migration, this function will need to be
+ * updated accordingly.
+ *
+ * Return: True if VF post migration recovery is pending, False otherwise
+ */
+bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt)
+{
+ struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ /* early detection until recovery starts */
+ if (xe_device_uses_memirq(gt_to_xe(gt)) &&
+ xe_memirq_guc_sw_int_0_irq_pending(memirq, &gt->uc.guc))
+ return true;
+
+ return READ_ONCE(gt->sriov.vf.migration.recovery_inprogress);
+}
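
As an aside, a hedged sketch of the guard pattern xe_gt_sriov_vf_recovery_pending() enables; the caller below is hypothetical and not introduced by this patch:

/* Illustrative sketch only -- hypothetical VF-only caller. */
#include <linux/errno.h>

#include "xe_gt_sriov_vf.h"

static int example_optional_guc_request(struct xe_gt *gt)
{
	/*
	 * While post-migration fixups are outstanding, GGTT references and
	 * GuC state may be stale, so back off and let the caller retry.
	 */
	if (xe_gt_sriov_vf_recovery_pending(gt))
		return -EAGAIN;

	/* ... build and send the GuC request here ... */
	return 0;
}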
+
+static bool vf_valid_ggtt(struct xe_gt *gt)
+{
+ struct xe_memirq *memirq = &gt_to_tile(gt)->memirq;
+ bool irq_pending = xe_device_uses_memirq(gt_to_xe(gt)) &&
+ xe_memirq_guc_sw_int_0_irq_pending(memirq, &gt->uc.guc);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(gt_to_xe(gt)));
+
+ if (irq_pending || READ_ONCE(gt->sriov.vf.migration.ggtt_need_fixes))
+ return false;
+
+ return true;
+}
+
+/**
+ * xe_gt_sriov_vf_wait_valid_ggtt() - VF wait for valid GGTT addresses
+ * @gt: the &xe_gt
+ */
+void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt)
+{
+ int ret;
+
+ if (!IS_SRIOV_VF(gt_to_xe(gt)) ||
+ !xe_sriov_vf_migration_supported(gt_to_xe(gt)))
+ return;
+
+ ret = wait_event_interruptible_timeout(gt->sriov.vf.migration.wq,
+ vf_valid_ggtt(gt),
+ HZ * 5);
+ xe_gt_WARN_ON(gt, !ret);
+}
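
And a matching sketch for the wait helper, again with a hypothetical caller: block until the post-migration GGTT fixups have landed before handing a GGTT offset to hardware.

/* Illustrative sketch only -- hypothetical caller. */
#include <linux/types.h>

#include "xe_gt_sriov_vf.h"

static void example_program_ggtt_offset(struct xe_gt *gt, u64 ggtt_addr)
{
	/* Returns immediately unless this is a VF with migration support. */
	xe_gt_sriov_vf_wait_valid_ggtt(gt);

	/* ... write ggtt_addr into a register or a GuC message here ... */
}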
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
index ba6c5d74e326..af40276790fa 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf.h
@@ -11,19 +11,26 @@
struct drm_printer;
struct xe_gt;
struct xe_reg;
+struct xe_uc_fw_version;
int xe_gt_sriov_vf_reset(struct xe_gt *gt);
int xe_gt_sriov_vf_bootstrap(struct xe_gt *gt);
+void xe_gt_sriov_vf_guc_versions(struct xe_gt *gt,
+ struct xe_uc_fw_version *wanted,
+ struct xe_uc_fw_version *found);
int xe_gt_sriov_vf_query_config(struct xe_gt *gt);
int xe_gt_sriov_vf_connect(struct xe_gt *gt);
int xe_gt_sriov_vf_query_runtime(struct xe_gt *gt);
-int xe_gt_sriov_vf_prepare_ggtt(struct xe_gt *gt);
-int xe_gt_sriov_vf_notify_resfix_done(struct xe_gt *gt);
void xe_gt_sriov_vf_migrated_event_handler(struct xe_gt *gt);
+int xe_gt_sriov_vf_init_early(struct xe_gt *gt);
+int xe_gt_sriov_vf_init(struct xe_gt *gt);
+bool xe_gt_sriov_vf_recovery_pending(struct xe_gt *gt);
+
u32 xe_gt_sriov_vf_gmdid(struct xe_gt *gt);
u16 xe_gt_sriov_vf_guc_ids(struct xe_gt *gt);
u64 xe_gt_sriov_vf_lmem(struct xe_gt *gt);
+
u32 xe_gt_sriov_vf_read32(struct xe_gt *gt, struct xe_reg reg);
void xe_gt_sriov_vf_write32(struct xe_gt *gt, struct xe_reg reg, u32 val);
@@ -31,4 +38,6 @@ void xe_gt_sriov_vf_print_config(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_sriov_vf_print_runtime(struct xe_gt *gt, struct drm_printer *p);
void xe_gt_sriov_vf_print_version(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_sriov_vf_wait_valid_ggtt(struct xe_gt *gt);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
index a57f13b5afcd..420b0e6089de 100644
--- a/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_sriov_vf_types.h
@@ -7,41 +7,14 @@
#define _XE_GT_SRIOV_VF_TYPES_H_
#include <linux/types.h>
-
-/**
- * struct xe_gt_sriov_vf_guc_version - GuC ABI version details.
- */
-struct xe_gt_sriov_vf_guc_version {
- /** @branch: branch version. */
- u8 branch;
- /** @major: major version. */
- u8 major;
- /** @minor: minor version. */
- u8 minor;
- /** @patch: patch version. */
- u8 patch;
-};
-
-/**
- * struct xe_gt_sriov_vf_relay_version - PF ABI version details.
- */
-struct xe_gt_sriov_vf_relay_version {
- /** @major: major version. */
- u16 major;
- /** @minor: minor version. */
- u16 minor;
-};
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+#include "xe_uc_fw_types.h"
/**
* struct xe_gt_sriov_vf_selfconfig - VF configuration data.
*/
struct xe_gt_sriov_vf_selfconfig {
- /** @ggtt_base: assigned base offset of the GGTT region. */
- u64 ggtt_base;
- /** @ggtt_size: assigned size of the GGTT region. */
- u64 ggtt_size;
- /** @lmem_size: assigned size of the LMEM. */
- u64 lmem_size;
/** @num_ctxs: assigned number of GuC submission context IDs. */
u16 num_ctxs;
/** @num_dbs: assigned number of GuC doorbells IDs. */
@@ -68,17 +41,41 @@ struct xe_gt_sriov_vf_runtime {
};
/**
+ * struct xe_gt_sriov_vf_migration - VF migration data.
+ */
+struct xe_gt_sriov_vf_migration {
+ /** @worker: VF migration recovery worker */
+ struct work_struct worker;
+ /** @lock: Protects @recovery_queued and @recovery_teardown */
+ spinlock_t lock;
+ /** @wq: wait queue for migration fixes */
+ wait_queue_head_t wq;
+ /** @scratch: Scratch memory for VF recovery */
+ void *scratch;
+ /** @recovery_teardown: VF post migration recovery is being torn down */
+ bool recovery_teardown;
+ /** @recovery_queued: VF post migration recovery is queued */
+ bool recovery_queued;
+ /** @recovery_inprogress: VF post migration recovery in progress */
+ bool recovery_inprogress;
+ /** @ggtt_need_fixes: VF GGTT needs fixes */
+ bool ggtt_need_fixes;
+};
+
+/**
* struct xe_gt_sriov_vf - GT level VF virtualization data.
*/
struct xe_gt_sriov_vf {
+ /** @wanted_guc_version: minimum wanted GuC ABI version. */
+ struct xe_uc_fw_version wanted_guc_version;
/** @guc_version: negotiated GuC ABI version. */
- struct xe_gt_sriov_vf_guc_version guc_version;
+ struct xe_uc_fw_version guc_version;
/** @self_config: resource configurations. */
struct xe_gt_sriov_vf_selfconfig self_config;
- /** @pf_version: negotiated VF/PF ABI version. */
- struct xe_gt_sriov_vf_relay_version pf_version;
/** @runtime: runtime data retrieved from the PF. */
struct xe_gt_sriov_vf_runtime runtime;
+ /** @migration: migration data for the VF. */
+ struct xe_gt_sriov_vf_migration migration;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.c b/drivers/gpu/drm/xe/xe_gt_stats.c
index 30f942671c2b..5f74706bab81 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.c
+++ b/drivers/gpu/drm/xe/xe_gt_stats.c
@@ -26,11 +26,46 @@ void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr)
atomic64_add(incr, &gt->stats.counters[id]);
}
+#define DEF_STAT_STR(ID, name) [XE_GT_STATS_ID_##ID] = name
+
static const char *const stat_description[__XE_GT_STATS_NUM_IDS] = {
- "svm_pagefault_count",
- "tlb_inval_count",
- "vma_pagefault_count",
- "vma_pagefault_kb",
+ DEF_STAT_STR(SVM_PAGEFAULT_COUNT, "svm_pagefault_count"),
+ DEF_STAT_STR(TLB_INVAL, "tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_COUNT, "svm_tlb_inval_count"),
+ DEF_STAT_STR(SVM_TLB_INVAL_US, "svm_tlb_inval_us"),
+ DEF_STAT_STR(VMA_PAGEFAULT_COUNT, "vma_pagefault_count"),
+ DEF_STAT_STR(VMA_PAGEFAULT_KB, "vma_pagefault_kb"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_COUNT, "svm_4K_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_COUNT, "svm_64K_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_COUNT, "svm_2M_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_VALID_PAGEFAULT_COUNT, "svm_4K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_64K_VALID_PAGEFAULT_COUNT, "svm_64K_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_2M_VALID_PAGEFAULT_COUNT, "svm_2M_valid_pagefault_count"),
+ DEF_STAT_STR(SVM_4K_PAGEFAULT_US, "svm_4K_pagefault_us"),
+ DEF_STAT_STR(SVM_64K_PAGEFAULT_US, "svm_64K_pagefault_us"),
+ DEF_STAT_STR(SVM_2M_PAGEFAULT_US, "svm_2M_pagefault_us"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_COUNT, "svm_4K_migrate_count"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_COUNT, "svm_64K_migrate_count"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_COUNT, "svm_2M_migrate_count"),
+ DEF_STAT_STR(SVM_4K_MIGRATE_US, "svm_4K_migrate_us"),
+ DEF_STAT_STR(SVM_64K_MIGRATE_US, "svm_64K_migrate_us"),
+ DEF_STAT_STR(SVM_2M_MIGRATE_US, "svm_2M_migrate_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_US, "svm_device_copy_us"),
+ DEF_STAT_STR(SVM_4K_DEVICE_COPY_US, "svm_4K_device_copy_us"),
+ DEF_STAT_STR(SVM_64K_DEVICE_COPY_US, "svm_64K_device_copy_us"),
+ DEF_STAT_STR(SVM_2M_DEVICE_COPY_US, "svm_2M_device_copy_us"),
+ DEF_STAT_STR(SVM_CPU_COPY_US, "svm_cpu_copy_us"),
+ DEF_STAT_STR(SVM_4K_CPU_COPY_US, "svm_4K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_64K_CPU_COPY_US, "svm_64K_cpu_copy_us"),
+ DEF_STAT_STR(SVM_2M_CPU_COPY_US, "svm_2M_cpu_copy_us"),
+ DEF_STAT_STR(SVM_DEVICE_COPY_KB, "svm_device_copy_kb"),
+ DEF_STAT_STR(SVM_CPU_COPY_KB, "svm_cpu_copy_kb"),
+ DEF_STAT_STR(SVM_4K_GET_PAGES_US, "svm_4K_get_pages_us"),
+ DEF_STAT_STR(SVM_64K_GET_PAGES_US, "svm_64K_get_pages_us"),
+ DEF_STAT_STR(SVM_2M_GET_PAGES_US, "svm_2M_get_pages_us"),
+ DEF_STAT_STR(SVM_4K_BIND_US, "svm_4K_bind_us"),
+ DEF_STAT_STR(SVM_64K_BIND_US, "svm_64K_bind_us"),
+ DEF_STAT_STR(SVM_2M_BIND_US, "svm_2M_bind_us"),
};
/**
@@ -50,3 +85,17 @@ int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p)
return 0;
}
+
+/**
+ * xe_gt_stats_clear - Clear the GT stats
+ * @gt: GT structure
+ *
+ * This clears (zeroes) all the available GT stats.
+ */
+void xe_gt_stats_clear(struct xe_gt *gt)
+{
+ int id;
+
+ for (id = 0; id < ARRAY_SIZE(gt->stats.counters); ++id)
+ atomic64_set(&gt->stats.counters[id], 0);
+}
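
A hedged sketch of a possible consumer; the debugfs-style hook shown here is an assumption for illustration, not something this patch adds:

/* Illustrative sketch only -- hypothetical debugfs write hook. */
#include <linux/types.h>

#include "xe_gt_stats.h"

struct xe_gt;

static ssize_t example_stats_reset_write(struct xe_gt *gt,
					 const char *buf, size_t len)
{
	/*
	 * Any write clears all counters in this hypothetical hook. Note that
	 * xe_gt_stats_clear() is declared only when CONFIG_DEBUG_FS is set.
	 */
	xe_gt_stats_clear(gt);

	return len;
}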
diff --git a/drivers/gpu/drm/xe/xe_gt_stats.h b/drivers/gpu/drm/xe/xe_gt_stats.h
index 38325ef53617..e8aea32bc971 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats.h
@@ -13,6 +13,7 @@ struct drm_printer;
#ifdef CONFIG_DEBUG_FS
int xe_gt_stats_print_info(struct xe_gt *gt, struct drm_printer *p);
+void xe_gt_stats_clear(struct xe_gt *gt);
void xe_gt_stats_incr(struct xe_gt *gt, const enum xe_gt_stats_id id, int incr);
#else
static inline void
diff --git a/drivers/gpu/drm/xe/xe_gt_stats_types.h b/drivers/gpu/drm/xe/xe_gt_stats_types.h
index be3244d7133c..d8348a8de2e1 100644
--- a/drivers/gpu/drm/xe/xe_gt_stats_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_stats_types.h
@@ -9,8 +9,41 @@
enum xe_gt_stats_id {
XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT,
XE_GT_STATS_ID_TLB_INVAL,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT,
+ XE_GT_STATS_ID_SVM_TLB_INVAL_US,
XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT,
XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_64K_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_2M_VALID_PAGEFAULT_COUNT,
+ XE_GT_STATS_ID_SVM_4K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_64K_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_2M_PAGEFAULT_US,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_COUNT,
+ XE_GT_STATS_ID_SVM_4K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_64K_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_2M_MIGRATE_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ XE_GT_STATS_ID_SVM_DEVICE_COPY_KB,
+ XE_GT_STATS_ID_SVM_CPU_COPY_KB,
+ XE_GT_STATS_ID_SVM_4K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_64K_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_2M_GET_PAGES_US,
+ XE_GT_STATS_ID_SVM_4K_BIND_US,
+ XE_GT_STATS_ID_SVM_64K_BIND_US,
+ XE_GT_STATS_ID_SVM_2M_BIND_US,
/* must be the last entry */
__XE_GT_STATS_NUM_IDS,
};
diff --git a/drivers/gpu/drm/xe/xe_gt_throttle.c b/drivers/gpu/drm/xe/xe_gt_throttle.c
index aa962c783cdf..01477fc7b37b 100644
--- a/drivers/gpu/drm/xe/xe_gt_throttle.c
+++ b/drivers/gpu/drm/xe/xe_gt_throttle.c
@@ -8,221 +8,222 @@
#include <regs/xe_gt_regs.h>
#include "xe_device.h"
#include "xe_gt.h"
-#include "xe_gt_printk.h"
#include "xe_gt_sysfs.h"
#include "xe_gt_throttle.h"
#include "xe_mmio.h"
+#include "xe_platform_types.h"
#include "xe_pm.h"
/**
* DOC: Xe GT Throttle
*
- * Provides sysfs entries and other helpers for frequency throttle reasons in GT
+ * The GT frequency may be throttled by hardware/firmware for various reasons
+ * that are provided through attributes under the ``freq0/throttle/`` directory.
+ * Their availability depends on the platform and an attribute may not be
+ * visible if the corresponding reason is not supported there.
*
- * device/gt#/freq0/throttle/status - Overall status
- * device/gt#/freq0/throttle/reason_pl1 - Frequency throttle due to PL1
- * device/gt#/freq0/throttle/reason_pl2 - Frequency throttle due to PL2
- * device/gt#/freq0/throttle/reason_pl4 - Frequency throttle due to PL4, Iccmax etc.
- * device/gt#/freq0/throttle/reason_thermal - Frequency throttle due to thermal
- * device/gt#/freq0/throttle/reason_prochot - Frequency throttle due to prochot
- * device/gt#/freq0/throttle/reason_ratl - Frequency throttle due to RATL
- * device/gt#/freq0/throttle/reason_vr_thermalert - Frequency throttle due to VR THERMALERT
- * device/gt#/freq0/throttle/reason_vr_tdc - Frequency throttle due to VR TDC
+ * The ``reasons`` attribute can be used by a sysadmin to monitor all active
+ * throttle reasons at once. It's preferred over monitoring ``status`` and then
+ * reading the individual reason attributes, since that sequence is racy. If no
+ * throttling is happening, "none" is returned.
+ *
+ * The following attributes are available on the Crescent Island platform:
+ *
+ * - ``status``: Overall throttle status (0: no throttling, 1: throttling)
+ * - ``reasons``: Array of reasons causing throttling separated by space
+ * - ``reason_pl1``: package PL1
+ * - ``reason_pl2``: package PL2
+ * - ``reason_pl4``: package PL4
+ * - ``reason_prochot``: prochot
+ * - ``reason_soc_thermal``: SoC thermal
+ * - ``reason_mem_thermal``: Memory thermal
+ * - ``reason_vr_thermal``: VR thermal
+ * - ``reason_iccmax``: ICCMAX
+ * - ``reason_ratl``: RATL thermal algorithm
+ * - ``reason_soc_avg_thermal``: SoC average temp
+ * - ``reason_fastvmode``: VR is hitting FastVMode
+ * - ``reason_psys_pl1``: PSYS PL1
+ * - ``reason_psys_pl2``: PSYS PL2
+ * - ``reason_p0_freq``: P0 frequency
+ * - ``reason_psys_crit``: PSYS critical
+ *
+ * Other platforms support the following reasons:
+ *
+ * - ``status``: Overall throttle status (0: no throttling, 1: throttling)
+ * - ``reasons``: Array of reasons causing throttling separated by space
+ * - ``reason_pl1``: package PL1
+ * - ``reason_pl2``: package PL2
+ * - ``reason_pl4``: package PL4, Iccmax etc.
+ * - ``reason_thermal``: thermal
+ * - ``reason_prochot``: prochot
+ * - ``reason_ratl``: RATL thermal algorithm
+ * - ``reason_vr_thermalert``: VR THERMALERT
+ * - ``reason_vr_tdc``: VR TDC
*/
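
To make the sysfs interface above concrete, a small hedged userspace sketch that reads the ``reasons`` attribute; the exact sysfs path varies per device, and the ``tile0/gt0`` path below is only an assumption about the layout:

/* Illustrative userspace sketch only -- the sysfs path is an assumption. */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/class/drm/card0/device/tile0/gt0/freq0/throttle/reasons";
	char buf[256];
	FILE *f = fopen(path, "r");

	if (!f)
		return 1;

	if (fgets(buf, sizeof(buf), f))
		printf("throttle reasons: %s", buf); /* e.g. "pl1 prochot" or "none" */

	fclose(f);
	return 0;
}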
-static struct xe_gt *
-dev_to_gt(struct device *dev)
-{
- return kobj_to_gt(dev->kobj.parent);
-}
-
-u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
-{
- u32 reg;
-
- xe_pm_runtime_get(gt_to_xe(gt));
- if (xe_gt_is_media_type(gt))
- reg = xe_mmio_read32(&gt->mmio, MTL_MEDIA_PERF_LIMIT_REASONS);
- else
- reg = xe_mmio_read32(&gt->mmio, GT0_PERF_LIMIT_REASONS);
- xe_pm_runtime_put(gt_to_xe(gt));
-
- return reg;
-}
-
-static u32 read_status(struct xe_gt *gt)
-{
- u32 status = xe_gt_throttle_get_limit_reasons(gt) & GT0_PERF_LIMIT_REASONS_MASK;
-
- xe_gt_dbg(gt, "throttle reasons: 0x%08x\n", status);
- return status;
-}
+struct throttle_attribute {
+ struct kobj_attribute attr;
+ u32 mask;
+};
-static u32 read_reason_pl1(struct xe_gt *gt)
+static struct xe_gt *dev_to_gt(struct device *dev)
{
- u32 pl1 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_1_MASK;
-
- return pl1;
+ return kobj_to_gt(dev->kobj.parent);
}
-static u32 read_reason_pl2(struct xe_gt *gt)
+static struct xe_gt *throttle_to_gt(struct kobject *kobj)
{
- u32 pl2 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_2_MASK;
-
- return pl2;
+ return dev_to_gt(kobj_to_dev(kobj));
}
-static u32 read_reason_pl4(struct xe_gt *gt)
+static struct throttle_attribute *kobj_attribute_to_throttle(struct kobj_attribute *attr)
{
- u32 pl4 = xe_gt_throttle_get_limit_reasons(gt) & POWER_LIMIT_4_MASK;
-
- return pl4;
+ return container_of(attr, struct throttle_attribute, attr);
}
-static u32 read_reason_thermal(struct xe_gt *gt)
-{
- u32 thermal = xe_gt_throttle_get_limit_reasons(gt) & THERMAL_LIMIT_MASK;
-
- return thermal;
-}
-
-static u32 read_reason_prochot(struct xe_gt *gt)
+u32 xe_gt_throttle_get_limit_reasons(struct xe_gt *gt)
{
- u32 prochot = xe_gt_throttle_get_limit_reasons(gt) & PROCHOT_MASK;
-
- return prochot;
-}
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_reg reg;
+ u32 val, mask;
-static u32 read_reason_ratl(struct xe_gt *gt)
-{
- u32 ratl = xe_gt_throttle_get_limit_reasons(gt) & RATL_MASK;
+ if (xe_gt_is_media_type(gt))
+ reg = MTL_MEDIA_PERF_LIMIT_REASONS;
+ else
+ reg = GT0_PERF_LIMIT_REASONS;
- return ratl;
-}
+ if (xe->info.platform == XE_CRESCENTISLAND)
+ mask = CRI_PERF_LIMIT_REASONS_MASK;
+ else
+ mask = GT0_PERF_LIMIT_REASONS_MASK;
-static u32 read_reason_vr_thermalert(struct xe_gt *gt)
-{
- u32 thermalert = xe_gt_throttle_get_limit_reasons(gt) & VR_THERMALERT_MASK;
+ xe_pm_runtime_get(xe);
+ val = xe_mmio_read32(&gt->mmio, reg) & mask;
+ xe_pm_runtime_put(xe);
- return thermalert;
+ return val;
}
-static u32 read_reason_vr_tdc(struct xe_gt *gt)
+static bool is_throttled_by(struct xe_gt *gt, u32 mask)
{
- u32 tdc = xe_gt_throttle_get_limit_reasons(gt) & VR_TDC_MASK;
-
- return tdc;
+ return xe_gt_throttle_get_limit_reasons(gt) & mask;
}
-static ssize_t status_show(struct kobject *kobj,
+static ssize_t reason_show(struct kobject *kobj,
struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool status = !!read_status(gt);
+ struct throttle_attribute *ta = kobj_attribute_to_throttle(attr);
+ struct xe_gt *gt = throttle_to_gt(kobj);
- return sysfs_emit(buff, "%u\n", status);
+ return sysfs_emit(buff, "%u\n", is_throttled_by(gt, ta->mask));
}
-static struct kobj_attribute attr_status = __ATTR_RO(status);
-static ssize_t reason_pl1_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool pl1 = !!read_reason_pl1(gt);
+static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe);
- return sysfs_emit(buff, "%u\n", pl1);
-}
-static struct kobj_attribute attr_reason_pl1 = __ATTR_RO(reason_pl1);
-
-static ssize_t reason_pl2_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
+static ssize_t reasons_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buff)
{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool pl2 = !!read_reason_pl2(gt);
+ struct xe_gt *gt = throttle_to_gt(kobj);
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group;
+ struct attribute **pother;
+ ssize_t ret = 0;
+ u32 reasons;
- return sysfs_emit(buff, "%u\n", pl2);
-}
-static struct kobj_attribute attr_reason_pl2 = __ATTR_RO(reason_pl2);
+ reasons = xe_gt_throttle_get_limit_reasons(gt);
+ if (!reasons)
+ goto ret_none;
-static ssize_t reason_pl4_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool pl4 = !!read_reason_pl4(gt);
+ group = get_platform_throttle_group(xe);
+ for (pother = group->attrs; *pother; pother++) {
+ struct kobj_attribute *kattr = container_of(*pother, struct kobj_attribute, attr);
+ struct throttle_attribute *other_ta = kobj_attribute_to_throttle(kattr);
- return sysfs_emit(buff, "%u\n", pl4);
-}
-static struct kobj_attribute attr_reason_pl4 = __ATTR_RO(reason_pl4);
+ if (other_ta->mask != U32_MAX && reasons & other_ta->mask)
+ ret += sysfs_emit_at(buff, ret, "%s ", (*pother)->name + strlen("reason_"));
+ }
-static ssize_t reason_thermal_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool thermal = !!read_reason_thermal(gt);
+ if (drm_WARN_ONCE(&xe->drm, !ret, "Unknown reason: %#x\n", reasons))
+ goto ret_none;
- return sysfs_emit(buff, "%u\n", thermal);
-}
-static struct kobj_attribute attr_reason_thermal = __ATTR_RO(reason_thermal);
+ /* Drop extra space from last iteration above */
+ ret--;
+ ret += sysfs_emit_at(buff, ret, "\n");
-static ssize_t reason_prochot_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool prochot = !!read_reason_prochot(gt);
+ return ret;
- return sysfs_emit(buff, "%u\n", prochot);
+ret_none:
+ return sysfs_emit(buff, "none\n");
}
-static struct kobj_attribute attr_reason_prochot = __ATTR_RO(reason_prochot);
-static ssize_t reason_ratl_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool ratl = !!read_reason_ratl(gt);
-
- return sysfs_emit(buff, "%u\n", ratl);
-}
-static struct kobj_attribute attr_reason_ratl = __ATTR_RO(reason_ratl);
-
-static ssize_t reason_vr_thermalert_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool thermalert = !!read_reason_vr_thermalert(gt);
-
- return sysfs_emit(buff, "%u\n", thermalert);
-}
-static struct kobj_attribute attr_reason_vr_thermalert = __ATTR_RO(reason_vr_thermalert);
-
-static ssize_t reason_vr_tdc_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buff)
-{
- struct device *dev = kobj_to_dev(kobj);
- struct xe_gt *gt = dev_to_gt(dev);
- bool tdc = !!read_reason_vr_tdc(gt);
-
- return sysfs_emit(buff, "%u\n", tdc);
-}
-static struct kobj_attribute attr_reason_vr_tdc = __ATTR_RO(reason_vr_tdc);
+#define THROTTLE_ATTR_RO(name, _mask) \
+ struct throttle_attribute attr_##name = { \
+ .attr = __ATTR(name, 0444, reason_show, NULL), \
+ .mask = _mask, \
+ }
+
+#define THROTTLE_ATTR_RO_FUNC(name, _mask, _show) \
+ struct throttle_attribute attr_##name = { \
+ .attr = __ATTR(name, 0444, _show, NULL), \
+ .mask = _mask, \
+ }
+
+static THROTTLE_ATTR_RO_FUNC(reasons, 0, reasons_show);
+static THROTTLE_ATTR_RO(status, U32_MAX);
+static THROTTLE_ATTR_RO(reason_pl1, POWER_LIMIT_1_MASK);
+static THROTTLE_ATTR_RO(reason_pl2, POWER_LIMIT_2_MASK);
+static THROTTLE_ATTR_RO(reason_pl4, POWER_LIMIT_4_MASK);
+static THROTTLE_ATTR_RO(reason_thermal, THERMAL_LIMIT_MASK);
+static THROTTLE_ATTR_RO(reason_prochot, PROCHOT_MASK);
+static THROTTLE_ATTR_RO(reason_ratl, RATL_MASK);
+static THROTTLE_ATTR_RO(reason_vr_thermalert, VR_THERMALERT_MASK);
+static THROTTLE_ATTR_RO(reason_vr_tdc, VR_TDC_MASK);
static struct attribute *throttle_attrs[] = {
- &attr_status.attr,
- &attr_reason_pl1.attr,
- &attr_reason_pl2.attr,
- &attr_reason_pl4.attr,
- &attr_reason_thermal.attr,
- &attr_reason_prochot.attr,
- &attr_reason_ratl.attr,
- &attr_reason_vr_thermalert.attr,
- &attr_reason_vr_tdc.attr,
+ &attr_reasons.attr.attr,
+ &attr_status.attr.attr,
+ &attr_reason_pl1.attr.attr,
+ &attr_reason_pl2.attr.attr,
+ &attr_reason_pl4.attr.attr,
+ &attr_reason_thermal.attr.attr,
+ &attr_reason_prochot.attr.attr,
+ &attr_reason_ratl.attr.attr,
+ &attr_reason_vr_thermalert.attr.attr,
+ &attr_reason_vr_tdc.attr.attr,
+ NULL
+};
+
+static THROTTLE_ATTR_RO(reason_vr_thermal, VR_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_soc_thermal, SOC_THERMAL_LIMIT_MASK);
+static THROTTLE_ATTR_RO(reason_mem_thermal, MEM_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_iccmax, ICCMAX_MASK);
+static THROTTLE_ATTR_RO(reason_soc_avg_thermal, SOC_AVG_THERMAL_MASK);
+static THROTTLE_ATTR_RO(reason_fastvmode, FASTVMODE_MASK);
+static THROTTLE_ATTR_RO(reason_psys_pl1, PSYS_PL1_MASK);
+static THROTTLE_ATTR_RO(reason_psys_pl2, PSYS_PL2_MASK);
+static THROTTLE_ATTR_RO(reason_p0_freq, P0_FREQ_MASK);
+static THROTTLE_ATTR_RO(reason_psys_crit, PSYS_CRIT_MASK);
+
+static struct attribute *cri_throttle_attrs[] = {
+ /* Common */
+ &attr_reasons.attr.attr,
+ &attr_status.attr.attr,
+ &attr_reason_pl1.attr.attr,
+ &attr_reason_pl2.attr.attr,
+ &attr_reason_pl4.attr.attr,
+ &attr_reason_prochot.attr.attr,
+ &attr_reason_ratl.attr.attr,
+ /* CRI */
+ &attr_reason_vr_thermal.attr.attr,
+ &attr_reason_soc_thermal.attr.attr,
+ &attr_reason_mem_thermal.attr.attr,
+ &attr_reason_iccmax.attr.attr,
+ &attr_reason_soc_avg_thermal.attr.attr,
+ &attr_reason_fastvmode.attr.attr,
+ &attr_reason_psys_pl1.attr.attr,
+ &attr_reason_psys_pl2.attr.attr,
+ &attr_reason_p0_freq.attr.attr,
+ &attr_reason_psys_crit.attr.attr,
NULL
};
@@ -231,19 +232,37 @@ static const struct attribute_group throttle_group_attrs = {
.attrs = throttle_attrs,
};
+static const struct attribute_group cri_throttle_group_attrs = {
+ .name = "throttle",
+ .attrs = cri_throttle_attrs,
+};
+
+static const struct attribute_group *get_platform_throttle_group(struct xe_device *xe)
+{
+ switch (xe->info.platform) {
+ case XE_CRESCENTISLAND:
+ return &cri_throttle_group_attrs;
+ default:
+ return &throttle_group_attrs;
+ }
+}
+
static void gt_throttle_sysfs_fini(void *arg)
{
struct xe_gt *gt = arg;
+ struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group = get_platform_throttle_group(xe);
- sysfs_remove_group(gt->freq, &throttle_group_attrs);
+ sysfs_remove_group(gt->freq, group);
}
int xe_gt_throttle_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
+ const struct attribute_group *group = get_platform_throttle_group(xe);
int err;
- err = sysfs_create_group(gt->freq, &throttle_group_attrs);
+ err = sysfs_create_group(gt->freq, group);
if (err)
return err;
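
For reference, the THROTTLE_ATTR_RO() entries above work by having the shared show() callback recover the per-attribute mask through container_of(). The sketch below is illustrative only: it assumes struct throttle_attribute is defined (in a hunk not shown here) as a kobj_attribute plus a mask, and the GT lookup helper is a stand-in, not taken from this patch.

struct throttle_attribute {
	struct kobj_attribute attr;
	u32 mask;
};

static ssize_t reason_show(struct kobject *kobj,
			   struct kobj_attribute *attr, char *buf)
{
	struct throttle_attribute *ta =
		container_of(attr, struct throttle_attribute, attr);
	struct xe_gt *gt = kobj_to_gt(kobj->parent);	/* assumed helper */
	u32 reasons = xe_gt_throttle_get_limit_reasons(gt);

	/* attr_status uses mask == U32_MAX, which degenerates to !!reasons */
	return sysfs_emit(buf, "%u\n", !!(reasons & ta->mask));
}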
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
deleted file mode 100644
index 084cbdeba8ea..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ /dev/null
@@ -1,578 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#include "xe_gt_tlb_invalidation.h"
-
-#include "abi/guc_actions_abi.h"
-#include "xe_device.h"
-#include "xe_force_wake.h"
-#include "xe_gt.h"
-#include "xe_gt_printk.h"
-#include "xe_guc.h"
-#include "xe_guc_ct.h"
-#include "xe_gt_stats.h"
-#include "xe_mmio.h"
-#include "xe_pm.h"
-#include "xe_sriov.h"
-#include "xe_trace.h"
-#include "regs/xe_guc_regs.h"
-
-#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
-
-/*
- * TLB inval depends on pending commands in the CT queue and then the real
- * invalidation time. Double up the time to process full CT queue
- * just to be on the safe side.
- */
-static long tlb_timeout_jiffies(struct xe_gt *gt)
-{
- /* this reflects what HW/GuC needs to process TLB inv request */
- const long hw_tlb_timeout = HZ / 4;
-
- /* this estimates actual delay caused by the CTB transport */
- long delay = xe_guc_ct_queue_proc_time_jiffies(&gt->uc.guc.ct);
-
- return hw_tlb_timeout + 2 * delay;
-}
-
-static void xe_gt_tlb_invalidation_fence_fini(struct xe_gt_tlb_invalidation_fence *fence)
-{
- if (WARN_ON_ONCE(!fence->gt))
- return;
-
- xe_pm_runtime_put(gt_to_xe(fence->gt));
- fence->gt = NULL; /* fini() should be called once */
-}
-
-static void
-__invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
- bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
-
- trace_xe_gt_tlb_invalidation_fence_signal(xe, fence);
- xe_gt_tlb_invalidation_fence_fini(fence);
- dma_fence_signal(&fence->base);
- if (!stack)
- dma_fence_put(&fence->base);
-}
-
-static void
-invalidation_fence_signal(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence)
-{
- list_del(&fence->link);
- __invalidation_fence_signal(xe, fence);
-}
-
-void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence)
-{
- if (WARN_ON_ONCE(!fence->gt))
- return;
-
- __invalidation_fence_signal(gt_to_xe(fence->gt), fence);
-}
-
-static void xe_gt_tlb_fence_timeout(struct work_struct *work)
-{
- struct xe_gt *gt = container_of(work, struct xe_gt,
- tlb_invalidation.fence_tdr.work);
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_gt_tlb_invalidation_fence *fence, *next;
-
- LNL_FLUSH_WORK(&gt->uc.guc.ct.g2h_worker);
-
- spin_lock_irq(&gt->tlb_invalidation.pending_lock);
- list_for_each_entry_safe(fence, next,
- &gt->tlb_invalidation.pending_fences, link) {
- s64 since_inval_ms = ktime_ms_delta(ktime_get(),
- fence->invalidation_time);
-
- if (msecs_to_jiffies(since_inval_ms) < tlb_timeout_jiffies(gt))
- break;
-
- trace_xe_gt_tlb_invalidation_fence_timeout(xe, fence);
- xe_gt_err(gt, "TLB invalidation fence timeout, seqno=%d recv=%d",
- fence->seqno, gt->tlb_invalidation.seqno_recv);
-
- fence->base.error = -ETIME;
- invalidation_fence_signal(xe, fence);
- }
- if (!list_empty(&gt->tlb_invalidation.pending_fences))
- queue_delayed_work(system_wq,
- &gt->tlb_invalidation.fence_tdr,
- tlb_timeout_jiffies(gt));
- spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
-}
-
-/**
- * xe_gt_tlb_invalidation_init_early - Initialize GT TLB invalidation state
- * @gt: GT structure
- *
- * Initialize GT TLB invalidation state, purely software initialization, should
- * be called once during driver load.
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt)
-{
- gt->tlb_invalidation.seqno = 1;
- INIT_LIST_HEAD(&gt->tlb_invalidation.pending_fences);
- spin_lock_init(&gt->tlb_invalidation.pending_lock);
- spin_lock_init(&gt->tlb_invalidation.lock);
- INIT_DELAYED_WORK(&gt->tlb_invalidation.fence_tdr,
- xe_gt_tlb_fence_timeout);
-
- return 0;
-}
-
-/**
- * xe_gt_tlb_invalidation_reset - Initialize GT TLB invalidation reset
- * @gt: GT structure
- *
- * Signal any pending invalidation fences, should be called during a GT reset
- */
-void xe_gt_tlb_invalidation_reset(struct xe_gt *gt)
-{
- struct xe_gt_tlb_invalidation_fence *fence, *next;
- int pending_seqno;
-
- /*
- * CT channel is already disabled at this point. No new TLB requests can
- * appear.
- */
-
- mutex_lock(&gt->uc.guc.ct.lock);
- spin_lock_irq(&gt->tlb_invalidation.pending_lock);
- cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
- /*
- * We might have various kworkers waiting for TLB flushes to complete
- * which are not tracked with an explicit TLB fence, however at this
- * stage that will never happen since the CT is already disabled, so
- * make sure we signal them here under the assumption that we have
- * completed a full GT reset.
- */
- if (gt->tlb_invalidation.seqno == 1)
- pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
- else
- pending_seqno = gt->tlb_invalidation.seqno - 1;
- WRITE_ONCE(gt->tlb_invalidation.seqno_recv, pending_seqno);
-
- list_for_each_entry_safe(fence, next,
- &gt->tlb_invalidation.pending_fences, link)
- invalidation_fence_signal(gt_to_xe(gt), fence);
- spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
- mutex_unlock(&gt->uc.guc.ct.lock);
-}
-
-static bool tlb_invalidation_seqno_past(struct xe_gt *gt, int seqno)
-{
- int seqno_recv = READ_ONCE(gt->tlb_invalidation.seqno_recv);
-
- if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
- return false;
-
- if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
- return true;
-
- return seqno_recv >= seqno;
-}
-
-static int send_tlb_invalidation(struct xe_guc *guc,
- struct xe_gt_tlb_invalidation_fence *fence,
- u32 *action, int len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = gt_to_xe(gt);
- int seqno;
- int ret;
-
- xe_gt_assert(gt, fence);
-
- /*
- * XXX: The seqno algorithm relies on TLB invalidation being processed
- * in order which they currently are, if that changes the algorithm will
- * need to be updated.
- */
-
- mutex_lock(&guc->ct.lock);
- seqno = gt->tlb_invalidation.seqno;
- fence->seqno = seqno;
- trace_xe_gt_tlb_invalidation_fence_send(xe, fence);
- action[1] = seqno;
- ret = xe_guc_ct_send_locked(&guc->ct, action, len,
- G2H_LEN_DW_TLB_INVALIDATE, 1);
- if (!ret) {
- spin_lock_irq(&gt->tlb_invalidation.pending_lock);
- /*
- * We haven't actually published the TLB fence as per
- * pending_fences, but in theory our seqno could have already
- * been written as we acquired the pending_lock. In such a case
- * we can just go ahead and signal the fence here.
- */
- if (tlb_invalidation_seqno_past(gt, seqno)) {
- __invalidation_fence_signal(xe, fence);
- } else {
- fence->invalidation_time = ktime_get();
- list_add_tail(&fence->link,
- &gt->tlb_invalidation.pending_fences);
-
- if (list_is_singular(&gt->tlb_invalidation.pending_fences))
- queue_delayed_work(system_wq,
- &gt->tlb_invalidation.fence_tdr,
- tlb_timeout_jiffies(gt));
- }
- spin_unlock_irq(&gt->tlb_invalidation.pending_lock);
- } else {
- __invalidation_fence_signal(xe, fence);
- }
- if (!ret) {
- gt->tlb_invalidation.seqno = (gt->tlb_invalidation.seqno + 1) %
- TLB_INVALIDATION_SEQNO_MAX;
- if (!gt->tlb_invalidation.seqno)
- gt->tlb_invalidation.seqno = 1;
- }
- mutex_unlock(&guc->ct.lock);
- xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
-
- return ret;
-}
-
-#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
- XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
- XE_GUC_TLB_INVAL_FLUSH_CACHE)
-
-/**
- * xe_gt_tlb_invalidation_guc - Issue a TLB invalidation on this GT for the GuC
- * @gt: GT structure
- * @fence: invalidation fence which will be signal on TLB invalidation
- * completion
- *
- * Issue a TLB invalidation for the GuC. Completion of TLB is asynchronous and
- * caller can use the invalidation fence to wait for completion.
- *
- * Return: 0 on success, negative error code on error
- */
-static int xe_gt_tlb_invalidation_guc(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence)
-{
- u32 action[] = {
- XE_GUC_ACTION_TLB_INVALIDATION,
- 0, /* seqno, replaced in send_tlb_invalidation */
- MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
- };
- int ret;
-
- ret = send_tlb_invalidation(&gt->uc.guc, fence, action,
- ARRAY_SIZE(action));
- /*
- * -ECANCELED indicates the CT is stopped for a GT reset. TLB caches
- * should be nuked on a GT reset so this error can be ignored.
- */
- if (ret == -ECANCELED)
- return 0;
-
- return ret;
-}
-
-/**
- * xe_gt_tlb_invalidation_ggtt - Issue a TLB invalidation on this GT for the GGTT
- * @gt: GT structure
- *
- * Issue a TLB invalidation for the GGTT. Completion of TLB invalidation is
- * synchronous.
- *
- * Return: 0 on success, negative error code on error
- */
-int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt)
-{
- struct xe_device *xe = gt_to_xe(gt);
- unsigned int fw_ref;
-
- if (xe_guc_ct_enabled(&gt->uc.guc.ct) &&
- gt->uc.guc.submission_state.enabled) {
- struct xe_gt_tlb_invalidation_fence fence;
- int ret;
-
- xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
- ret = xe_gt_tlb_invalidation_guc(gt, &fence);
- if (ret)
- return ret;
-
- xe_gt_tlb_invalidation_fence_wait(&fence);
- } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
- struct xe_mmio *mmio = &gt->mmio;
-
- if (IS_SRIOV_VF(xe))
- return 0;
-
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
- xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
- PVC_GUC_TLB_INV_DESC1_INVALIDATE);
- xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
- PVC_GUC_TLB_INV_DESC0_VALID);
- } else {
- xe_mmio_write32(mmio, GUC_TLB_INV_CR,
- GUC_TLB_INV_CR_INVALIDATE);
- }
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- }
-
- return 0;
-}
-
-/*
- * Ensure that roundup_pow_of_two(length) doesn't overflow.
- * Note that roundup_pow_of_two() operates on unsigned long,
- * not on u64.
- */
-#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
-
-/**
- * xe_gt_tlb_invalidation_range - Issue a TLB invalidation on this GT for an
- * address range
- *
- * @gt: GT structure
- * @fence: invalidation fence which will be signal on TLB invalidation
- * completion
- * @start: start address
- * @end: end address
- * @asid: address space id
- *
- * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can use
- * the invalidation fence to wait for completion.
- *
- * Return: Negative error code on error, 0 on success
- */
-int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- u64 start, u64 end, u32 asid)
-{
- struct xe_device *xe = gt_to_xe(gt);
-#define MAX_TLB_INVALIDATION_LEN 7
- u32 action[MAX_TLB_INVALIDATION_LEN];
- u64 length = end - start;
- int len = 0;
-
- xe_gt_assert(gt, fence);
-
- /* Execlists not supported */
- if (gt_to_xe(gt)->info.force_execlist) {
- __invalidation_fence_signal(xe, fence);
- return 0;
- }
-
- action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
- action[len++] = 0; /* seqno, replaced in send_tlb_invalidation */
- if (!xe->info.has_range_tlb_invalidation ||
- length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
- action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
- } else {
- u64 orig_start = start;
- u64 align;
-
- if (length < SZ_4K)
- length = SZ_4K;
-
- /*
- * We need to invalidate a higher granularity if start address
- * is not aligned to length. When start is not aligned with
- * length we need to find the length large enough to create an
- * address mask covering the required range.
- */
- align = roundup_pow_of_two(length);
- start = ALIGN_DOWN(start, align);
- end = ALIGN(end, align);
- length = align;
- while (start + length < end) {
- length <<= 1;
- start = ALIGN_DOWN(orig_start, length);
- }
-
- /*
- * Minimum invalidation size for a 2MB page that the hardware
- * expects is 16MB
- */
- if (length >= SZ_2M) {
- length = max_t(u64, SZ_16M, length);
- start = ALIGN_DOWN(orig_start, length);
- }
-
- xe_gt_assert(gt, length >= SZ_4K);
- xe_gt_assert(gt, is_power_of_2(length));
- xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
- ilog2(SZ_2M) + 1)));
- xe_gt_assert(gt, IS_ALIGNED(start, length));
-
- action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
- action[len++] = asid;
- action[len++] = lower_32_bits(start);
- action[len++] = upper_32_bits(start);
- action[len++] = ilog2(length) - ilog2(SZ_4K);
- }
-
- xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
-
- return send_tlb_invalidation(&gt->uc.guc, fence, action, len);
-}
-
-/**
- * xe_gt_tlb_invalidation_vm - Issue a TLB invalidation on this GT for a VM
- * @gt: graphics tile
- * @vm: VM to invalidate
- *
- * Invalidate entire VM's address space
- */
-void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm)
-{
- struct xe_gt_tlb_invalidation_fence fence;
- u64 range = 1ull << vm->xe->info.va_bits;
- int ret;
-
- xe_gt_tlb_invalidation_fence_init(gt, &fence, true);
-
- ret = xe_gt_tlb_invalidation_range(gt, &fence, 0, range, vm->usm.asid);
- if (ret < 0)
- return;
-
- xe_gt_tlb_invalidation_fence_wait(&fence);
-}
-
-/**
- * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
- * @gt: GT structure
- * @fence: invalidation fence which will be signal on TLB invalidation
- * completion, can be NULL
- * @vma: VMA to invalidate
- *
- * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can use
- * the invalidation fence to wait for completion.
- *
- * Return: Negative error code on error, 0 on success
- */
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- struct xe_vma *vma)
-{
- xe_gt_assert(gt, vma);
-
- return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
- xe_vma_end(vma),
- xe_vma_vm(vma)->usm.asid);
-}
-
-/**
- * xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
- * @guc: guc
- * @msg: message indicating TLB invalidation done
- * @len: length of message
- *
- * Parse seqno of TLB invalidation, wake any waiters for seqno, and signal any
- * invalidation fences for seqno. Algorithm for this depends on seqno being
- * received in-order and asserts this assumption.
- *
- * Return: 0 on success, -EPROTO for malformed messages.
- */
-int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
-{
- struct xe_gt *gt = guc_to_gt(guc);
- struct xe_device *xe = gt_to_xe(gt);
- struct xe_gt_tlb_invalidation_fence *fence, *next;
- unsigned long flags;
-
- if (unlikely(len != 1))
- return -EPROTO;
-
- /*
- * This can also be run both directly from the IRQ handler and also in
- * process_g2h_msg(). Only one may process any individual CT message,
- * however the order they are processed here could result in skipping a
- * seqno. To handle that we just process all the seqnos from the last
- * seqno_recv up to and including the one in msg[0]. The delta should be
- * very small so there shouldn't be much of pending_fences we actually
- * need to iterate over here.
- *
- * From GuC POV we expect the seqnos to always appear in-order, so if we
- * see something later in the timeline we can be sure that anything
- * appearing earlier has already signalled, just that we have yet to
- * officially process the CT message like if racing against
- * process_g2h_msg().
- */
- spin_lock_irqsave(&gt->tlb_invalidation.pending_lock, flags);
- if (tlb_invalidation_seqno_past(gt, msg[0])) {
- spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
- return 0;
- }
-
- WRITE_ONCE(gt->tlb_invalidation.seqno_recv, msg[0]);
-
- list_for_each_entry_safe(fence, next,
- &gt->tlb_invalidation.pending_fences, link) {
- trace_xe_gt_tlb_invalidation_fence_recv(xe, fence);
-
- if (!tlb_invalidation_seqno_past(gt, fence->seqno))
- break;
-
- invalidation_fence_signal(xe, fence);
- }
-
- if (!list_empty(&gt->tlb_invalidation.pending_fences))
- mod_delayed_work(system_wq,
- &gt->tlb_invalidation.fence_tdr,
- tlb_timeout_jiffies(gt));
- else
- cancel_delayed_work(&gt->tlb_invalidation.fence_tdr);
-
- spin_unlock_irqrestore(&gt->tlb_invalidation.pending_lock, flags);
-
- return 0;
-}
-
-static const char *
-invalidation_fence_get_driver_name(struct dma_fence *dma_fence)
-{
- return "xe";
-}
-
-static const char *
-invalidation_fence_get_timeline_name(struct dma_fence *dma_fence)
-{
- return "invalidation_fence";
-}
-
-static const struct dma_fence_ops invalidation_fence_ops = {
- .get_driver_name = invalidation_fence_get_driver_name,
- .get_timeline_name = invalidation_fence_get_timeline_name,
-};
-
-/**
- * xe_gt_tlb_invalidation_fence_init - Initialize TLB invalidation fence
- * @gt: GT
- * @fence: TLB invalidation fence to initialize
- * @stack: fence is stack variable
- *
- * Initialize TLB invalidation fence for use. xe_gt_tlb_invalidation_fence_fini
- * will be automatically called when fence is signalled (all fences must signal),
- * even on error.
- */
-void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- bool stack)
-{
- xe_pm_runtime_get_noresume(gt_to_xe(gt));
-
- spin_lock_irq(&gt->tlb_invalidation.lock);
- dma_fence_init(&fence->base, &invalidation_fence_ops,
- &gt->tlb_invalidation.lock,
- dma_fence_context_alloc(1), 1);
- spin_unlock_irq(&gt->tlb_invalidation.lock);
- INIT_LIST_HEAD(&fence->link);
- if (stack)
- set_bit(FENCE_STACK_BIT, &fence->base.flags);
- else
- dma_fence_get(&fence->base);
- fence->gt = gt;
-}
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
deleted file mode 100644
index abe9b03d543e..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_GT_TLB_INVALIDATION_H_
-#define _XE_GT_TLB_INVALIDATION_H_
-
-#include <linux/types.h>
-
-#include "xe_gt_tlb_invalidation_types.h"
-
-struct xe_gt;
-struct xe_guc;
-struct xe_vm;
-struct xe_vma;
-
-int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
-
-void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
-int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- struct xe_vma *vma);
-void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm);
-int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- u64 start, u64 end, u32 asid);
-int xe_guc_tlb_invalidation_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
-
-void xe_gt_tlb_invalidation_fence_init(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- bool stack);
-void xe_gt_tlb_invalidation_fence_signal(struct xe_gt_tlb_invalidation_fence *fence);
-
-static inline void
-xe_gt_tlb_invalidation_fence_wait(struct xe_gt_tlb_invalidation_fence *fence)
-{
- dma_fence_wait(&fence->base, false);
-}
-
-#endif /* _XE_GT_TLB_INVALIDATION_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
deleted file mode 100644
index de6e825e0851..000000000000
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation_types.h
+++ /dev/null
@@ -1,32 +0,0 @@
-/* SPDX-License-Identifier: MIT */
-/*
- * Copyright © 2023 Intel Corporation
- */
-
-#ifndef _XE_GT_TLB_INVALIDATION_TYPES_H_
-#define _XE_GT_TLB_INVALIDATION_TYPES_H_
-
-#include <linux/dma-fence.h>
-
-struct xe_gt;
-
-/**
- * struct xe_gt_tlb_invalidation_fence - XE GT TLB invalidation fence
- *
- * Optionally passed to xe_gt_tlb_invalidation and will be signaled upon TLB
- * invalidation completion.
- */
-struct xe_gt_tlb_invalidation_fence {
- /** @base: dma fence base */
- struct dma_fence base;
- /** @gt: GT which fence belong to */
- struct xe_gt *gt;
- /** @link: link into list of pending tlb fences */
- struct list_head link;
- /** @seqno: seqno of TLB invalidation to signal fence one */
- int seqno;
- /** @invalidation_time: time of TLB invalidation */
- ktime_t invalidation_time;
-};
-
-#endif
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 516c81e3b8dd..bd5260221d8d 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -12,23 +12,21 @@
#include "regs/xe_gt_regs.h"
#include "xe_assert.h"
#include "xe_gt.h"
+#include "xe_gt_mcr.h"
+#include "xe_gt_printk.h"
#include "xe_mmio.h"
#include "xe_wa.h"
-static void
-load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
+static void load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs,
+ const struct xe_reg regs[])
{
- va_list argp;
u32 fuse_val[XE_MAX_DSS_FUSE_REGS] = {};
int i;
- if (drm_WARN_ON(&gt_to_xe(gt)->drm, numregs > XE_MAX_DSS_FUSE_REGS))
- numregs = XE_MAX_DSS_FUSE_REGS;
+ xe_gt_assert(gt, numregs <= ARRAY_SIZE(fuse_val));
- va_start(argp, numregs);
for (i = 0; i < numregs; i++)
- fuse_val[i] = xe_mmio_read32(&gt->mmio, va_arg(argp, struct xe_reg));
- va_end(argp);
+ fuse_val[i] = xe_mmio_read32(&gt->mmio, regs[i]);
bitmap_from_arr32(mask, fuse_val, numregs * 32);
}
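
The array-based load_dss_mask() above packs consecutive 32-bit fuse registers into a single bitmap with bitmap_from_arr32(). A small sketch of that packing, using made-up fuse values:

#include <linux/bitmap.h>

static void example_pack_dss_mask(void)
{
	DECLARE_BITMAP(dss, 2 * 32);
	u32 fuse_val[2] = { 0x0000000f, 0x00000001 };	/* hypothetical reads */

	/* bit i of fuse_val[n] becomes bit (n * 32 + i) of the bitmap */
	bitmap_from_arr32(dss, fuse_val, 2 * 32);

	/* here bits 0-3 and bit 32 end up set in 'dss' */
}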
@@ -125,6 +123,21 @@ gen_l3_mask_from_pattern(struct xe_device *xe, xe_l3_bank_mask_t dst,
}
}
+bool xe_gt_topology_report_l3(struct xe_gt *gt)
+{
+ /*
+ * No known userspace needs/uses the L3 bank mask reported by
+ * the media GT, and the hardware itself is known to report bogus
+ * values on several platforms. Only report L3 bank mask as part
+ * of the media GT's topology on pre-Xe3 platforms since that's
+ * already part of our ABI.
+ */
+ if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) >= 30)
+ return false;
+
+ return true;
+}
+
static void
load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
{
@@ -132,19 +145,14 @@ load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
struct xe_mmio *mmio = &gt->mmio;
u32 fuse3 = xe_mmio_read32(mmio, MIRROR_FUSE3);
- /*
- * PTL platforms with media version 30.00 do not provide proper values
- * for the media GT's L3 bank registers. Skip the readout since we
- * don't have any way to obtain real values.
- *
- * This may get re-described as an official workaround in the future,
- * but there's no tracking number assigned yet so we use a custom
- * OOB workaround descriptor.
- */
- if (XE_WA(gt, no_media_l3))
+ if (!xe_gt_topology_report_l3(gt))
return;
- if (GRAPHICS_VER(xe) >= 30) {
+ if (GRAPHICS_VER(xe) >= 35) {
+ u32 fuse_val = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
+
+ bitmap_from_arr32(l3_bank_mask, &fuse_val, 32);
+ } else if (GRAPHICS_VER(xe) >= 30) {
xe_l3_bank_mask_t per_node = {};
u32 meml3_en = REG_FIELD_GET(XE2_NODE_ENABLE_MASK, fuse3);
u32 mirror_l3bank_enable = xe_mmio_read32(mmio, MIRROR_L3BANK_ENABLE);
@@ -218,9 +226,19 @@ get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
void
xe_gt_topology_init(struct xe_gt *gt)
{
+ static const struct xe_reg geometry_regs[] = {
+ XELP_GT_GEOMETRY_DSS_ENABLE,
+ XE2_GT_GEOMETRY_DSS_1,
+ XE2_GT_GEOMETRY_DSS_2,
+ };
+ static const struct xe_reg compute_regs[] = {
+ XEHP_GT_COMPUTE_DSS_ENABLE,
+ XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
+ XE2_GT_COMPUTE_DSS_2,
+ };
+ int num_geometry_regs, num_compute_regs;
struct xe_device *xe = gt_to_xe(gt);
struct drm_printer p;
- int num_geometry_regs, num_compute_regs;
get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
@@ -228,23 +246,18 @@ xe_gt_topology_init(struct xe_gt *gt)
* Register counts returned shouldn't exceed the number of registers
* passed as parameters below.
*/
- drm_WARN_ON(&xe->drm, num_geometry_regs > 3);
- drm_WARN_ON(&xe->drm, num_compute_regs > 3);
+ xe_gt_assert(gt, num_geometry_regs <= ARRAY_SIZE(geometry_regs));
+ xe_gt_assert(gt, num_compute_regs <= ARRAY_SIZE(compute_regs));
load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
- num_geometry_regs,
- XELP_GT_GEOMETRY_DSS_ENABLE,
- XE2_GT_GEOMETRY_DSS_1,
- XE2_GT_GEOMETRY_DSS_2);
- load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
- XEHP_GT_COMPUTE_DSS_ENABLE,
- XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
- XE2_GT_COMPUTE_DSS_2);
+ num_geometry_regs, geometry_regs);
+ load_dss_mask(gt, gt->fuse_topo.c_dss_mask,
+ num_compute_regs, compute_regs);
+
load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
- p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
-
+ p = xe_gt_dbg_printer(gt);
xe_gt_topology_dump(gt, &p);
}
@@ -260,8 +273,14 @@ static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
return NULL;
}
-void
-xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_gt_topology_dump() - Dump GT topology into a drm printer.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Return: always 0.
+ */
+int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
{
drm_printf(p, "dss mask (geometry): %*pb\n", XE_MAX_DSS_FUSE_BITS,
gt->fuse_topo.g_dss_mask);
@@ -273,8 +292,10 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
drm_printf(p, "EU type: %s\n",
eu_type_to_str(gt->fuse_topo.eu_type));
- drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
- gt->fuse_topo.l3_bank_mask);
+ if (xe_gt_topology_report_l3(gt))
+ drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
+ gt->fuse_topo.l3_bank_mask);
+ return 0;
}
/*
@@ -288,9 +309,11 @@ xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum)
return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
}
-bool xe_dss_mask_empty(const xe_dss_mask_t mask)
+/* Used to obtain the index of the first L3 bank. */
+unsigned int
+xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask)
{
- return bitmap_empty(mask, XE_MAX_DSS_FUSE_BITS);
+ return find_first_bit(mask, XE_MAX_L3_BANK_MASK_BITS);
}
/**
@@ -331,3 +354,19 @@ bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss)
{
return test_bit(dss, gt->fuse_topo.c_dss_mask);
}
+
+bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt)
+{
+ unsigned int xecore;
+ int last_group = -1;
+ u16 group, instance;
+
+ for_each_dss_steering(xecore, gt, group, instance) {
+ if (last_group != group) {
+ if (group - last_group > 1)
+ return true;
+ last_group = group;
+ }
+ }
+ return false;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index a72d26ba0653..162d603c9b81 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -23,7 +23,7 @@ struct drm_printer;
void xe_gt_topology_init(struct xe_gt *gt);
-void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
+int xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
/**
* xe_gt_topology_mask_last_dss() - Returns the index of the last DSS in a mask.
@@ -40,8 +40,8 @@ xe_gt_topology_mask_last_dss(const xe_dss_mask_t mask)
unsigned int
xe_dss_mask_group_ffs(const xe_dss_mask_t mask, int groupsize, int groupnum);
-
-bool xe_dss_mask_empty(const xe_dss_mask_t mask);
+unsigned int
+xe_l3_bank_mask_ffs(const xe_l3_bank_mask_t mask);
bool
xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
@@ -49,4 +49,8 @@ xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
bool xe_gt_has_geometry_dss(struct xe_gt *gt, unsigned int dss);
bool xe_gt_has_compute_dss(struct xe_gt *gt, unsigned int dss);
+bool xe_gt_has_discontiguous_dss_groups(const struct xe_gt *gt);
+
+bool xe_gt_topology_report_l3(struct xe_gt *gt);
+
#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index 7def0959da35..0a728180b6fe 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -17,6 +17,7 @@
#include "xe_oa_types.h"
#include "xe_reg_sr_types.h"
#include "xe_sa_types.h"
+#include "xe_tlb_inval_types.h"
#include "xe_uc_types.h"
struct xe_exec_queue_ops;
@@ -65,6 +66,7 @@ struct xe_mmio_range {
*/
enum xe_steering_type {
L3BANK,
+ NODE,
MSLICE,
LNCF,
DSS,
@@ -72,6 +74,13 @@ enum xe_steering_type {
SQIDI_PSMI,
/*
+ * Although most GAM ranges must be steered to (0,0) and thus use the
+ * INSTANCE0 type farther down, some platforms have special rules
+ * for specific subtypes that require steering to (1,0) instead.
+ */
+ GAM1,
+
+ /*
* On some platforms there are multiple types of MCR registers that
* will always return a non-terminated value at instance (0, 0). We'll
* lump those all into a single category to keep things simple.
@@ -185,34 +194,8 @@ struct xe_gt {
struct work_struct worker;
} reset;
- /** @tlb_invalidation: TLB invalidation state */
- struct {
- /** @tlb_invalidation.seqno: TLB invalidation seqno, protected by CT lock */
-#define TLB_INVALIDATION_SEQNO_MAX 0x100000
- int seqno;
- /**
- * @tlb_invalidation.seqno_recv: last received TLB invalidation seqno,
- * protected by CT lock
- */
- int seqno_recv;
- /**
- * @tlb_invalidation.pending_fences: list of pending fences waiting TLB
- * invaliations, protected by CT lock
- */
- struct list_head pending_fences;
- /**
- * @tlb_invalidation.pending_lock: protects @tlb_invalidation.pending_fences
- * and updating @tlb_invalidation.seqno_recv.
- */
- spinlock_t pending_lock;
- /**
- * @tlb_invalidation.fence_tdr: schedules a delayed call to
- * xe_gt_tlb_fence_timeout after the timeut interval is over.
- */
- struct delayed_work fence_tdr;
- /** @tlb_invalidation.lock: protects TLB invalidation fences */
- spinlock_t lock;
- } tlb_invalidation;
+ /** @tlb_inval: TLB invalidation state */
+ struct xe_tlb_inval tlb_inval;
/**
* @ccs_mode: Number of compute engines enabled.
@@ -227,81 +210,16 @@ struct xe_gt {
/**
* @usm.bb_pool: Pool from which batchbuffers, for USM operations
* (e.g. migrations, fixing page tables), are allocated.
- * Dedicated pool needed so USM operations to not get blocked
+ * Dedicated pool needed so USM operations do not get blocked
* behind any user operations which may have resulted in a
* fault.
*/
struct xe_sa_manager *bb_pool;
/**
* @usm.reserved_bcs_instance: reserved BCS instance used for USM
- * operations (e.g. mmigrations, fixing page tables)
+ * operations (e.g. migrations, fixing page tables)
*/
u16 reserved_bcs_instance;
- /** @usm.pf_wq: page fault work queue, unbound, high priority */
- struct workqueue_struct *pf_wq;
- /** @usm.acc_wq: access counter work queue, unbound, high priority */
- struct workqueue_struct *acc_wq;
- /**
- * @usm.pf_queue: Page fault queue used to sync faults so faults can
- * be processed not under the GuC CT lock. The queue is sized so
- * it can sync all possible faults (1 per physical engine).
- * Multiple queues exists for page faults from different VMs are
- * be processed in parallel.
- */
- struct pf_queue {
- /** @usm.pf_queue.gt: back pointer to GT */
- struct xe_gt *gt;
- /** @usm.pf_queue.data: data in the page fault queue */
- u32 *data;
- /**
- * @usm.pf_queue.num_dw: number of DWORDS in the page
- * fault queue. Dynamically calculated based on the number
- * of compute resources available.
- */
- u32 num_dw;
- /**
- * @usm.pf_queue.tail: tail pointer in DWs for page fault queue,
- * moved by worker which processes faults (consumer).
- */
- u16 tail;
- /**
- * @usm.pf_queue.head: head pointer in DWs for page fault queue,
- * moved by G2H handler (producer).
- */
- u16 head;
- /** @usm.pf_queue.lock: protects page fault queue */
- spinlock_t lock;
- /** @usm.pf_queue.worker: to process page faults */
- struct work_struct worker;
-#define NUM_PF_QUEUE 4
- } pf_queue[NUM_PF_QUEUE];
- /**
- * @usm.acc_queue: Same as page fault queue, cannot process access
- * counters under CT lock.
- */
- struct acc_queue {
- /** @usm.acc_queue.gt: back pointer to GT */
- struct xe_gt *gt;
-#define ACC_QUEUE_NUM_DW 128
- /** @usm.acc_queue.data: data in the page fault queue */
- u32 data[ACC_QUEUE_NUM_DW];
- /**
- * @usm.acc_queue.tail: tail pointer in DWs for access counter queue,
- * moved by worker which processes counters
- * (consumer).
- */
- u16 tail;
- /**
- * @usm.acc_queue.head: head pointer in DWs for access counter queue,
- * moved by G2H handler (producer).
- */
- u16 head;
- /** @usm.acc_queue.lock: protects page fault queue */
- spinlock_t lock;
- /** @usm.acc_queue.worker: to process access counters */
- struct work_struct worker;
-#define NUM_ACC_QUEUE 4
- } acc_queue[NUM_ACC_QUEUE];
} usm;
/** @ordered_wq: used to serialize GT resets and TDRs */
@@ -377,6 +295,8 @@ struct xe_gt {
u16 group_target;
/** @steering.instance_target: instance to steer accesses to */
u16 instance_target;
+ /** @steering.initialized: Whether this steering range is initialized */
+ bool initialized;
} steering[NUM_STEERING_TYPES];
/**
@@ -409,8 +329,8 @@ struct xe_gt {
unsigned long *oob;
/**
* @wa_active.oob_initialized: mark oob as initialized to help
- * detecting misuse of XE_WA() - it can only be called on
- * initialization after OOB WAs have being processed
+ * detecting misuse of XE_GT_WA() - it can only be called on
+ * initialization after OOB WAs have been processed
*/
bool oob_initialized;
} wa_active;
diff --git a/drivers/gpu/drm/xe/xe_guard.h b/drivers/gpu/drm/xe/xe_guard.h
new file mode 100644
index 000000000000..333f8e13b5a1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guard.h
@@ -0,0 +1,119 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GUARD_H_
+#define _XE_GUARD_H_
+
+#include <linux/spinlock.h>
+
+/**
+ * struct xe_guard - Simple logic to protect a feature.
+ *
+ * Implements simple semaphore-like logic that can be used to lock down the
+ * feature unless it is already in use. Allows enabling otherwise incompatible
+ * features, where we can't follow the strict owner semantics required by the
+ * &rw_semaphore.
+ *
+ * NOTE! It shouldn't be used to protect data; use &rw_semaphore instead.
+ */
+struct xe_guard {
+ /**
+ * @counter: implements simple exclusive/lockdown logic:
+ * if == 0 then guard/feature is idle/not in use,
+	 * if < 0 then feature is active and can't be locked down,
+	 * if > 0 then feature is locked down and can't be activated.
+ */
+ int counter;
+
+ /** @name: the name of the guard (useful for debug) */
+ const char *name;
+
+ /** @owner: the info about the last owner of the guard (for debug) */
+ void *owner;
+
+ /** @lock: protects guard's data */
+ spinlock_t lock;
+};
+
+/**
+ * xe_guard_init() - Initialize the guard.
+ * @guard: the &xe_guard to init
+ * @name: name of the guard
+ */
+static inline void xe_guard_init(struct xe_guard *guard, const char *name)
+{
+ spin_lock_init(&guard->lock);
+ guard->counter = 0;
+ guard->name = name;
+}
+
+/**
+ * xe_guard_arm() - Arm the guard for the exclusive/lockdown mode.
+ * @guard: the &xe_guard to arm
+ * @lockdown: arm for lockdown(true) or exclusive(false) mode
+ * @who: optional owner info (for debug only)
+ *
+ * Multiple lockdown requests are allowed.
+ * Only a single exclusive access can be granted.
+ * Will fail if the requested mode conflicts with the guard's current state.
+ * On success, xe_guard_disarm() must be called to release the guard.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static inline int xe_guard_arm(struct xe_guard *guard, bool lockdown, void *who)
+{
+ guard(spinlock)(&guard->lock);
+
+ if (lockdown) {
+ if (guard->counter < 0)
+ return -EBUSY;
+ guard->counter++;
+ } else {
+ if (guard->counter > 0)
+ return -EPERM;
+ if (guard->counter < 0)
+ return -EUSERS;
+ guard->counter--;
+ }
+
+ guard->owner = who;
+ return 0;
+}
+
+/**
+ * xe_guard_disarm() - Disarm the guard from exclusive/lockdown mode.
+ * @guard: the &xe_guard to disarm
+ * @lockdown: disarm from lockdown(true) or exclusive(false) mode
+ *
+ * Return: true if successfully disarmed or false in case of mismatch.
+ */
+static inline bool xe_guard_disarm(struct xe_guard *guard, bool lockdown)
+{
+ guard(spinlock)(&guard->lock);
+
+ if (lockdown) {
+ if (guard->counter <= 0)
+ return false;
+ guard->counter--;
+ } else {
+ if (guard->counter != -1)
+ return false;
+ guard->counter++;
+ }
+ return true;
+}
+
+/**
+ * xe_guard_mode_str() - Convert guard mode into a string.
+ * @lockdown: flag used to select lockdown or exclusive mode
+ *
+ * Return: "lockdown" or "exclusive" string.
+ */
+static inline const char *xe_guard_mode_str(bool lockdown)
+{
+ return lockdown ? "lockdown" : "exclusive";
+}
+
+#endif
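
A minimal usage sketch of the arm/disarm pair above; the feature and admin callers are hypothetical, not part of this patch, and xe_guard_init() is assumed to have run once at probe time:

static struct xe_guard example_guard;

static int example_feature_start(void *owner)
{
	/* exclusive mode: -EPERM if locked down, -EUSERS if already active */
	return xe_guard_arm(&example_guard, false, owner);
}

static void example_feature_stop(void)
{
	xe_guard_disarm(&example_guard, false);
}

static int example_admin_lockdown(void *owner)
{
	/* lockdown mode: -EBUSY while the feature is active; may be nested */
	return xe_guard_arm(&example_guard, true, owner);
}

Unlike an rw_semaphore, the arm and disarm calls do not have to happen in the same context, which is the reason this helper exists.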
diff --git a/drivers/gpu/drm/xe/xe_guc.c b/drivers/gpu/drm/xe/xe_guc.c
index bac5471a1a78..a686b04879d6 100644
--- a/drivers/gpu/drm/xe/xe_guc.c
+++ b/drivers/gpu/drm/xe/xe_guc.c
@@ -5,6 +5,7 @@
#include "xe_guc.h"
+#include <linux/iopoll.h>
#include <drm/drm_managed.h>
#include <generated/xe_wa_oob.h>
@@ -16,12 +17,14 @@
#include "regs/xe_guc_regs.h"
#include "regs/xe_irq_regs.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_vf.h"
#include "xe_gt_throttle.h"
+#include "xe_gt_sriov_pf_migration.h"
#include "xe_guc_ads.h"
#include "xe_guc_buf.h"
#include "xe_guc_capture.h"
@@ -29,6 +32,7 @@
#include "xe_guc_db_mgr.h"
#include "xe_guc_engine_activity.h"
#include "xe_guc_hwconfig.h"
+#include "xe_guc_klv_helpers.h"
#include "xe_guc_log.h"
#include "xe_guc_pc.h"
#include "xe_guc_relay.h"
@@ -37,6 +41,7 @@
#include "xe_mmio.h"
#include "xe_platform_types.h"
#include "xe_sriov.h"
+#include "xe_sriov_pf_migration.h"
#include "xe_uc.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
@@ -59,7 +64,7 @@ static u32 guc_bo_ggtt_addr(struct xe_guc *guc,
/* GuC addresses above GUC_GGTT_TOP don't map through the GTT */
xe_assert(xe, addr >= xe_wopcm_size(guc_to_xe(guc)));
xe_assert(xe, addr < GUC_GGTT_TOP);
- xe_assert(xe, bo->size <= GUC_GGTT_TOP - addr);
+ xe_assert(xe, xe_bo_size(bo) <= GUC_GGTT_TOP - addr);
return addr;
}
@@ -72,19 +77,25 @@ static u32 guc_ctl_debug_flags(struct xe_guc *guc)
if (!GUC_LOG_LEVEL_IS_VERBOSE(level))
flags |= GUC_LOG_DISABLED;
else
- flags |= GUC_LOG_LEVEL_TO_VERBOSITY(level) <<
- GUC_LOG_VERBOSITY_SHIFT;
+ flags |= FIELD_PREP(GUC_LOG_VERBOSITY, GUC_LOG_LEVEL_TO_VERBOSITY(level));
return flags;
}
static u32 guc_ctl_feature_flags(struct xe_guc *guc)
{
+ struct xe_device *xe = guc_to_xe(guc);
u32 flags = GUC_CTL_ENABLE_LITE_RESTORE;
- if (!guc_to_xe(guc)->info.skip_guc_pc)
+ if (!xe->info.skip_guc_pc)
flags |= GUC_CTL_ENABLE_SLPC;
+ if (xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev)))
+ flags |= GUC_CTL_ENABLE_PSMI_LOGGING;
+
+ if (xe_guc_using_main_gamctrl_queues(guc))
+ flags |= GUC_CTL_MAIN_GAMCTRL_QUEUES;
+
return flags;
}
@@ -116,22 +127,14 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
BUILD_BUG_ON(!CAPTURE_BUFFER_SIZE);
BUILD_BUG_ON(!IS_ALIGNED(CAPTURE_BUFFER_SIZE, CAPTURE_UNIT));
- BUILD_BUG_ON((CRASH_BUFFER_SIZE / LOG_UNIT - 1) >
- (GUC_LOG_CRASH_MASK >> GUC_LOG_CRASH_SHIFT));
- BUILD_BUG_ON((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) >
- (GUC_LOG_DEBUG_MASK >> GUC_LOG_DEBUG_SHIFT));
- BUILD_BUG_ON((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) >
- (GUC_LOG_CAPTURE_MASK >> GUC_LOG_CAPTURE_SHIFT));
-
flags = GUC_LOG_VALID |
GUC_LOG_NOTIFY_ON_HALF_FULL |
CAPTURE_FLAG |
LOG_FLAG |
- ((CRASH_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_CRASH_SHIFT) |
- ((DEBUG_BUFFER_SIZE / LOG_UNIT - 1) << GUC_LOG_DEBUG_SHIFT) |
- ((CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) <<
- GUC_LOG_CAPTURE_SHIFT) |
- (offset << GUC_LOG_BUF_ADDR_SHIFT);
+ FIELD_PREP(GUC_LOG_CRASH, CRASH_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_DEBUG, DEBUG_BUFFER_SIZE / LOG_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_CAPTURE, CAPTURE_BUFFER_SIZE / CAPTURE_UNIT - 1) |
+ FIELD_PREP(GUC_LOG_BUF_ADDR, offset);
#undef LOG_UNIT
#undef LOG_FLAG
@@ -144,7 +147,7 @@ static u32 guc_ctl_log_params_flags(struct xe_guc *guc)
static u32 guc_ctl_ads_flags(struct xe_guc *guc)
{
u32 ads = guc_bo_ggtt_addr(guc, guc->ads.bo) >> PAGE_SHIFT;
- u32 flags = ads << GUC_ADS_ADDR_SHIFT;
+ u32 flags = FIELD_PREP(GUC_ADS_ADDR, ads);
return flags;
}
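
The log/ADS conversions above replace open-coded shifts with FIELD_PREP(). For a contiguous mask the two forms produce the same bit pattern; the mask below is an arbitrary example, not the real GUC_ADS_ADDR definition:

#include <linux/bitfield.h>
#include <linux/bits.h>

#define EXAMPLE_ADDR_MASK	GENMASK(31, 12)	/* illustrative field */

static u32 example_pack(u32 addr)
{
	u32 manual = (addr << 12) & EXAMPLE_ADDR_MASK;
	u32 prepped = FIELD_PREP(EXAMPLE_ADDR_MASK, addr);

	/* identical results; FIELD_PREP additionally gets compile-time
	 * checking when the value is a constant too large for the field */
	return manual == prepped ? prepped : 0;
}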
@@ -156,7 +159,7 @@ static bool needs_wa_dual_queue(struct xe_gt *gt)
* on RCS and CCSes with different address spaces, which on DG2 is
* required as a WA for an HW bug.
*/
- if (XE_WA(gt, 22011391025))
+ if (XE_GT_WA(gt, 22011391025))
return true;
/*
@@ -183,10 +186,10 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
u32 flags = 0;
- if (XE_WA(gt, 22012773006))
+ if (XE_GT_WA(gt, 22012773006))
flags |= GUC_WA_POLLCS;
- if (XE_WA(gt, 14014475959))
+ if (XE_GT_WA(gt, 14014475959))
flags |= GUC_WA_HOLD_CCS_SWITCHOUT;
if (needs_wa_dual_queue(gt))
@@ -200,19 +203,22 @@ static u32 guc_ctl_wa_flags(struct xe_guc *guc)
if (GRAPHICS_VERx100(xe) < 1270)
flags |= GUC_WA_PRE_PARSER;
- if (XE_WA(gt, 22012727170) || XE_WA(gt, 22012727685))
+ if (XE_GT_WA(gt, 22012727170) || XE_GT_WA(gt, 22012727685))
flags |= GUC_WA_CONTEXT_ISOLATION;
- if (XE_WA(gt, 18020744125) &&
+ if (XE_GT_WA(gt, 18020744125) &&
!xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_RENDER))
flags |= GUC_WA_RCS_REGS_IN_CCS_REGS_LIST;
- if (XE_WA(gt, 1509372804))
+ if (XE_GT_WA(gt, 1509372804))
flags |= GUC_WA_RENDER_RST_RC6_EXIT;
- if (XE_WA(gt, 14018913170))
+ if (XE_GT_WA(gt, 14018913170))
flags |= GUC_WA_ENABLE_TSC_CHECK_ON_RC6;
+ if (XE_GT_WA(gt, 16023683509))
+ flags |= GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6;
+
return flags;
}
@@ -420,7 +426,7 @@ static int guc_g2g_register(struct xe_guc *near_guc, struct xe_gt *far_gt, u32 t
buf = base + G2G_DESC_AREA_SIZE + slot * G2G_BUFFER_SIZE;
xe_assert(xe, (desc - base + G2G_DESC_SIZE) <= G2G_DESC_AREA_SIZE);
- xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= g2g_bo->size);
+ xe_assert(xe, (buf - base + G2G_BUFFER_SIZE) <= xe_bo_size(g2g_bo));
return guc_action_register_g2g_buffer(near_guc, type, far_tile, far_dev,
desc, buf, G2G_BUFFER_SIZE);
@@ -570,6 +576,86 @@ err_deregister:
return err;
}
+static int __guc_opt_in_features_enable(struct xe_guc *guc, u64 addr, u32 num_dwords)
+{
+ u32 action[] = {
+ XE_GUC_ACTION_OPT_IN_FEATURE_KLV,
+ lower_32_bits(addr),
+ upper_32_bits(addr),
+ num_dwords
+ };
+
+ return xe_guc_ct_send_block(&guc->ct, action, ARRAY_SIZE(action));
+}
+
+static bool supports_dynamic_ics(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+
+	/* Dynamic ICS is available on PVC and on Xe2 and newer platforms. */
+ if (xe->info.platform != XE_PVC && GRAPHICS_VER(xe) < 20)
+ return false;
+
+ /*
+ * The feature is currently not compatible with multi-lrc, so the GuC
+ * does not support it at all on the media engines (which are the main
+ * users of mlrc). On the primary GT side, to avoid it being used in
+ * conjunction with mlrc, we only enable it if we are in single CCS
+ * mode.
+ */
+ if (xe_gt_is_media_type(gt) || gt->ccs_mode > 1)
+ return false;
+
+ /*
+ * Dynamic ICS requires GuC v70.40.1, which maps to compatibility
+ * version v1.18.4.
+ */
+ return GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 18, 4);
+}
+
+#define OPT_IN_MAX_DWORDS 16
+int xe_guc_opt_in_features_enable(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ CLASS(xe_guc_buf, buf)(&guc->buf, OPT_IN_MAX_DWORDS);
+ u32 count = 0;
+ u32 *klvs;
+ int ret;
+
+ if (!xe_guc_buf_is_valid(buf))
+ return -ENOBUFS;
+
+ klvs = xe_guc_buf_cpu_ptr(buf);
+
+ /*
+ * The extra CAT error type opt-in was added in GuC v70.17.0, which maps
+ * to compatibility version v1.7.0.
+ * Note that the GuC allows enabling this KLV even on platforms that do
+ * not support the extra type; in such case the returned type variable
+ * will be set to a known invalid value which we can check against.
+ */
+ if (GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 7, 0))
+ klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_EXT_CAT_ERR_TYPE);
+
+ if (supports_dynamic_ics(guc))
+ klvs[count++] = PREP_GUC_KLV_TAG(OPT_IN_FEATURE_DYNAMIC_INHIBIT_CONTEXT_SWITCH);
+
+ if (count) {
+ xe_assert(xe, count <= OPT_IN_MAX_DWORDS);
+
+ ret = __guc_opt_in_features_enable(guc, xe_guc_buf_flush(buf), count);
+ if (ret < 0) {
+ xe_gt_err(guc_to_gt(guc),
+ "failed to enable GuC opt-in features: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
static void guc_fini_hw(void *arg)
{
struct xe_guc *guc = arg;
@@ -577,7 +663,7 @@ static void guc_fini_hw(void *arg)
unsigned int fw_ref;
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- xe_uc_fini_hw(&guc_to_gt(guc)->uc);
+ xe_uc_sanitize_reset(&guc_to_gt(guc)->uc);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
guc_g2g_fini(guc);
@@ -620,30 +706,54 @@ static int xe_guc_realloc_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
- ret = xe_managed_bo_reinit_in_vram(xe, tile, &guc->ct.bo);
- if (ret)
- return ret;
-
return 0;
}
-static int vf_guc_init(struct xe_guc *guc)
+static int vf_guc_init_noalloc(struct xe_guc *guc)
{
+ struct xe_gt *gt = guc_to_gt(guc);
int err;
- xe_guc_comm_init_early(guc);
-
- err = xe_guc_ct_init(&guc->ct);
+ err = xe_gt_sriov_vf_bootstrap(gt);
if (err)
return err;
- err = xe_guc_relay_init(&guc->relay);
+ err = xe_gt_sriov_vf_query_config(gt);
if (err)
return err;
return 0;
}
+int xe_guc_init_noalloc(struct xe_guc *guc)
+{
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ xe_guc_comm_init_early(guc);
+
+ ret = xe_guc_ct_init_noalloc(&guc->ct);
+ if (ret)
+ goto out;
+
+ ret = xe_guc_relay_init(&guc->relay);
+ if (ret)
+ goto out;
+
+ if (IS_SRIOV_VF(xe)) {
+ ret = vf_guc_init_noalloc(guc);
+ if (ret)
+ goto out;
+ }
+
+ return 0;
+
+out:
+ xe_gt_err(gt, "GuC init failed with %pe\n", ERR_PTR(ret));
+ return ret;
+}
+
int xe_guc_init(struct xe_guc *guc)
{
struct xe_device *xe = guc_to_xe(guc);
@@ -653,13 +763,13 @@ int xe_guc_init(struct xe_guc *guc)
guc->fw.type = XE_UC_FW_TYPE_GUC;
ret = xe_uc_fw_init(&guc->fw);
if (ret)
- goto out;
+ return ret;
if (!xe_uc_fw_is_enabled(&guc->fw))
return 0;
if (IS_SRIOV_VF(xe)) {
- ret = vf_guc_init(guc);
+ ret = xe_guc_ct_init(&guc->ct);
if (ret)
goto out;
return 0;
@@ -681,10 +791,6 @@ int xe_guc_init(struct xe_guc *guc)
if (ret)
goto out;
- ret = xe_guc_relay_init(&guc->relay);
- if (ret)
- goto out;
-
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOADABLE);
ret = devm_add_action_or_reset(xe->drm.dev, guc_fini_hw, guc);
@@ -693,8 +799,6 @@ int xe_guc_init(struct xe_guc *guc)
guc_init_params(guc);
- xe_guc_comm_init_early(guc);
-
return 0;
out:
@@ -710,11 +814,23 @@ static int vf_guc_init_post_hwconfig(struct xe_guc *guc)
if (err)
return err;
+ err = xe_guc_buf_cache_init(&guc->buf);
+ if (err)
+ return err;
+
/* XXX xe_guc_db_mgr_init not needed for now */
return 0;
}
+static u32 guc_additional_cache_size(struct xe_device *xe)
+{
+ if (IS_SRIOV_PF(xe) && xe_sriov_pf_migration_supported(xe))
+ return XE_GT_SRIOV_PF_MIGRATION_GUC_DATA_MAX_SIZE;
+ else
+ return 0; /* Fallback to default size */
+}
+
/**
* xe_guc_init_post_hwconfig - initialize GuC post hwconfig load
* @guc: The GuC object
@@ -732,6 +848,10 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
+ ret = xe_guc_ct_init_post_hwconfig(&guc->ct);
+ if (ret)
+ return ret;
+
guc_init_params_post_hwconfig(guc);
ret = xe_guc_submit_init(guc, ~0);
@@ -750,7 +870,8 @@ int xe_guc_init_post_hwconfig(struct xe_guc *guc)
if (ret)
return ret;
- ret = xe_guc_buf_cache_init(&guc->buf);
+ ret = xe_guc_buf_cache_init_with_size(&guc->buf,
+ guc_additional_cache_size(guc_to_xe(guc)));
if (ret)
return ret;
@@ -763,15 +884,17 @@ int xe_guc_post_load_init(struct xe_guc *guc)
xe_guc_ads_populate_post_load(&guc->ads);
+ ret = xe_guc_opt_in_features_enable(guc);
+ if (ret)
+ return ret;
+
if (xe_guc_g2g_wanted(guc_to_xe(guc))) {
ret = guc_g2g_start(guc);
if (ret)
return ret;
}
- guc->submission_state.enabled = true;
-
- return 0;
+ return xe_guc_submit_enable(guc);
}
int xe_guc_reset(struct xe_guc *guc)
@@ -863,33 +986,109 @@ static int guc_xfer_rsa(struct xe_guc *guc)
}
/*
- * Check a previously read GuC status register (GUC_STATUS) looking for
- * known terminal states (either completion or failure) of either the
- * microkernel status field or the boot ROM status field. Returns +1 for
- * successful completion, -1 for failure and 0 for any intermediate state.
+ * Wait for the GuC to start up.
+ *
+ * Measurements indicate this should take no more than 20ms (assuming the GT
+ * clock is at maximum frequency). However, thermal throttling and other issues
+ * can prevent the clock from hitting max, making the load take significantly
+ * longer. Allow up to 3s as a safety margin in normal builds. For
+ * CONFIG_DRM_XE_DEBUG allow up to 20s to account for slower execution, issues
+ * in PCODE, driver, fan, etc.
+ *
+ * Keep checking GUC_STATUS every 10ms, with a debug message every 100
+ * attempts as an "I'm slow, but alive" indication. Regardless, if it takes more
+ * than 200ms, emit a warning.
+ */
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define GUC_LOAD_TIMEOUT_SEC 20
+#else
+#define GUC_LOAD_TIMEOUT_SEC 3
+#endif
+#define GUC_LOAD_TIME_WARN_MSEC 200
+
+static void print_load_status_err(struct xe_gt *gt, u32 status)
+{
+ struct xe_mmio *mmio = &gt->mmio;
+ u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
+ u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+
+ xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
+ REG_FIELD_GET(GS_MIA_IN_RESET, status),
+ bootrom, ukernel,
+ REG_FIELD_GET(GS_MIA_MASK, status),
+ REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
+
+ switch (bootrom) {
+ case XE_BOOTROM_STATUS_NO_KEY_FOUND:
+ xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
+ xe_mmio_read32(mmio, GUC_HEADER_INFO));
+ break;
+ case XE_BOOTROM_STATUS_RSA_FAILED:
+ xe_gt_err(gt, "firmware signature verification failed\n");
+ break;
+ case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
+ xe_gt_err(gt, "firmware production part check failure\n");
+ break;
+ }
+
+ switch (ukernel) {
+ case XE_GUC_LOAD_STATUS_HWCONFIG_START:
+ xe_gt_err(gt, "still extracting hwconfig table.\n");
+ break;
+ case XE_GUC_LOAD_STATUS_EXCEPTION:
+ xe_gt_err(gt, "firmware exception. EIP: %#x\n",
+ xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
+ break;
+ case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID:
+ xe_gt_err(gt, "illegal init/ADS data\n");
+ break;
+ case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ xe_gt_err(gt, "illegal register in save/restore workaround list\n");
+ break;
+ case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
+ xe_gt_err(gt, "illegal workaround KLV data\n");
+ break;
+ case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG:
+ xe_gt_err(gt, "illegal feature flag specified\n");
+ break;
+ }
+}
+
+/*
+ * Check GUC_STATUS looking for known terminal states (either completion or
+ * failure) of either the microkernel status field or the boot ROM status field.
+ *
+ * Returns 1 for successful completion, -1 for failure and 0 for any
+ * intermediate state.
*/
-static int guc_load_done(u32 status)
+static int guc_load_done(struct xe_gt *gt, u32 *status, u32 *tries)
{
- u32 uk_val = REG_FIELD_GET(GS_UKERNEL_MASK, status);
- u32 br_val = REG_FIELD_GET(GS_BOOTROM_MASK, status);
+ u32 ukernel, bootrom;
+
+ *status = xe_mmio_read32(&gt->mmio, GUC_STATUS);
+ ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, *status);
+ bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, *status);
- switch (uk_val) {
+ switch (ukernel) {
case XE_GUC_LOAD_STATUS_READY:
return 1;
-
case XE_GUC_LOAD_STATUS_ERROR_DEVID_BUILD_MISMATCH:
case XE_GUC_LOAD_STATUS_GUC_PREPROD_BUILD_MISMATCH:
case XE_GUC_LOAD_STATUS_ERROR_DEVID_INVALID_GUCTYPE:
case XE_GUC_LOAD_STATUS_HWCONFIG_ERROR:
+ case XE_GUC_LOAD_STATUS_BOOTROM_VERSION_MISMATCH:
case XE_GUC_LOAD_STATUS_DPC_ERROR:
case XE_GUC_LOAD_STATUS_EXCEPTION:
case XE_GUC_LOAD_STATUS_INIT_DATA_INVALID:
case XE_GUC_LOAD_STATUS_MPU_DATA_INVALID:
case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
+ case XE_GUC_LOAD_STATUS_KLV_WORKAROUND_INIT_ERROR:
+ case XE_GUC_LOAD_STATUS_INVALID_FTR_FLAG:
return -1;
}
- switch (br_val) {
+ switch (bootrom) {
case XE_BOOTROM_STATUS_NO_KEY_FOUND:
case XE_BOOTROM_STATUS_RSA_FAILED:
case XE_BOOTROM_STATUS_PAVPC_FAILED:
@@ -903,155 +1102,63 @@ static int guc_load_done(u32 status)
return -1;
}
- return 0;
-}
+ if (++*tries >= 100) {
+ struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
-static s32 guc_pc_get_cur_freq(struct xe_guc_pc *guc_pc)
-{
- u32 freq;
- int ret = xe_guc_pc_get_cur_freq(guc_pc, &freq);
+ *tries = 0;
+ xe_gt_dbg(gt, "GuC load still in progress, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
+ xe_guc_pc_get_act_freq(guc_pc),
+ xe_guc_pc_get_cur_freq_fw(guc_pc),
+ *status, ukernel, bootrom);
+ }
- return ret ? ret : freq;
+ return 0;
}
-/*
- * Wait for the GuC to start up.
- *
- * Measurements indicate this should take no more than 20ms (assuming the GT
- * clock is at maximum frequency). However, thermal throttling and other issues
- * can prevent the clock hitting max and thus making the load take significantly
- * longer. Allow up to 200ms as a safety margin for real world worst case situations.
- *
- * However, bugs anywhere from KMD to GuC to PCODE to fan failure in a CI farm can
- * lead to even longer times. E.g. if the GT is clamped to minimum frequency then
- * the load times can be in the seconds range. So the timeout is increased for debug
- * builds to ensure that problems can be correctly analysed. For release builds, the
- * timeout is kept short so that users don't wait forever to find out that there is a
- * problem. In either case, if the load took longer than is reasonable even with some
- * 'sensible' throttling, then flag a warning because something is not right.
- *
- * Note that there is a limit on how long an individual usleep_range() can wait for,
- * hence longer waits require wrapping a shorter wait in a loop.
- *
- * Note that the only reason an end user should hit the shorter timeout is in case of
- * extreme thermal throttling. And a system that is that hot during boot is probably
- * dead anyway!
- */
-#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
-#define GUC_LOAD_RETRY_LIMIT 20
-#else
-#define GUC_LOAD_RETRY_LIMIT 3
-#endif
-#define GUC_LOAD_TIME_WARN_MS 200
-
-static void guc_wait_ucode(struct xe_guc *guc)
+static int guc_wait_ucode(struct xe_guc *guc)
{
struct xe_gt *gt = guc_to_gt(guc);
- struct xe_mmio *mmio = &gt->mmio;
struct xe_guc_pc *guc_pc = &gt->uc.guc.pc;
- ktime_t before, after, delta;
- int load_done;
- u32 status = 0;
- int count = 0;
+ u32 before_freq, act_freq, cur_freq;
+ u32 status = 0, tries = 0;
+ ktime_t before;
u64 delta_ms;
- u32 before_freq;
+ int ret;
before_freq = xe_guc_pc_get_act_freq(guc_pc);
before = ktime_get();
- /*
- * Note, can't use any kind of timing information from the call to xe_mmio_wait.
- * It could return a thousand intermediate stages at random times. Instead, must
- * manually track the total time taken and locally implement the timeout.
- */
- do {
- u32 last_status = status & (GS_UKERNEL_MASK | GS_BOOTROM_MASK);
- int ret;
- /*
- * Wait for any change (intermediate or terminal) in the status register.
- * Note, the return value is a don't care. The only failure code is timeout
- * but the timeouts need to be accumulated over all the intermediate partial
- * timeouts rather than allowing a huge timeout each time. So basically, need
- * to treat a timeout no different to a value change.
- */
- ret = xe_mmio_wait32_not(mmio, GUC_STATUS, GS_UKERNEL_MASK | GS_BOOTROM_MASK,
- last_status, 1000 * 1000, &status, false);
- if (ret < 0)
- count++;
- after = ktime_get();
- delta = ktime_sub(after, before);
- delta_ms = ktime_to_ms(delta);
-
- load_done = guc_load_done(status);
- if (load_done != 0)
- break;
+ ret = poll_timeout_us(ret = guc_load_done(gt, &status, &tries), ret,
+ 10 * USEC_PER_MSEC,
+ GUC_LOAD_TIMEOUT_SEC * USEC_PER_SEC, false);
- if (delta_ms >= (GUC_LOAD_RETRY_LIMIT * 1000))
- break;
+ delta_ms = ktime_to_ms(ktime_sub(ktime_get(), before));
+ act_freq = xe_guc_pc_get_act_freq(guc_pc);
+ cur_freq = xe_guc_pc_get_cur_freq_fw(guc_pc);
- xe_gt_dbg(gt, "load still in progress, timeouts = %d, freq = %dMHz (req %dMHz), status = 0x%08X [0x%02X/%02X]\n",
- count, xe_guc_pc_get_act_freq(guc_pc),
- guc_pc_get_cur_freq(guc_pc), status,
- REG_FIELD_GET(GS_BOOTROM_MASK, status),
- REG_FIELD_GET(GS_UKERNEL_MASK, status));
- } while (1);
-
- if (load_done != 1) {
- u32 ukernel = REG_FIELD_GET(GS_UKERNEL_MASK, status);
- u32 bootrom = REG_FIELD_GET(GS_BOOTROM_MASK, status);
-
- xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz), done = %d\n",
+ if (ret) {
+ xe_gt_err(gt, "load failed: status = 0x%08X, time = %lldms, freq = %dMHz (req %dMHz)\n",
status, delta_ms, xe_guc_pc_get_act_freq(guc_pc),
- guc_pc_get_cur_freq(guc_pc), load_done);
- xe_gt_err(gt, "load failed: status: Reset = %d, BootROM = 0x%02X, UKernel = 0x%02X, MIA = 0x%02X, Auth = 0x%02X\n",
- REG_FIELD_GET(GS_MIA_IN_RESET, status),
- bootrom, ukernel,
- REG_FIELD_GET(GS_MIA_MASK, status),
- REG_FIELD_GET(GS_AUTH_STATUS_MASK, status));
-
- switch (bootrom) {
- case XE_BOOTROM_STATUS_NO_KEY_FOUND:
- xe_gt_err(gt, "invalid key requested, header = 0x%08X\n",
- xe_mmio_read32(mmio, GUC_HEADER_INFO));
- break;
-
- case XE_BOOTROM_STATUS_RSA_FAILED:
- xe_gt_err(gt, "firmware signature verification failed\n");
- break;
+ xe_guc_pc_get_cur_freq_fw(guc_pc));
+ print_load_status_err(gt, status);
- case XE_BOOTROM_STATUS_PROD_KEY_CHECK_FAILURE:
- xe_gt_err(gt, "firmware production part check failure\n");
- break;
- }
-
- switch (ukernel) {
- case XE_GUC_LOAD_STATUS_EXCEPTION:
- xe_gt_err(gt, "firmware exception. EIP: %#x\n",
- xe_mmio_read32(mmio, SOFT_SCRATCH(13)));
- break;
-
- case XE_GUC_LOAD_STATUS_INIT_MMIO_SAVE_RESTORE_INVALID:
- xe_gt_err(gt, "illegal register in save/restore workaround list\n");
- break;
-
- case XE_GUC_LOAD_STATUS_HWCONFIG_START:
- xe_gt_err(gt, "still extracting hwconfig table.\n");
- break;
- }
+ return -EPROTO;
+ }
- xe_device_declare_wedged(gt_to_xe(gt));
- } else if (delta_ms > GUC_LOAD_TIME_WARN_MS) {
- xe_gt_warn(gt, "excessive init time: %lldms! [status = 0x%08X, timeouts = %d]\n",
- delta_ms, status, count);
- xe_gt_warn(gt, "excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n",
- xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
- before_freq, xe_gt_throttle_get_limit_reasons(gt));
+ if (delta_ms > GUC_LOAD_TIME_WARN_MSEC) {
+ xe_gt_warn(gt, "GuC load: excessive init time: %lldms! [status = 0x%08X]\n",
+ delta_ms, status);
+ xe_gt_warn(gt, "GuC load: excessive init time: [freq = %dMHz (req = %dMHz), before = %dMHz, perf_limit_reasons = 0x%08X]\n",
+ act_freq, cur_freq, before_freq,
+ xe_gt_throttle_get_limit_reasons(gt));
} else {
- xe_gt_dbg(gt, "init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X, timeouts = %d\n",
- delta_ms, xe_guc_pc_get_act_freq(guc_pc), guc_pc_get_cur_freq(guc_pc),
- before_freq, status, count);
+ xe_gt_dbg(gt, "GuC load: init took %lldms, freq = %dMHz (req = %dMHz), before = %dMHz, status = 0x%08X\n",
+ delta_ms, act_freq, cur_freq, before_freq, status);
}
+
+ return 0;
}
+ALLOW_ERROR_INJECTION(guc_wait_ucode, ERRNO);
static int __xe_guc_upload(struct xe_guc *guc)
{
@@ -1083,14 +1190,16 @@ static int __xe_guc_upload(struct xe_guc *guc)
goto out;
/* Wait for authentication */
- guc_wait_ucode(guc);
+ ret = guc_wait_ucode(guc);
+ if (ret)
+ goto out;
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_RUNNING);
return 0;
out:
xe_uc_fw_change_status(&guc->fw, XE_UC_FIRMWARE_LOAD_FAIL);
- return 0 /* FIXME: ret, don't want to stop load currently */;
+ return ret;
}
static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc)
@@ -1098,14 +1207,6 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc)
struct xe_gt *gt = guc_to_gt(guc);
int ret;
- ret = xe_gt_sriov_vf_bootstrap(gt);
- if (ret)
- return ret;
-
- ret = xe_gt_sriov_vf_query_config(gt);
- if (ret)
- return ret;
-
ret = xe_guc_hwconfig_init(guc);
if (ret)
return ret;
@@ -1116,13 +1217,17 @@ static int vf_guc_min_load_for_hwconfig(struct xe_guc *guc)
ret = xe_gt_sriov_vf_connect(gt);
if (ret)
- return ret;
+ goto err_out;
ret = xe_gt_sriov_vf_query_runtime(gt);
if (ret)
- return ret;
+ goto err_out;
return 0;
+
+err_out:
+ xe_guc_sanitize(guc);
+ return ret;
}
/**
@@ -1164,8 +1269,13 @@ int xe_guc_min_load_for_hwconfig(struct xe_guc *guc)
int xe_guc_upload(struct xe_guc *guc)
{
+ struct xe_gt *gt = guc_to_gt(guc);
+
xe_guc_ads_populate(&guc->ads);
+ if (xe_guc_using_main_gamctrl_queues(guc))
+ xe_mmio_write32(&gt->mmio, MAIN_GAMCTRL_MODE, MAIN_GAMCTRL_QUEUE_SELECT);
+
return __xe_guc_upload(guc);
}
@@ -1285,6 +1395,7 @@ int xe_guc_mmio_send_recv(struct xe_guc *guc, const u32 *request,
struct xe_reg reply_reg = xe_gt_is_media_type(gt) ?
MED_VF_SW_FLAG(0) : VF_SW_FLAG(0);
const u32 LAST_INDEX = VF_SW_FLAG_COUNT - 1;
+ bool lost = false;
int ret;
int i;
@@ -1318,6 +1429,12 @@ retry:
FIELD_PREP(GUC_HXG_MSG_0_ORIGIN, GUC_HXG_ORIGIN_GUC),
50000, &reply, false);
if (ret) {
+ /* scratch registers might be cleared during FLR, try once more */
+ if (!reply && !lost) {
+ xe_gt_dbg(gt, "GuC mmio request %#x: lost, trying again\n", request[0]);
+ lost = true;
+ goto retry;
+ }
timeout:
xe_gt_err(gt, "GuC mmio request %#x: no reply %#x\n",
request[0], reply);
@@ -1341,7 +1458,7 @@ timeout:
BUILD_BUG_ON((GUC_HXG_TYPE_RESPONSE_SUCCESS ^ GUC_HXG_TYPE_RESPONSE_FAILURE) != 1);
ret = xe_mmio_wait32(mmio, reply_reg, resp_mask, resp_mask,
- 1000000, &header, false);
+ 2000000, &header, false);
if (unlikely(FIELD_GET(GUC_HXG_MSG_0_ORIGIN, header) !=
GUC_HXG_ORIGIN_GUC))
@@ -1465,7 +1582,7 @@ void xe_guc_sanitize(struct xe_guc *guc)
{
xe_uc_fw_sanitize(&guc->fw);
xe_guc_ct_disable(&guc->ct);
- guc->submission_state.enabled = false;
+ xe_guc_submit_disable(guc);
}
int xe_guc_reset_prepare(struct xe_guc *guc)
@@ -1558,3 +1675,45 @@ void xe_guc_declare_wedged(struct xe_guc *guc)
xe_guc_ct_stop(&guc->ct);
xe_guc_submit_wedge(guc);
}
+
+/**
+ * xe_guc_using_main_gamctrl_queues() - Detect which reporting queues to use.
+ * @guc: The GuC object
+ *
+ * For Xe3p and beyond, we want to program the hardware to use the
+ * "Main GAMCTRL queue" rather than the legacy queue before we upload
+ * the GuC firmware. This will allow the GuC to use a new set of
+ * registers for pagefault handling and avoid some unnecessary
+ * complications with MCR register range handling.
+ *
+ * Return: true if the new Main GAMCTRL queues can be used.
+ */
+bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ /*
+ * For the Xe3p media GT (35), the GuC and CS subunits may still be Xe3,
+ * which lacks Main GAMCTRL support. Reserved bits in the GMD_ID register
+ * report the IP version of the subunits.
+ */
+ if (xe_gt_is_media_type(gt) && MEDIA_VER(gt_to_xe(gt)) == 35) {
+ u32 val = xe_mmio_read32(&gt->mmio, GMD_ID);
+ u32 subip = REG_FIELD_GET(GMD_ID_SUBIP_FLAG_MASK, val);
+
+ if (!subip)
+ return true;
+
+ xe_gt_WARN(gt, subip != 1,
+ "GMD_ID has unknown value in the SUBIP_FLAG field - 0x%x\n",
+ subip);
+
+ return false;
+ }
+
+ return GT_VER(gt) >= 35;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_guc_g2g_test.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc.h b/drivers/gpu/drm/xe/xe_guc.h
index 58338be44558..e2d4c5f44ae3 100644
--- a/drivers/gpu/drm/xe/xe_guc.h
+++ b/drivers/gpu/drm/xe/xe_guc.h
@@ -26,6 +26,7 @@
struct drm_printer;
void xe_guc_comm_init_early(struct xe_guc *guc);
+int xe_guc_init_noalloc(struct xe_guc *guc);
int xe_guc_init(struct xe_guc *guc);
int xe_guc_init_post_hwconfig(struct xe_guc *guc);
int xe_guc_post_load_init(struct xe_guc *guc);
@@ -33,6 +34,7 @@ int xe_guc_reset(struct xe_guc *guc);
int xe_guc_upload(struct xe_guc *guc);
int xe_guc_min_load_for_hwconfig(struct xe_guc *guc);
int xe_guc_enable_communication(struct xe_guc *guc);
+int xe_guc_opt_in_features_enable(struct xe_guc *guc);
int xe_guc_suspend(struct xe_guc *guc);
void xe_guc_notify(struct xe_guc *guc);
int xe_guc_auth_huc(struct xe_guc *guc, u32 rsa_addr);
@@ -50,6 +52,11 @@ void xe_guc_stop_prepare(struct xe_guc *guc);
void xe_guc_stop(struct xe_guc *guc);
int xe_guc_start(struct xe_guc *guc);
void xe_guc_declare_wedged(struct xe_guc *guc);
+bool xe_guc_using_main_gamctrl_queues(struct xe_guc *guc);
+
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+int xe_guc_g2g_test_notification(struct xe_guc *guc, u32 *payload, u32 len);
+#endif
static inline u16 xe_engine_class_to_guc_class(enum xe_engine_class class)
{
diff --git a/drivers/gpu/drm/xe/xe_guc_ads.c b/drivers/gpu/drm/xe/xe_guc_ads.c
index 44c1fa2fe7c8..bcb85a1bf26d 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads.c
+++ b/drivers/gpu/drm/xe/xe_guc_ads.c
@@ -18,8 +18,10 @@
#include "xe_bo.h"
#include "xe_gt.h"
#include "xe_gt_ccs_mode.h"
+#include "xe_gt_mcr.h"
#include "xe_gt_printk.h"
#include "xe_guc.h"
+#include "xe_guc_buf.h"
#include "xe_guc_capture.h"
#include "xe_guc_ct.h"
#include "xe_hw_engine.h"
@@ -29,7 +31,6 @@
#include "xe_platform_types.h"
#include "xe_uc_fw.h"
#include "xe_wa.h"
-#include "xe_gt_mcr.h"
/* Slack of a few additional entries per engine */
#define ADS_REGSET_EXTRA_MAX 8
@@ -246,7 +247,7 @@ static size_t calculate_regset_size(struct xe_gt *gt)
count += ADS_REGSET_EXTRA_MAX * XE_NUM_HW_ENGINES;
- if (XE_WA(gt, 1607983814))
+ if (XE_GT_WA(gt, 1607983814))
count += LNCFCMOCS_REG_COUNT;
return count * sizeof(struct guc_mmio_reg);
@@ -283,52 +284,26 @@ static size_t calculate_golden_lrc_size(struct xe_guc_ads *ads)
return total_size;
}
-static void guc_waklv_enable_one_word(struct xe_guc_ads *ads,
- enum xe_guc_klv_ids klv_id,
- u32 value,
- u32 *offset, u32 *remain)
+static void guc_waklv_enable(struct xe_guc_ads *ads,
+ u32 data[], u32 data_len_dw,
+ u32 *offset, u32 *remain,
+ enum xe_guc_klv_ids klv_id)
{
- u32 size;
- u32 klv_entry[] = {
- /* 16:16 key/length */
- FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
- FIELD_PREP(GUC_KLV_0_LEN, 1),
- value,
- /* 1 dword data */
- };
-
- size = sizeof(klv_entry);
+ size_t size = sizeof(u32) * (1 + data_len_dw);
if (*remain < size) {
drm_warn(&ads_to_xe(ads)->drm,
- "w/a klv buffer too small to add klv id %d\n", klv_id);
- } else {
- xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
- klv_entry, size);
- *offset += size;
- *remain -= size;
+ "w/a klv buffer too small to add klv id 0x%04X\n", klv_id);
+ return;
}
-}
-
-static void guc_waklv_enable_simple(struct xe_guc_ads *ads,
- enum xe_guc_klv_ids klv_id, u32 *offset, u32 *remain)
-{
- u32 klv_entry[] = {
- /* 16:16 key/length */
- FIELD_PREP(GUC_KLV_0_KEY, klv_id) |
- FIELD_PREP(GUC_KLV_0_LEN, 0),
- /* 0 dwords data */
- };
- u32 size;
- size = sizeof(klv_entry);
-
- if (xe_gt_WARN(ads_to_gt(ads), *remain < size,
- "w/a klv buffer too small to add klv id %d\n", klv_id))
- return;
+ /* 16:16 key/length */
+ xe_map_wr(ads_to_xe(ads), ads_to_map(ads), *offset, u32,
+ FIELD_PREP(GUC_KLV_0_KEY, klv_id) | FIELD_PREP(GUC_KLV_0_LEN, data_len_dw));
+ /* data_len_dw dwords of data */
+ xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads),
+ *offset + sizeof(u32), data, data_len_dw * sizeof(u32));
- xe_map_memcpy_to(ads_to_xe(ads), ads_to_map(ads), *offset,
- klv_entry, size);
*offset += size;
*remain -= size;
}
@@ -342,44 +317,51 @@ static void guc_waklv_init(struct xe_guc_ads *ads)
offset = guc_ads_waklv_offset(ads);
remain = guc_ads_waklv_size(ads);
- if (XE_WA(gt, 14019882105) || XE_WA(gt, 16021333562))
- guc_waklv_enable_simple(ads,
- GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED,
- &offset, &remain);
- if (XE_WA(gt, 18024947630))
- guc_waklv_enable_simple(ads,
- GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING,
- &offset, &remain);
- if (XE_WA(gt, 16022287689))
- guc_waklv_enable_simple(ads,
- GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE,
- &offset, &remain);
-
- if (XE_WA(gt, 14022866841))
- guc_waklv_enable_simple(ads,
- GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO,
- &offset, &remain);
+ if (XE_GT_WA(gt, 14019882105) || XE_GT_WA(gt, 16021333562))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WORKAROUND_KLV_BLOCK_INTERRUPTS_WHEN_MGSR_BLOCKED);
+ if (XE_GT_WA(gt, 18024947630))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WORKAROUND_KLV_ID_GAM_PFQ_SHADOW_TAIL_POLLING);
+ if (XE_GT_WA(gt, 16022287689))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WORKAROUND_KLV_ID_DISABLE_MTP_DURING_ASYNC_COMPUTE);
+
+ if (XE_GT_WA(gt, 14022866841))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WA_KLV_WAKE_POWER_DOMAINS_FOR_OUTBOUND_MMIO);
/*
* On RC6 exit, GuC will write register 0xB04 with the default value provided. As of now,
* the default value for this register is determined to be 0xC40. This could change in the
* future, so GuC depends on KMD to send it the correct value.
*/
- if (XE_WA(gt, 13011645652))
- guc_waklv_enable_one_word(ads,
- GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE,
- 0xC40,
- &offset, &remain);
-
- if (XE_WA(gt, 14022293748) || XE_WA(gt, 22019794406))
- guc_waklv_enable_simple(ads,
- GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET,
- &offset, &remain);
-
- if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_WA(gt, 16026508708))
- guc_waklv_enable_simple(ads,
- GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH,
- &offset, &remain);
+ if (XE_GT_WA(gt, 13011645652)) {
+ u32 data = 0xC40;
+
+ guc_waklv_enable(ads, &data, 1, &offset, &remain,
+ GUC_WA_KLV_NP_RD_WRITE_TO_CLEAR_RCSM_AT_CGP_LATE_RESTORE);
+ }
+
+ if (XE_GT_WA(gt, 14022293748) || XE_GT_WA(gt, 22019794406))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WORKAROUND_KLV_ID_BACK_TO_BACK_RCS_ENGINE_RESET);
+
+ if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 44, 0) && XE_GT_WA(gt, 16026508708))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WA_KLV_RESET_BB_STACK_PTR_ON_VF_SWITCH);
+ if (GUC_FIRMWARE_VER(&gt->uc.guc) >= MAKE_GUC_VER(70, 47, 0) && XE_GT_WA(gt, 16026007364)) {
+ u32 data[] = {
+ 0x0,
+ 0xF,
+ };
+ guc_waklv_enable(ads, data, ARRAY_SIZE(data), &offset, &remain,
+ GUC_WA_KLV_RESTORE_UNSAVED_MEDIA_CONTROL_REG);
+ }
+
+ if (XE_GT_WA(gt, 14020001231))
+ guc_waklv_enable(ads, NULL, 0, &offset, &remain,
+ GUC_WORKAROUND_KLV_DISABLE_PSMI_INTERRUPTS_AT_C6_ENTRY_RESTORE_AT_EXIT);
size = guc_ads_waklv_size(ads) - remain;
if (!size)
@@ -783,7 +765,7 @@ static unsigned int guc_mmio_regset_write(struct xe_guc_ads *ads,
guc_mmio_regset_write_one(ads, regset_map, e->reg, count++);
}
- if (XE_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
+ if (XE_GT_WA(hwe->gt, 1607983814) && hwe->class == XE_ENGINE_CLASS_RENDER) {
for (i = 0; i < LNCFCMOCS_REG_COUNT; i++) {
guc_mmio_regset_write_one(ads, regset_map,
XELP_LNCFCMOCS(i), count++);
@@ -838,16 +820,20 @@ static void guc_mmio_reg_state_init(struct xe_guc_ads *ads)
static void guc_um_init_params(struct xe_guc_ads *ads)
{
u32 um_queue_offset = guc_ads_um_queues_offset(ads);
+ struct xe_guc *guc = ads_to_guc(ads);
u64 base_dpa;
u32 base_ggtt;
+ bool with_dpa;
int i;
+ with_dpa = !xe_guc_using_main_gamctrl_queues(guc);
+
base_ggtt = xe_bo_ggtt_addr(ads->bo) + um_queue_offset;
base_dpa = xe_bo_main_addr(ads->bo, PAGE_SIZE) + um_queue_offset;
for (i = 0; i < GUC_UM_HW_QUEUE_MAX; ++i) {
ads_blob_write(ads, um_init_params.queue_params[i].base_dpa,
- base_dpa + (i * GUC_UM_QUEUE_SIZE));
+ with_dpa ? (base_dpa + (i * GUC_UM_QUEUE_SIZE)) : 0);
ads_blob_write(ads, um_init_params.queue_params[i].base_ggtt_address,
base_ggtt + (i * GUC_UM_QUEUE_SIZE));
ads_blob_write(ads, um_init_params.queue_params[i].size_in_bytes,
@@ -889,7 +875,7 @@ void xe_guc_ads_populate_minimal(struct xe_guc_ads *ads)
xe_gt_assert(gt, ads->bo);
- xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+ xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo));
guc_policies_init(ads);
guc_golden_lrc_init(ads);
guc_mapping_table_init_invalid(gt, &info_map);
@@ -913,7 +899,7 @@ void xe_guc_ads_populate(struct xe_guc_ads *ads)
xe_gt_assert(gt, ads->bo);
- xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, ads->bo->size);
+ xe_map_memset(ads_to_xe(ads), ads_to_map(ads), 0, 0, xe_bo_size(ads->bo));
guc_policies_init(ads);
fill_engine_enable_masks(gt, &info_map);
guc_mmio_reg_state_init(ads);
@@ -1004,16 +990,16 @@ static int guc_ads_action_update_policies(struct xe_guc_ads *ads, u32 policy_off
*/
int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads)
{
- struct xe_device *xe = ads_to_xe(ads);
- struct xe_gt *gt = ads_to_gt(ads);
- struct xe_tile *tile = gt_to_tile(gt);
struct guc_policies *policies;
- struct xe_bo *bo;
- int ret = 0;
+ struct xe_guc *guc = ads_to_guc(ads);
+ struct xe_device *xe = ads_to_xe(ads);
+ CLASS(xe_guc_buf, buf)(&guc->buf, sizeof(*policies));
- policies = kmalloc(sizeof(*policies), GFP_KERNEL);
- if (!policies)
- return -ENOMEM;
+ if (!xe_guc_buf_is_valid(buf))
+ return -ENOBUFS;
+
+ policies = xe_guc_buf_cpu_ptr(buf);
+ memset(policies, 0, sizeof(*policies));
policies->dpc_promote_time = ads_blob_read(ads, policies.dpc_promote_time);
policies->max_num_work_items = ads_blob_read(ads, policies.max_num_work_items);
@@ -1023,16 +1009,5 @@ int xe_guc_ads_scheduler_policy_toggle_reset(struct xe_guc_ads *ads)
else
policies->global_flags &= ~GLOBAL_POLICY_DISABLE_ENGINE_RESET;
- bo = xe_managed_bo_create_from_data(xe, tile, policies, sizeof(struct guc_policies),
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT);
- if (IS_ERR(bo)) {
- ret = PTR_ERR(bo);
- goto out;
- }
-
- ret = guc_ads_action_update_policies(ads, xe_bo_ggtt_addr(bo));
-out:
- kfree(policies);
- return ret;
+ return guc_ads_action_update_policies(ads, xe_guc_buf_flush(buf));
}
diff --git a/drivers/gpu/drm/xe/xe_guc_ads_types.h b/drivers/gpu/drm/xe/xe_guc_ads_types.h
index 70c132458ac3..48a8e092023f 100644
--- a/drivers/gpu/drm/xe/xe_guc_ads_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ads_types.h
@@ -14,7 +14,7 @@ struct xe_bo;
* struct xe_guc_ads - GuC additional data structures (ADS)
*/
struct xe_guc_ads {
- /** @bo: XE BO for GuC ads blob */
+ /** @bo: Xe BO for GuC ads blob */
struct xe_bo *bo;
/** @golden_lrc_size: golden LRC size */
size_t golden_lrc_size;
diff --git a/drivers/gpu/drm/xe/xe_guc_buf.c b/drivers/gpu/drm/xe/xe_guc_buf.c
index 0193c94dd6a0..3ce442500130 100644
--- a/drivers/gpu/drm/xe/xe_guc_buf.c
+++ b/drivers/gpu/drm/xe/xe_guc_buf.c
@@ -13,6 +13,8 @@
#include "xe_guc_buf.h"
#include "xe_sa.h"
+#define XE_GUC_BUF_CACHE_DEFAULT_SIZE SZ_8K
+
static struct xe_guc *cache_to_guc(struct xe_guc_buf_cache *cache)
{
return container_of(cache, struct xe_guc, buf);
@@ -23,25 +25,12 @@ static struct xe_gt *cache_to_gt(struct xe_guc_buf_cache *cache)
return guc_to_gt(cache_to_guc(cache));
}
-/**
- * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache.
- * @cache: the &xe_guc_buf_cache to initialize
- *
- * The Buffer Cache allows to obtain a reusable buffer that can be used to pass
- * indirect H2G data to GuC without a need to create a ad-hoc allocation.
- *
- * Return: 0 on success or a negative error code on failure.
- */
-int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache)
+static int guc_buf_cache_init(struct xe_guc_buf_cache *cache, u32 size)
{
struct xe_gt *gt = cache_to_gt(cache);
struct xe_sa_manager *sam;
- /* XXX: currently it's useful only for the PF actions */
- if (!IS_SRIOV_PF(gt_to_xe(gt)))
- return 0;
-
- sam = __xe_sa_bo_manager_init(gt_to_tile(gt), SZ_8K, 0, sizeof(u32));
+ sam = __xe_sa_bo_manager_init(gt_to_tile(gt), size, 0, sizeof(u32));
if (IS_ERR(sam))
return PTR_ERR(sam);
cache->sam = sam;
@@ -53,6 +42,35 @@ int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache)
}
/**
+ * xe_guc_buf_cache_init() - Initialize the GuC Buffer Cache.
+ * @cache: the &xe_guc_buf_cache to initialize
+ *
+ * The Buffer Cache provides a reusable buffer that can be used to pass data
+ * to the GuC or read data from the GuC without creating an ad-hoc allocation.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache)
+{
+ return guc_buf_cache_init(cache, XE_GUC_BUF_CACHE_DEFAULT_SIZE);
+}
+
+/**
+ * xe_guc_buf_cache_init_with_size() - Initialize the GuC Buffer Cache.
+ * @cache: the &xe_guc_buf_cache to initialize
+ * @size: size in bytes
+ *
+ * Like xe_guc_buf_cache_init(), except it allows the caller to make the cache
+ * buffer larger in order to accommodate larger objects.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size)
+{
+ return guc_buf_cache_init(cache, max(XE_GUC_BUF_CACHE_DEFAULT_SIZE, size));
+}
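+
+/*
+ * Illustrative sketch, not part of this change: a user needing a larger
+ * reusable buffer could size the cache explicitly and then follow the scoped
+ * CLASS(xe_guc_buf, ...) pattern used in xe_guc_ads_scheduler_policy_toggle_reset().
+ * example_send_blob() and send_h2g_with_addr() are hypothetical names used
+ * only for illustration:
+ *
+ *	int example_send_blob(struct xe_guc *guc, const void *data, u32 size)
+ *	{
+ *		CLASS(xe_guc_buf, buf)(&guc->buf, size);
+ *
+ *		if (!xe_guc_buf_is_valid(buf))
+ *			return -ENOBUFS;
+ *
+ *		memcpy(xe_guc_buf_cpu_ptr(buf), data, size);
+ *
+ *		// xe_guc_buf_flush() copies to GPU memory and returns the GPU address
+ *		return send_h2g_with_addr(guc, xe_guc_buf_flush(buf));
+ *	}
+ */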
+
+/**
* xe_guc_buf_cache_dwords() - Number of dwords the GuC Buffer Cache supports.
* @cache: the &xe_guc_buf_cache to query
*
@@ -120,6 +138,19 @@ void xe_guc_buf_release(const struct xe_guc_buf buf)
}
/**
+ * xe_guc_buf_sync_read() - Copy the data from the GPU memory to the sub-allocation.
+ * @buf: the &xe_guc_buf to sync
+ *
+ * Return: a CPU pointer of the sub-allocation.
+ */
+void *xe_guc_buf_sync_read(const struct xe_guc_buf buf)
+{
+ xe_sa_bo_sync_read(buf.sa);
+
+ return xe_sa_bo_cpu_addr(buf.sa);
+}
+
+/**
* xe_guc_buf_flush() - Copy the data from the sub-allocation to the GPU memory.
* @buf: the &xe_guc_buf to flush
*
@@ -168,7 +199,7 @@ u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *p
if (offset < 0 || offset + size > cache->sam->base.size)
return 0;
- return cache->sam->gpu_addr + offset;
+ return xe_sa_manager_gpu_addr(cache->sam) + offset;
}
#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
diff --git a/drivers/gpu/drm/xe/xe_guc_buf.h b/drivers/gpu/drm/xe/xe_guc_buf.h
index 0d67604d96bd..e3cca553fb00 100644
--- a/drivers/gpu/drm/xe/xe_guc_buf.h
+++ b/drivers/gpu/drm/xe/xe_guc_buf.h
@@ -12,6 +12,7 @@
#include "xe_guc_buf_types.h"
int xe_guc_buf_cache_init(struct xe_guc_buf_cache *cache);
+int xe_guc_buf_cache_init_with_size(struct xe_guc_buf_cache *cache, u32 size);
u32 xe_guc_buf_cache_dwords(struct xe_guc_buf_cache *cache);
struct xe_guc_buf xe_guc_buf_reserve(struct xe_guc_buf_cache *cache, u32 dwords);
struct xe_guc_buf xe_guc_buf_from_data(struct xe_guc_buf_cache *cache,
@@ -30,6 +31,7 @@ static inline bool xe_guc_buf_is_valid(const struct xe_guc_buf buf)
}
void *xe_guc_buf_cpu_ptr(const struct xe_guc_buf buf);
+void *xe_guc_buf_sync_read(const struct xe_guc_buf buf);
u64 xe_guc_buf_flush(const struct xe_guc_buf buf);
u64 xe_guc_buf_gpu_addr(const struct xe_guc_buf buf);
u64 xe_guc_cache_gpu_addr_from_ptr(struct xe_guc_buf_cache *cache, const void *ptr, u32 size);
diff --git a/drivers/gpu/drm/xe/xe_guc_capture.c b/drivers/gpu/drm/xe/xe_guc_capture.c
index 859a3ba91be5..0c1fbe97b8bf 100644
--- a/drivers/gpu/drm/xe/xe_guc_capture.c
+++ b/drivers/gpu/drm/xe/xe_guc_capture.c
@@ -122,6 +122,7 @@ struct __guc_capture_parsed_output {
{ RING_IPEHR(0), REG_32BIT, 0, 0, 0, "IPEHR"}, \
{ RING_INSTDONE(0), REG_32BIT, 0, 0, 0, "RING_INSTDONE"}, \
{ INDIRECT_RING_STATE(0), REG_32BIT, 0, 0, 0, "INDIRECT_RING_STATE"}, \
+ { RING_CURRENT_LRCA(0), REG_32BIT, 0, 0, 0, "CURRENT_LRCA"}, \
{ RING_ACTHD(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
{ RING_ACTHD_UDW(0), REG_64BIT_HI_DW, 0, 0, 0, "ACTHD"}, \
{ RING_BBADDR(0), REG_64BIT_LOW_DW, 0, 0, 0, NULL}, \
@@ -149,6 +150,9 @@ struct __guc_capture_parsed_output {
{ SFC_DONE(2), 0, 0, 0, 0, "SFC_DONE[2]"}, \
{ SFC_DONE(3), 0, 0, 0, 0, "SFC_DONE[3]"}
+#define XE3P_BASE_ENGINE_INSTANCE \
+ { RING_CSMQDEBUG(0), REG_32BIT, 0, 0, 0, "CSMQDEBUG"}
+
/* XE_LP Global */
static const struct __guc_mmio_reg_descr xe_lp_global_regs[] = {
COMMON_XELP_BASE_GLOBAL,
@@ -195,6 +199,12 @@ static const struct __guc_mmio_reg_descr xe_lp_gsc_inst_regs[] = {
COMMON_BASE_ENGINE_INSTANCE,
};
+/* Render / Compute Per-Engine-Instance */
+static const struct __guc_mmio_reg_descr xe3p_rc_inst_regs[] = {
+ COMMON_BASE_ENGINE_INSTANCE,
+ XE3P_BASE_ENGINE_INSTANCE,
+};
+
/*
* Empty list to prevent warnings about unknown class/instance types
* as not all class/instance types have entries on all platforms.
@@ -245,6 +255,21 @@ static const struct __guc_mmio_reg_descr_group xe_hpg_lists[] = {
{}
};
+ /* List of lists for Xe3p and beyond */
+static const struct __guc_mmio_reg_descr_group xe3p_lists[] = {
+ MAKE_REGLIST(xe_lp_global_regs, PF, GLOBAL, 0),
+ MAKE_REGLIST(xe_hpg_rc_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(xe3p_rc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_RENDER_COMPUTE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vd_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEO),
+ MAKE_REGLIST(xe_vec_class_regs, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(xe_vec_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_VIDEOENHANCE),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(xe_blt_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_BLITTER),
+ MAKE_REGLIST(empty_regs_list, PF, ENGINE_CLASS, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ MAKE_REGLIST(xe_lp_gsc_inst_regs, PF, ENGINE_INSTANCE, GUC_CAPTURE_LIST_CLASS_GSC_OTHER),
+ {}
+};
static const char * const capture_list_type_names[] = {
"Global",
"Class",
@@ -292,7 +317,9 @@ guc_capture_remove_stale_matches_from_list(struct xe_guc_state_capture *gc,
static const struct __guc_mmio_reg_descr_group *
guc_capture_get_device_reglist(struct xe_device *xe)
{
- if (GRAPHICS_VERx100(xe) >= 1255)
+ if (GRAPHICS_VER(xe) >= 35)
+ return xe3p_lists;
+ else if (GRAPHICS_VERx100(xe) >= 1255)
return xe_hpg_lists;
else
return xe_lp_lists;
@@ -1817,6 +1844,12 @@ void xe_engine_snapshot_print(struct xe_hw_engine_snapshot *snapshot, struct drm
str_yes_no(snapshot->kernel_reserved));
for (type = GUC_STATE_CAPTURE_TYPE_GLOBAL; type < GUC_STATE_CAPTURE_TYPE_MAX; type++) {
+ /*
+ * FIXME: During devcoredump print we should avoid accessing the
+ * driver pointers for the gt or engine; printing should be done using
+ * only the captured snapshot. Here we still access the gt pointer, which
+ * needs to be fixed.
+ */
list = xe_guc_capture_get_reg_desc_list(gt, GUC_CAPTURE_LIST_INDEX_PF, type,
capture_class, false);
snapshot_print_by_list_order(snapshot, p, type, list);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.c b/drivers/gpu/drm/xe/xe_guc_ct.c
index 2447de0ebedf..a5019d1e741b 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.c
+++ b/drivers/gpu/drm/xe/xe_guc_ct.c
@@ -21,19 +21,27 @@
#include "xe_devcoredump.h"
#include "xe_device.h"
#include "xe_gt.h"
-#include "xe_gt_pagefault.h"
#include "xe_gt_printk.h"
#include "xe_gt_sriov_pf_control.h"
#include "xe_gt_sriov_pf_monitor.h"
-#include "xe_gt_tlb_invalidation.h"
#include "xe_guc.h"
#include "xe_guc_log.h"
+#include "xe_guc_pagefault.h"
#include "xe_guc_relay.h"
#include "xe_guc_submit.h"
+#include "xe_guc_tlb_inval.h"
#include "xe_map.h"
#include "xe_pm.h"
+#include "xe_sriov_vf.h"
#include "xe_trace_guc.h"
+static void receive_g2h(struct xe_guc_ct *ct);
+static void g2h_worker_func(struct work_struct *w);
+static void safe_mode_worker_func(struct work_struct *w);
+static void ct_exit_safe_mode(struct xe_guc_ct *ct);
+static void guc_ct_change_state(struct xe_guc_ct *ct,
+ enum xe_guc_ct_state state);
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
enum {
/* Internal states, not error conditions */
@@ -79,6 +87,7 @@ struct g2h_fence {
u16 error;
u16 hint;
u16 reason;
+ bool cancel;
bool retry;
bool fail;
bool done;
@@ -86,15 +95,20 @@ struct g2h_fence {
static void g2h_fence_init(struct g2h_fence *g2h_fence, u32 *response_buffer)
{
+ memset(g2h_fence, 0, sizeof(*g2h_fence));
g2h_fence->response_buffer = response_buffer;
- g2h_fence->response_data = 0;
- g2h_fence->response_len = 0;
- g2h_fence->fail = false;
- g2h_fence->retry = false;
- g2h_fence->done = false;
g2h_fence->seqno = ~0x0;
}
+static void g2h_fence_cancel(struct g2h_fence *g2h_fence)
+{
+ g2h_fence->cancel = true;
+ g2h_fence->fail = true;
+
+ /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */
+ WRITE_ONCE(g2h_fence->done, true);
+}
+
static bool g2h_fence_needs_alloc(struct g2h_fence *g2h_fence)
{
return g2h_fence->seqno == ~0x0;
@@ -155,6 +169,7 @@ ct_to_xe(struct xe_guc_ct *ct)
*/
#define CTB_DESC_SIZE ALIGN(sizeof(struct guc_ct_buffer_desc), SZ_2K)
+#define CTB_H2G_BUFFER_OFFSET (CTB_DESC_SIZE * 2)
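+/*
+ * Resulting layout of the CT BO, as set up by the *_init()/*_register()
+ * helpers below:
+ *   0                                            H2G descriptor
+ *   CTB_DESC_SIZE                                G2H descriptor
+ *   CTB_H2G_BUFFER_OFFSET                        H2G command buffer
+ *   CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE  G2H command buffer
+ */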
#define CTB_H2G_BUFFER_SIZE (SZ_4K)
#define CTB_G2H_BUFFER_SIZE (SZ_128K)
#define G2H_ROOM_BUFFER_SIZE (CTB_G2H_BUFFER_SIZE / 2)
@@ -178,7 +193,7 @@ long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct)
static size_t guc_ct_size(void)
{
- return 2 * CTB_DESC_SIZE + CTB_H2G_BUFFER_SIZE +
+ return CTB_H2G_BUFFER_OFFSET + CTB_H2G_BUFFER_SIZE +
CTB_G2H_BUFFER_SIZE;
}
@@ -186,14 +201,14 @@ static void guc_ct_fini(struct drm_device *drm, void *arg)
{
struct xe_guc_ct *ct = arg;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+ cancel_work_sync(&ct->dead.worker);
+#endif
+ ct_exit_safe_mode(ct);
destroy_workqueue(ct->g2h_wq);
xa_destroy(&ct->fence_lookup);
}
-static void receive_g2h(struct xe_guc_ct *ct);
-static void g2h_worker_func(struct work_struct *w);
-static void safe_mode_worker_func(struct work_struct *w);
-
static void primelockdep(struct xe_guc_ct *ct)
{
if (!IS_ENABLED(CONFIG_LOCKDEP))
@@ -204,16 +219,20 @@ static void primelockdep(struct xe_guc_ct *ct)
fs_reclaim_release(GFP_KERNEL);
}
-int xe_guc_ct_init(struct xe_guc_ct *ct)
+int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct)
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
- struct xe_tile *tile = gt_to_tile(gt);
- struct xe_bo *bo;
int err;
xe_gt_assert(gt, !(guc_ct_size() % PAGE_SIZE));
+ err = drmm_mutex_init(&xe->drm, &ct->lock);
+ if (err)
+ return err;
+
+ primelockdep(ct);
+
ct->g2h_wq = alloc_ordered_workqueue("xe-g2h-wq", WQ_MEM_RECLAIM);
if (!ct->g2h_wq)
return -ENOMEM;
@@ -225,15 +244,36 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
spin_lock_init(&ct->dead.lock);
INIT_WORK(&ct->dead.worker, ct_dead_worker_func);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ stack_depot_init();
+#endif
#endif
init_waitqueue_head(&ct->wq);
init_waitqueue_head(&ct->g2h_fence_wq);
- err = drmm_mutex_init(&xe->drm, &ct->lock);
+ err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
if (err)
return err;
- primelockdep(ct);
+ xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
+ ct->state = XE_GUC_CT_STATE_DISABLED;
+ return 0;
+}
+ALLOW_ERROR_INJECTION(xe_guc_ct_init_noalloc, ERRNO); /* See xe_pci_probe() */
+
+static void guc_action_disable_ct(void *arg)
+{
+ struct xe_guc_ct *ct = arg;
+
+ guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
+}
+
+int xe_guc_ct_init(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_bo *bo;
bo = xe_managed_bo_create_pin_map(xe, tile, guc_ct_size(),
XE_BO_FLAG_SYSTEM |
@@ -245,16 +285,38 @@ int xe_guc_ct_init(struct xe_guc_ct *ct)
ct->bo = bo;
- err = drmm_add_action_or_reset(&xe->drm, guc_ct_fini, ct);
- if (err)
- return err;
-
- xe_gt_assert(gt, ct->state == XE_GUC_CT_STATE_NOT_INITIALIZED);
- ct->state = XE_GUC_CT_STATE_DISABLED;
- return 0;
+ return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
}
ALLOW_ERROR_INJECTION(xe_guc_ct_init, ERRNO); /* See xe_pci_probe() */
+/**
+ * xe_guc_ct_init_post_hwconfig - Reinitialize the GuC CTB in VRAM
+ * @ct: the &xe_guc_ct
+ *
+ * Allocate a new BO in VRAM and free the previous BO that was allocated
+ * in system memory (SMEM). Applicable only for DGFX products.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct)
+{
+ struct xe_device *xe = ct_to_xe(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct xe_tile *tile = gt_to_tile(gt);
+ int ret;
+
+ xe_assert(xe, !xe_guc_ct_enabled(ct));
+
+ if (IS_DGFX(xe)) {
+ ret = xe_managed_bo_reinit_in_vram(xe, tile, &ct->bo);
+ if (ret)
+ return ret;
+ }
+
+ devm_remove_action(xe->drm.dev, guc_action_disable_ct, ct);
+ return devm_add_action_or_reset(xe->drm.dev, guc_action_disable_ct, ct);
+}
+
#define desc_read(xe_, guc_ctb__, field_) \
xe_map_rd_field(xe_, &guc_ctb__->desc, 0, \
struct guc_ct_buffer_desc, field_)
@@ -278,7 +340,7 @@ static void guc_ct_ctb_h2g_init(struct xe_device *xe, struct guc_ctb *h2g,
h2g->desc = *map;
xe_map_memset(xe, &h2g->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
- h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2);
+ h2g->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET);
}
static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
@@ -296,7 +358,7 @@ static void guc_ct_ctb_g2h_init(struct xe_device *xe, struct guc_ctb *g2h,
g2h->desc = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE);
xe_map_memset(xe, &g2h->desc, 0, 0, sizeof(struct guc_ct_buffer_desc));
- g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_DESC_SIZE * 2 +
+ g2h->cmds = IOSYS_MAP_INIT_OFFSET(map, CTB_H2G_BUFFER_OFFSET +
CTB_H2G_BUFFER_SIZE);
}
@@ -307,7 +369,7 @@ static int guc_ct_ctb_h2g_register(struct xe_guc_ct *ct)
int err;
desc_addr = xe_bo_ggtt_addr(ct->bo);
- ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2;
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET;
size = ct->ctbs.h2g.info.size * sizeof(u32);
err = xe_guc_self_cfg64(guc,
@@ -334,7 +396,7 @@ static int guc_ct_ctb_g2h_register(struct xe_guc_ct *ct)
int err;
desc_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE;
- ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_DESC_SIZE * 2 +
+ ctb_addr = xe_bo_ggtt_addr(ct->bo) + CTB_H2G_BUFFER_OFFSET +
CTB_H2G_BUFFER_SIZE;
size = ct->ctbs.g2h.info.size * sizeof(u32);
@@ -371,9 +433,13 @@ static int guc_ct_control_toggle(struct xe_guc_ct *ct, bool enable)
return ret > 0 ? -EPROTO : ret;
}
-static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
+static void guc_ct_change_state(struct xe_guc_ct *ct,
enum xe_guc_ct_state state)
{
+ struct xe_gt *gt = ct_to_gt(ct);
+ struct g2h_fence *g2h_fence;
+ unsigned long idx;
+
mutex_lock(&ct->lock); /* Serialise dequeue_one_g2h() */
spin_lock_irq(&ct->fast_lock); /* Serialise CT fast-path */
@@ -385,8 +451,20 @@ static void xe_guc_ct_set_state(struct xe_guc_ct *ct,
ct->g2h_outstanding = 0;
ct->state = state;
+ xe_gt_dbg(gt, "GuC CT communication channel %s\n",
+ state == XE_GUC_CT_STATE_STOPPED ? "stopped" :
+ str_enabled_disabled(state == XE_GUC_CT_STATE_ENABLED));
+
spin_unlock_irq(&ct->fast_lock);
+ /* cancel all in-flight send-recv requests */
+ xa_for_each(&ct->fence_lookup, idx, g2h_fence)
+ g2h_fence_cancel(g2h_fence);
+
+ /* make sure guc_ct_send_recv() will see g2h_fence changes */
+ smp_mb();
+ wake_up_all(&ct->g2h_fence_wq);
+
/*
* Lockdep doesn't like this under the fast lock and he destroy only
* needs to be serialized with the send path which ct lock provides.
@@ -432,7 +510,7 @@ static void ct_exit_safe_mode(struct xe_guc_ct *ct)
xe_gt_dbg(ct_to_gt(ct), "GuC CT safe-mode disabled\n");
}
-int xe_guc_ct_enable(struct xe_guc_ct *ct)
+static int __xe_guc_ct_start(struct xe_guc_ct *ct, bool needs_register)
{
struct xe_device *xe = ct_to_xe(ct);
struct xe_gt *gt = ct_to_gt(ct);
@@ -440,27 +518,34 @@ int xe_guc_ct_enable(struct xe_guc_ct *ct)
xe_gt_assert(gt, !xe_guc_ct_enabled(ct));
- xe_map_memset(xe, &ct->bo->vmap, 0, 0, ct->bo->size);
- guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
- guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
+ if (needs_register) {
+ xe_map_memset(xe, &ct->bo->vmap, 0, 0, xe_bo_size(ct->bo));
+ guc_ct_ctb_h2g_init(xe, &ct->ctbs.h2g, &ct->bo->vmap);
+ guc_ct_ctb_g2h_init(xe, &ct->ctbs.g2h, &ct->bo->vmap);
- err = guc_ct_ctb_h2g_register(ct);
- if (err)
- goto err_out;
+ err = guc_ct_ctb_h2g_register(ct);
+ if (err)
+ goto err_out;
- err = guc_ct_ctb_g2h_register(ct);
- if (err)
- goto err_out;
+ err = guc_ct_ctb_g2h_register(ct);
+ if (err)
+ goto err_out;
- err = guc_ct_control_toggle(ct, true);
- if (err)
- goto err_out;
+ err = guc_ct_control_toggle(ct, true);
+ if (err)
+ goto err_out;
+ } else {
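+ /*
+ * Restart path (xe_guc_ct_restart()): the CTBs remain registered with
+ * the GuC, so only clear the software state and wipe the H2G payload
+ * area.
+ */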
+ ct->ctbs.h2g.info.broken = false;
+ ct->ctbs.g2h.info.broken = false;
+ /* Skip everything in H2G buffer */
+ xe_map_memset(xe, &ct->bo->vmap, CTB_H2G_BUFFER_OFFSET, 0,
+ CTB_H2G_BUFFER_SIZE);
+ }
- xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_ENABLED);
+ guc_ct_change_state(ct, XE_GUC_CT_STATE_ENABLED);
smp_mb();
wake_up_all(&ct->wq);
- xe_gt_dbg(gt, "GuC CT communication channel enabled\n");
if (ct_needs_safe_mode(ct))
ct_enter_safe_mode(ct);
@@ -487,6 +572,32 @@ err_out:
return err;
}
+/**
+ * xe_guc_ct_restart() - Restart GuC CT
+ * @ct: the &xe_guc_ct
+ *
+ * Restart GuC CT to an empty state without issuing a CT register MMIO command.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int xe_guc_ct_restart(struct xe_guc_ct *ct)
+{
+ return __xe_guc_ct_start(ct, false);
+}
+
+/**
+ * xe_guc_ct_enable() - Enable GuC CT
+ * @ct: the &xe_guc_ct
+ *
+ * Enable GuC CT to an empty state and issue a CT register MMIO command.
+ *
+ * Return: 0 on success, or a negative errno on failure.
+ */
+int xe_guc_ct_enable(struct xe_guc_ct *ct)
+{
+ return __xe_guc_ct_start(ct, true);
+}
+
static void stop_g2h_handler(struct xe_guc_ct *ct)
{
cancel_work_sync(&ct->g2h_worker);
@@ -501,12 +612,22 @@ static void stop_g2h_handler(struct xe_guc_ct *ct)
*/
void xe_guc_ct_disable(struct xe_guc_ct *ct)
{
- xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_DISABLED);
+ guc_ct_change_state(ct, XE_GUC_CT_STATE_DISABLED);
ct_exit_safe_mode(ct);
stop_g2h_handler(ct);
}
/**
+ * xe_guc_ct_flush_and_stop - Flush and stop all processing of G2H / H2G
+ * @ct: the &xe_guc_ct
+ */
+void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct)
+{
+ receive_g2h(ct);
+ xe_guc_ct_stop(ct);
+}
+
+/**
* xe_guc_ct_stop - Set GuC to stopped state
* @ct: the &xe_guc_ct
*
@@ -514,7 +635,10 @@ void xe_guc_ct_disable(struct xe_guc_ct *ct)
*/
void xe_guc_ct_stop(struct xe_guc_ct *ct)
{
- xe_guc_ct_set_state(ct, XE_GUC_CT_STATE_STOPPED);
+ if (!xe_guc_ct_initialized(ct))
+ return;
+
+ guc_ct_change_state(ct, XE_GUC_CT_STATE_STOPPED);
stop_g2h_handler(ct);
}
@@ -625,6 +749,69 @@ static void g2h_release_space(struct xe_guc_ct *ct, u32 g2h_len)
spin_unlock_irq(&ct->fast_lock);
}
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
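+/*
+ * Keep a short history (ARRAY_SIZE(ct->fast_req) entries, indexed by fence)
+ * of FAST_REQ H2G messages, so that an asynchronous G2H error response can
+ * be matched back to the action (and, with CONFIG_DRM_XE_DEBUG_GUC, the call
+ * stack) that sent it - see fast_req_report(). Older entries are simply
+ * overwritten, hence the "tracking buffer wrapped" warning there.
+ */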
+static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action)
+{
+ unsigned int slot = fence % ARRAY_SIZE(ct->fast_req);
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ unsigned long entries[SZ_32];
+ unsigned int n;
+
+ n = stack_trace_save(entries, ARRAY_SIZE(entries), 1);
+
+ /* May be called under spinlock, so avoid sleeping */
+ ct->fast_req[slot].stack = stack_depot_save(entries, n, GFP_NOWAIT);
+#endif
+ ct->fast_req[slot].fence = fence;
+ ct->fast_req[slot].action = action;
+}
+#else
+static void fast_req_track(struct xe_guc_ct *ct, u16 fence, u16 action)
+{
+}
+#endif
+
+/*
+ * The CT protocol accepts a 16-bit fence. This field is fully owned by the
+ * driver, the GuC will just copy it to the reply message. Since we need to
+ * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
+ * we use one bit of the seqno as an indicator for that and a rolling counter
+ * for the remaining 15 bits.
+ */
+#define CT_SEQNO_MASK GENMASK(14, 0)
+#define CT_SEQNO_UNTRACKED BIT(15)
+static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
+{
+ u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
+
+ if (!is_g2h_fence)
+ seqno |= CT_SEQNO_UNTRACKED;
+
+ return seqno;
+}
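+
+/*
+ * Example: with fence_seqno at 5, a tracked (g2h_fence) send uses fence
+ * 0x0005 while an untracked send uses 0x8005 (CT_SEQNO_UNTRACKED set); the
+ * 15-bit counter wraps back to 0 after 0x7fff.
+ */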
+
+#define MAKE_ACTION(type, __action) \
+({ \
+ FIELD_PREP(GUC_HXG_MSG_0_TYPE, type) | \
+ FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION | \
+ GUC_HXG_EVENT_MSG_0_DATA0, __action); \
+})
+
+static bool vf_action_can_safely_fail(struct xe_device *xe, u32 action)
+{
+ /*
+ * When resuming a VF, we can't reliably track whether context
+ * registration has completed in the GuC state machine. It is harmless
+ * to resend the request, as it will fail silently if GUC_HXG_TYPE_EVENT
+ * is used. Additionally, if there is an H2G protocol issue on a VF,
+ * subsequent H2G messages sent as GUC_HXG_TYPE_FAST_REQUEST will likely
+ * fail.
+ */
+ return IS_SRIOV_VF(xe) && xe_sriov_vf_migration_supported(xe) &&
+ (action == XE_GUC_ACTION_REGISTER_CONTEXT_MULTI_LRC ||
+ action == XE_GUC_ACTION_REGISTER_CONTEXT);
+}
+
#define H2G_CT_HEADERS (GUC_CTB_HDR_LEN + 1) /* one DW CTB header and one DW HxG header */
static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
@@ -696,15 +883,14 @@ static int h2g_write(struct xe_guc_ct *ct, const u32 *action, u32 len,
FIELD_PREP(GUC_CTB_MSG_0_NUM_DWORDS, len) |
FIELD_PREP(GUC_CTB_MSG_0_FENCE, ct_fence_value);
if (want_response) {
- cmd[1] =
- FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_REQUEST) |
- FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
- GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_REQUEST, action[0]);
+ } else if (vf_action_can_safely_fail(xe, action[0])) {
+ cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_EVENT, action[0]);
} else {
- cmd[1] =
- FIELD_PREP(GUC_HXG_MSG_0_TYPE, GUC_HXG_TYPE_FAST_REQUEST) |
- FIELD_PREP(GUC_HXG_EVENT_MSG_0_ACTION |
- GUC_HXG_EVENT_MSG_0_DATA0, action[0]);
+ fast_req_track(ct, ct_fence_value,
+ FIELD_GET(GUC_HXG_EVENT_MSG_0_ACTION, action[0]));
+
+ cmd[1] = MAKE_ACTION(GUC_HXG_TYPE_FAST_REQUEST, action[0]);
}
/* H2G header in cmd[1] replaces action[0] so: */
@@ -733,34 +919,15 @@ corrupted:
return -EPIPE;
}
-/*
- * The CT protocol accepts a 16 bits fence. This field is fully owned by the
- * driver, the GuC will just copy it to the reply message. Since we need to
- * be able to distinguish between replies to REQUEST and FAST_REQUEST messages,
- * we use one bit of the seqno as an indicator for that and a rolling counter
- * for the remaining 15 bits.
- */
-#define CT_SEQNO_MASK GENMASK(14, 0)
-#define CT_SEQNO_UNTRACKED BIT(15)
-static u16 next_ct_seqno(struct xe_guc_ct *ct, bool is_g2h_fence)
-{
- u32 seqno = ct->fence_seqno++ & CT_SEQNO_MASK;
-
- if (!is_g2h_fence)
- seqno |= CT_SEQNO_UNTRACKED;
-
- return seqno;
-}
-
static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
u32 len, u32 g2h_len, u32 num_g2h,
struct g2h_fence *g2h_fence)
{
- struct xe_gt *gt __maybe_unused = ct_to_gt(ct);
+ struct xe_gt *gt = ct_to_gt(ct);
u16 seqno;
int ret;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, xe_guc_ct_initialized(ct));
xe_gt_assert(gt, !g2h_len || !g2h_fence);
xe_gt_assert(gt, !num_g2h || !g2h_fence);
xe_gt_assert(gt, !g2h_len || num_g2h);
@@ -777,7 +944,7 @@ static int __guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action,
goto out;
}
- if (ct->state == XE_GUC_CT_STATE_STOPPED) {
+ if (ct->state == XE_GUC_CT_STATE_STOPPED || xe_gt_recovery_pending(gt)) {
ret = -ECANCELED;
goto out;
}
@@ -832,22 +999,15 @@ static void kick_reset(struct xe_guc_ct *ct)
static int dequeue_one_g2h(struct xe_guc_ct *ct);
-static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
- u32 g2h_len, u32 num_g2h,
- struct g2h_fence *g2h_fence)
+/*
+ * Wait before retrying to send an H2G message.
+ * Return: true if ready to retry, false if the wait timed out
+ */
+static bool guc_ct_send_wait_for_retry(struct xe_guc_ct *ct, u32 len,
+ u32 g2h_len, struct g2h_fence *g2h_fence,
+ unsigned int *sleep_period_ms)
{
struct xe_device *xe = ct_to_xe(ct);
- struct xe_gt *gt = ct_to_gt(ct);
- unsigned int sleep_period_ms = 1;
- int ret;
-
- xe_gt_assert(gt, !g2h_len || !g2h_fence);
- lockdep_assert_held(&ct->lock);
- xe_device_assert_mem_access(ct_to_xe(ct));
-
-try_again:
- ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
- g2h_fence);
/*
* We wait to try to restore credits for about 1 second before bailing.
@@ -856,24 +1016,22 @@ try_again:
* the case of G2H we process any G2H in the channel, hopefully freeing
* credits as we consume the G2H messages.
*/
- if (unlikely(ret == -EBUSY &&
- !h2g_has_room(ct, len + GUC_CTB_HDR_LEN))) {
+ if (!h2g_has_room(ct, len + GUC_CTB_HDR_LEN)) {
struct guc_ctb *h2g = &ct->ctbs.h2g;
- if (sleep_period_ms == 1024)
- goto broken;
+ if (*sleep_period_ms == 1024)
+ return false;
trace_xe_guc_ct_h2g_flow_control(xe, h2g->info.head, h2g->info.tail,
h2g->info.size,
h2g->info.space,
len + GUC_CTB_HDR_LEN);
- msleep(sleep_period_ms);
- sleep_period_ms <<= 1;
-
- goto try_again;
- } else if (unlikely(ret == -EBUSY)) {
+ msleep(*sleep_period_ms);
+ *sleep_period_ms <<= 1;
+ } else {
struct xe_device *xe = ct_to_xe(ct);
struct guc_ctb *g2h = &ct->ctbs.g2h;
+ int ret;
trace_xe_guc_ct_g2h_flow_control(xe, g2h->info.head,
desc_read(xe, g2h, tail),
@@ -887,7 +1045,7 @@ try_again:
(desc_read(ct_to_xe(ct), (&ct->ctbs.g2h), tail) != ct->ctbs.g2h.info.head)
if (!wait_event_timeout(ct->wq, !ct->g2h_outstanding ||
g2h_avail(ct), HZ))
- goto broken;
+ return false;
#undef g2h_avail
ret = dequeue_one_g2h(ct);
@@ -895,9 +1053,32 @@ try_again:
if (ret != -ECANCELED)
xe_gt_err(ct_to_gt(ct), "CTB receive failed (%pe)",
ERR_PTR(ret));
- goto broken;
+ return false;
}
+ }
+ return true;
+}
+
+static int guc_ct_send_locked(struct xe_guc_ct *ct, const u32 *action, u32 len,
+ u32 g2h_len, u32 num_g2h,
+ struct g2h_fence *g2h_fence)
+{
+ struct xe_gt *gt = ct_to_gt(ct);
+ unsigned int sleep_period_ms = 1;
+ int ret;
+ xe_gt_assert(gt, !g2h_len || !g2h_fence);
+ lockdep_assert_held(&ct->lock);
+ xe_device_assert_mem_access(ct_to_xe(ct));
+
+try_again:
+ ret = __guc_ct_send_locked(ct, action, len, g2h_len, num_g2h,
+ g2h_fence);
+
+ if (unlikely(ret == -EBUSY)) {
+ if (!guc_ct_send_wait_for_retry(ct, len, g2h_len, g2h_fence,
+ &sleep_period_ms))
+ goto broken;
goto try_again;
}
@@ -980,11 +1161,15 @@ static bool retry_failure(struct xe_guc_ct *ct, int ret)
return true;
}
+#define GUC_SEND_RETRY_LIMIT 50
+#define GUC_SEND_RETRY_MSLEEP 5
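+/*
+ * With the linear backoff below (msleep(GUC_SEND_RETRY_MSLEEP * retries)),
+ * reaching the retry limit adds roughly 5 * (1 + 2 + ... + 50) ms, i.e.
+ * about 6.4 seconds of sleep on top of the per-attempt waits, before
+ * -ELOOP is returned.
+ */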
+
static int guc_ct_send_recv(struct xe_guc_ct *ct, const u32 *action, u32 len,
u32 *response_buffer, bool no_fail)
{
struct xe_gt *gt = ct_to_gt(ct);
struct g2h_fence g2h_fence;
+ unsigned int retries = 0;
int ret = 0;
/*
@@ -1020,10 +1205,13 @@ retry_same_fence:
return ret;
}
- ret = wait_event_timeout(ct->g2h_fence_wq, g2h_fence.done, HZ);
+ /* READ_ONCEs pair with the WRITE_ONCEs in parse_g2h_response
+ * and g2h_fence_cancel.
+ */
+ ret = wait_event_timeout(ct->g2h_fence_wq, READ_ONCE(g2h_fence.done), HZ);
if (!ret) {
LNL_FLUSH_WORK(&ct->g2h_worker);
- if (g2h_fence.done) {
+ if (READ_ONCE(g2h_fence.done)) {
xe_gt_warn(gt, "G2H fence %u, action %04x, done\n",
g2h_fence.seqno, action[0]);
ret = 1;
@@ -1049,9 +1237,20 @@ retry_same_fence:
xe_gt_dbg(gt, "H2G action %#x retrying: reason %#x\n",
action[0], g2h_fence.reason);
mutex_unlock(&ct->lock);
+ if (++retries > GUC_SEND_RETRY_LIMIT) {
+ xe_gt_err(gt, "H2G action %#x reached retry limit=%u, aborting\n",
+ action[0], GUC_SEND_RETRY_LIMIT);
+ return -ELOOP;
+ }
+ msleep(GUC_SEND_RETRY_MSLEEP * retries);
goto retry;
}
if (g2h_fence.fail) {
+ if (g2h_fence.cancel) {
+ xe_gt_dbg(gt, "H2G request %#x canceled!\n", action[0]);
+ ret = -ECANCELED;
+ goto unlock;
+ }
xe_gt_err(gt, "H2G request %#x failed: error %#x hint %#x\n",
action[0], g2h_fence.error, g2h_fence.hint);
ret = -EIO;
@@ -1060,6 +1259,7 @@ retry_same_fence:
if (ret > 0)
ret = response_buffer ? g2h_fence.response_len : g2h_fence.response_data;
+unlock:
mutex_unlock(&ct->lock);
return ret;
@@ -1143,6 +1343,55 @@ static int guc_crash_process_msg(struct xe_guc_ct *ct, u32 action)
return 0;
}
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+static void fast_req_report(struct xe_guc_ct *ct, u16 fence)
+{
+ u16 fence_min = U16_MAX, fence_max = 0;
+ struct xe_gt *gt = ct_to_gt(ct);
+ bool found = false;
+ unsigned int n;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ char *buf;
+#endif
+
+ lockdep_assert_held(&ct->lock);
+
+ for (n = 0; n < ARRAY_SIZE(ct->fast_req); n++) {
+ if (ct->fast_req[n].fence < fence_min)
+ fence_min = ct->fast_req[n].fence;
+ if (ct->fast_req[n].fence > fence_max)
+ fence_max = ct->fast_req[n].fence;
+
+ if (ct->fast_req[n].fence != fence)
+ continue;
+ found = true;
+
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ buf = kmalloc(SZ_4K, GFP_NOWAIT);
+ if (buf && stack_depot_snprint(ct->fast_req[n].stack, buf, SZ_4K, 0))
+ xe_gt_err(gt, "Fence 0x%x was used by action %#04x sent at:\n%s",
+ fence, ct->fast_req[n].action, buf);
+ else
+ xe_gt_err(gt, "Fence 0x%x was used by action %#04x [failed to retrieve stack]\n",
+ fence, ct->fast_req[n].action);
+ kfree(buf);
+#else
+ xe_gt_err(gt, "Fence 0x%x was used by action %#04x\n",
+ fence, ct->fast_req[n].action);
+#endif
+ break;
+ }
+
+ if (!found)
+ xe_gt_warn(gt, "Fence 0x%x not found - tracking buffer wrapped? [range = 0x%x -> 0x%x, next = 0x%X]\n",
+ fence, fence_min, fence_max, ct->fence_seqno);
+}
+#else
+static void fast_req_report(struct xe_guc_ct *ct, u16 fence)
+{
+}
+#endif
+
static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
{
struct xe_gt *gt = ct_to_gt(ct);
@@ -1171,6 +1420,13 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
else
xe_gt_err(gt, "unexpected response %u for FAST_REQ H2G fence 0x%x!\n",
type, fence);
+
+ fast_req_report(ct, fence);
+
+ /* FIXME: W/A race in the GuC, will get in firmware soon */
+ if (xe_gt_recovery_pending(gt))
+ return 0;
+
CT_DEAD(ct, NULL, PARSE_G2H_RESPONSE);
return -EPROTO;
@@ -1203,7 +1459,8 @@ static int parse_g2h_response(struct xe_guc_ct *ct, u32 *msg, u32 len)
g2h_release_space(ct, GUC_CTB_HXG_MSG_MAX_LEN);
- g2h_fence->done = true;
+ /* WRITE_ONCE pairs with READ_ONCEs in guc_ct_send_recv. */
+ WRITE_ONCE(g2h_fence->done, true);
smp_mb();
wake_up_all(&ct->g2h_fence_wq);
@@ -1298,12 +1555,7 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
ret = xe_guc_pagefault_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
- ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
- adj_len);
- break;
- case XE_GUC_ACTION_ACCESS_COUNTER_NOTIFY:
- ret = xe_guc_access_counter_notify_handler(guc, payload,
- adj_len);
+ ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
case XE_GUC_ACTION_GUC2PF_RELAY_FROM_VF:
ret = xe_guc_relay_process_guc2pf(&guc->relay, hxg, hxg_len);
@@ -1321,6 +1573,11 @@ static int process_g2h_msg(struct xe_guc_ct *ct, u32 *msg, u32 len)
case XE_GUC_ACTION_NOTIFY_EXCEPTION:
ret = guc_crash_process_msg(ct, action);
break;
+#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
+ case XE_GUC_ACTION_TEST_G2G_RECV:
+ ret = xe_guc_g2g_test_notification(guc, payload, adj_len);
+ break;
+#endif
default:
xe_gt_err(gt, "unexpected G2H action 0x%04x\n", action);
}
@@ -1344,7 +1601,7 @@ static int g2h_read(struct xe_guc_ct *ct, u32 *msg, bool fast_path)
u32 action;
u32 *hxg;
- xe_gt_assert(gt, ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED);
+ xe_gt_assert(gt, xe_guc_ct_initialized(ct));
lockdep_assert_held(&ct->fast_lock);
if (ct->state == XE_GUC_CT_STATE_DISABLED)
@@ -1500,8 +1757,7 @@ static void g2h_fast_path(struct xe_guc_ct *ct, u32 *msg, u32 len)
break;
case XE_GUC_ACTION_TLB_INVALIDATION_DONE:
__g2h_release_space(ct, len);
- ret = xe_guc_tlb_invalidation_done_handler(guc, payload,
- adj_len);
+ ret = xe_guc_tlb_inval_done_handler(guc, payload, adj_len);
break;
default:
xe_gt_warn(gt, "NOT_POSSIBLE");
@@ -1634,7 +1890,7 @@ static struct xe_guc_ct_snapshot *guc_ct_snapshot_alloc(struct xe_guc_ct *ct, bo
return NULL;
if (ct->bo && want_ctb) {
- snapshot->ctb_size = ct->bo->size;
+ snapshot->ctb_size = xe_bo_size(ct->bo);
snapshot->ctb = kmalloc(snapshot->ctb_size, atomic ? GFP_ATOMIC : GFP_KERNEL);
}
@@ -1770,6 +2026,24 @@ void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb)
}
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+
+#ifdef CONFIG_FUNCTION_ERROR_INJECTION
+/*
+ * This is a helper function which assists the driver in identifying if a fault
+ * injection test is currently active, allowing it to reduce unnecessary debug
+ * output. Typically, the function returns zero, but the fault injection
+ * framework can alter this to return an error. Since faults are injected
+ * through this function, it's important to ensure the compiler doesn't optimize
+ * it into an inline function. To avoid such optimization, the 'noinline'
+ * attribute is applied, since the compiler would otherwise optimize a
+ * static function defined in a header file into an inline function.
+ */
+noinline int xe_is_injection_active(void) { return 0; }
+ALLOW_ERROR_INJECTION(xe_is_injection_active, ERRNO);
+#else
+int xe_is_injection_active(void) { return 0; }
+#endif
+
static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reason_code)
{
struct xe_guc_log_snapshot *snapshot_log;
@@ -1780,6 +2054,12 @@ static void ct_dead_capture(struct xe_guc_ct *ct, struct guc_ctb *ctb, u32 reaso
if (ctb)
ctb->info.broken = true;
+ /*
+ * A huge dump gets generated when injecting errors into GuC CT/MMIO
+ * functions, so suppress the dump when a fault is injected.
+ */
+ if (xe_is_injection_active())
+ return;
/* Ignore further errors after the first dump until a reset */
if (ct->dead.reported)
@@ -1830,7 +2110,6 @@ static void ct_dead_print(struct xe_dead_ct *dead)
return;
}
-
/* Can't generate a genuine core dump at this point, so just do the good bits */
drm_puts(&lp, "**** Xe Device Coredump ****\n");
drm_printf(&lp, "Reason: CTB is dead - 0x%X\n", dead->reason);
diff --git a/drivers/gpu/drm/xe/xe_guc_ct.h b/drivers/gpu/drm/xe/xe_guc_ct.h
index 82c4ae458dda..ca1ce2b3c354 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct.h
@@ -11,10 +11,14 @@
struct drm_printer;
struct xe_device;
+int xe_guc_ct_init_noalloc(struct xe_guc_ct *ct);
int xe_guc_ct_init(struct xe_guc_ct *ct);
+int xe_guc_ct_init_post_hwconfig(struct xe_guc_ct *ct);
int xe_guc_ct_enable(struct xe_guc_ct *ct);
+int xe_guc_ct_restart(struct xe_guc_ct *ct);
void xe_guc_ct_disable(struct xe_guc_ct *ct);
void xe_guc_ct_stop(struct xe_guc_ct *ct);
+void xe_guc_ct_flush_and_stop(struct xe_guc_ct *ct);
void xe_guc_ct_fast_path(struct xe_guc_ct *ct);
struct xe_guc_ct_snapshot *xe_guc_ct_snapshot_capture(struct xe_guc_ct *ct);
@@ -22,6 +26,11 @@ void xe_guc_ct_snapshot_print(struct xe_guc_ct_snapshot *snapshot, struct drm_pr
void xe_guc_ct_snapshot_free(struct xe_guc_ct_snapshot *snapshot);
void xe_guc_ct_print(struct xe_guc_ct *ct, struct drm_printer *p, bool want_ctb);
+static inline bool xe_guc_ct_initialized(struct xe_guc_ct *ct)
+{
+ return ct->state != XE_GUC_CT_STATE_NOT_INITIALIZED;
+}
+
static inline bool xe_guc_ct_enabled(struct xe_guc_ct *ct)
{
return ct->state == XE_GUC_CT_STATE_ENABLED;
@@ -65,4 +74,13 @@ xe_guc_ct_send_block_no_fail(struct xe_guc_ct *ct, const u32 *action, u32 len)
long xe_guc_ct_queue_proc_time_jiffies(struct xe_guc_ct *ct);
+/**
+ * xe_guc_ct_wake_waiters() - Wake up GuC CT waiters
+ * @ct: GuC CT object
+ */
+static inline void xe_guc_ct_wake_waiters(struct xe_guc_ct *ct)
+{
+ wake_up_all(&ct->wq);
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_ct_types.h b/drivers/gpu/drm/xe/xe_guc_ct_types.h
index 8e1b9d981d61..09d7ff1ef42a 100644
--- a/drivers/gpu/drm/xe/xe_guc_ct_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_ct_types.h
@@ -9,6 +9,7 @@
#include <linux/interrupt.h>
#include <linux/iosys-map.h>
#include <linux/spinlock_types.h>
+#include <linux/stackdepot.h>
#include <linux/wait.h>
#include <linux/xarray.h>
@@ -104,6 +105,18 @@ struct xe_dead_ct {
/** snapshot_log: copy of GuC log at point of error */
struct xe_guc_log_snapshot *snapshot_log;
};
+
+/** struct xe_fast_req_fence - Used to track FAST_REQ messages by fence to match error responses */
+struct xe_fast_req_fence {
+ /** @fence: sequence number sent in H2G and return in G2H error */
+ u16 fence;
+ /** @action: H2G action code */
+ u16 action;
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
+ /** @stack: call stack from when the H2G was sent */
+ depot_stack_handle_t stack;
+#endif
+};
#endif
/**
@@ -113,7 +126,7 @@ struct xe_dead_ct {
* for the H2G and G2H requests sent and received through the buffers.
*/
struct xe_guc_ct {
- /** @bo: XE BO for CT */
+ /** @bo: Xe BO for CT */
struct xe_bo *bo;
/** @lock: protects everything in CT layer */
struct mutex lock;
@@ -152,6 +165,8 @@ struct xe_guc_ct {
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
/** @dead: information for debugging dead CTs */
struct xe_dead_ct dead;
+ /** @fast_req: history of FAST_REQ messages for matching with G2H error responses */
+ struct xe_fast_req_fence fast_req[SZ_32];
#endif
};
diff --git a/drivers/gpu/drm/xe/xe_guc_engine_activity.c b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
index 0fb48f8f05d8..2b99c1ebdd58 100644
--- a/drivers/gpu/drm/xe/xe_guc_engine_activity.c
+++ b/drivers/gpu/drm/xe/xe_guc_engine_activity.c
@@ -94,16 +94,17 @@ static int allocate_engine_activity_buffers(struct xe_guc *guc,
struct xe_tile *tile = gt_to_tile(gt);
struct xe_bo *bo, *metadata_bo;
- metadata_bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(metadata_size),
- ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+ metadata_bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(metadata_size),
+ ttm_bo_type_kernel, XE_BO_FLAG_SYSTEM |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE,
+ false);
if (IS_ERR(metadata_bo))
return PTR_ERR(metadata_bo);
- bo = xe_bo_create_pin_map(gt_to_xe(gt), tile, NULL, PAGE_ALIGN(size),
- ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE);
+ bo = xe_bo_create_pin_map_novm(gt_to_xe(gt), tile, PAGE_ALIGN(size),
+ ttm_bo_type_kernel, XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT | XE_BO_FLAG_GGTT_INVALIDATE, false);
if (IS_ERR(bo)) {
xe_bo_unpin_map_no_vm(metadata_bo);
@@ -124,7 +125,7 @@ static void free_engine_activity_buffers(struct engine_activity_buffer *buffer)
static bool is_engine_activity_supported(struct xe_guc *guc)
{
struct xe_uc_fw_version *version = &guc->fw.versions.found[XE_UC_FW_VER_COMPATIBILITY];
- struct xe_uc_fw_version required = { 1, 14, 1 };
+ struct xe_uc_fw_version required = { .major = 1, .minor = 14, .patch = 1 };
struct xe_gt *gt = guc_to_gt(guc);
if (IS_SRIOV_VF(gt_to_xe(gt))) {
diff --git a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
index 4c39f01e4f52..a3b034e4b205 100644
--- a/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_exec_queue_types.h
@@ -20,6 +20,8 @@ struct xe_exec_queue;
struct xe_guc_exec_queue {
/** @q: Backpointer to parent xe_exec_queue */
struct xe_exec_queue *q;
+ /** @rcu: For safe freeing of exported dma fences */
+ struct rcu_head rcu;
/** @sched: GPU scheduler for this xe_exec_queue */
struct xe_gpu_scheduler sched;
/** @entity: Scheduler entity for this xe_exec_queue */
@@ -33,8 +35,8 @@ struct xe_guc_exec_queue {
struct xe_sched_msg static_msgs[MAX_STATIC_MSG_TYPE];
/** @lr_tdr: long running TDR worker */
struct work_struct lr_tdr;
- /** @fini_async: do final fini async from this worker */
- struct work_struct fini_async;
+ /** @destroy_async: do final destroy async from this worker */
+ struct work_struct destroy_async;
/** @resume_time: time of last resume */
u64 resume_time;
/** @state: GuC specific state for this xe_exec_queue */
@@ -49,6 +51,21 @@ struct xe_guc_exec_queue {
wait_queue_head_t suspend_wait;
/** @suspend_pending: a suspend of the exec_queue is pending */
bool suspend_pending;
+ /**
+ * @needs_cleanup: Needs a cleanup message during VF post migration
+ * recovery.
+ */
+ bool needs_cleanup;
+ /**
+ * @needs_suspend: Needs a suspend message during VF post migration
+ * recovery.
+ */
+ bool needs_suspend;
+ /**
+ * @needs_resume: Needs a resume message during VF post migration
+ * recovery.
+ */
+ bool needs_resume;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_fwif.h b/drivers/gpu/drm/xe/xe_guc_fwif.h
index 6f57578b07cb..c90dd266e9cf 100644
--- a/drivers/gpu/drm/xe/xe_guc_fwif.h
+++ b/drivers/gpu/drm/xe/xe_guc_fwif.h
@@ -15,6 +15,7 @@
#define G2H_LEN_DW_SCHED_CONTEXT_MODE_SET 4
#define G2H_LEN_DW_DEREGISTER_CONTEXT 3
#define G2H_LEN_DW_TLB_INVALIDATE 3
+#define G2H_LEN_DW_G2G_NOTIFY_MIN 3
#define GUC_ID_MAX 65535
#define GUC_ID_UNKNOWN 0xffffffff
@@ -45,6 +46,11 @@
#define GUC_MAX_ENGINE_CLASSES 16
#define GUC_MAX_INSTANCES_PER_CLASS 32
+#define GUC_CONTEXT_NORMAL 0
+#define GUC_CONTEXT_COMPRESSION_SAVE 1
+#define GUC_CONTEXT_COMPRESSION_RESTORE 2
+#define GUC_CONTEXT_COUNT (GUC_CONTEXT_COMPRESSION_RESTORE + 1)
+
/* Helper for context registration H2G */
struct guc_ctxt_registration_info {
u32 flags;
@@ -60,6 +66,7 @@ struct guc_ctxt_registration_info {
u32 hwlrca_hi;
};
#define CONTEXT_REGISTRATION_FLAG_KMD BIT(0)
+#define CONTEXT_REGISTRATION_FLAG_TYPE GENMASK(2, 1)
/* 32-bit KLV structure as used by policy updates and others */
struct guc_klv_generic_dw_t {
@@ -84,13 +91,10 @@ struct guc_update_exec_queue_policy {
#define GUC_LOG_NOTIFY_ON_HALF_FULL BIT(1)
#define GUC_LOG_CAPTURE_ALLOC_UNITS BIT(2)
#define GUC_LOG_LOG_ALLOC_UNITS BIT(3)
-#define GUC_LOG_CRASH_SHIFT 4
-#define GUC_LOG_CRASH_MASK (0x3 << GUC_LOG_CRASH_SHIFT)
-#define GUC_LOG_DEBUG_SHIFT 6
-#define GUC_LOG_DEBUG_MASK (0xF << GUC_LOG_DEBUG_SHIFT)
-#define GUC_LOG_CAPTURE_SHIFT 10
-#define GUC_LOG_CAPTURE_MASK (0x3 << GUC_LOG_CAPTURE_SHIFT)
-#define GUC_LOG_BUF_ADDR_SHIFT 12
+#define GUC_LOG_CRASH REG_GENMASK(5, 4)
+#define GUC_LOG_DEBUG REG_GENMASK(9, 6)
+#define GUC_LOG_CAPTURE REG_GENMASK(11, 10)
+#define GUC_LOG_BUF_ADDR REG_GENMASK(31, 12)
#define GUC_CTL_WA 1
#define GUC_WA_GAM_CREDITS BIT(10)
@@ -103,28 +107,24 @@ struct guc_update_exec_queue_policy {
#define GUC_WA_RENDER_RST_RC6_EXIT BIT(19)
#define GUC_WA_RCS_REGS_IN_CCS_REGS_LIST BIT(21)
#define GUC_WA_ENABLE_TSC_CHECK_ON_RC6 BIT(22)
+#define GUC_WA_SAVE_RESTORE_MCFG_REG_AT_MC6 BIT(25)
#define GUC_CTL_FEATURE 2
#define GUC_CTL_ENABLE_SLPC BIT(2)
#define GUC_CTL_ENABLE_LITE_RESTORE BIT(4)
+#define GUC_CTL_ENABLE_PSMI_LOGGING BIT(7)
+#define GUC_CTL_MAIN_GAMCTRL_QUEUES BIT(9)
#define GUC_CTL_DISABLE_SCHEDULER BIT(14)
#define GUC_CTL_DEBUG 3
-#define GUC_LOG_VERBOSITY_SHIFT 0
-#define GUC_LOG_VERBOSITY_LOW (0 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_MED (1 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_HIGH (2 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_ULTRA (3 << GUC_LOG_VERBOSITY_SHIFT)
-#define GUC_LOG_VERBOSITY_MIN 0
+#define GUC_LOG_VERBOSITY REG_GENMASK(1, 0)
#define GUC_LOG_VERBOSITY_MAX 3
-#define GUC_LOG_VERBOSITY_MASK 0x0000000f
-#define GUC_LOG_DESTINATION_MASK (3 << 4)
-#define GUC_LOG_DISABLED (1 << 6)
-#define GUC_PROFILE_ENABLED (1 << 7)
+#define GUC_LOG_DESTINATION REG_GENMASK(5, 4)
+#define GUC_LOG_DISABLED BIT(6)
+#define GUC_PROFILE_ENABLED BIT(7)
#define GUC_CTL_ADS 4
-#define GUC_ADS_ADDR_SHIFT 1
-#define GUC_ADS_ADDR_MASK (0xFFFFF << GUC_ADS_ADDR_SHIFT)
+#define GUC_ADS_ADDR REG_GENMASK(21, 1)
#define GUC_CTL_DEVID 5
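
The removed *_SHIFT/*_MASK pairs are folded into REG_GENMASK() fields, so users of these controls are expected to move from open-coded shifts to REG_FIELD_PREP()/REG_FIELD_GET(). A sketch of the new-style encoding of the log parameters (the function and its arguments are illustrative, not taken from this patch):

	static u32 example_log_ctl_flags(u32 crash_units, u32 debug_units, u32 buf_page)
	{
		/* REG_FIELD_PREP() checks the value against the REG_GENMASK() field. */
		return GUC_LOG_NOTIFY_ON_HALF_FULL |
		       REG_FIELD_PREP(GUC_LOG_CRASH, crash_units) |
		       REG_FIELD_PREP(GUC_LOG_DEBUG, debug_units) |
		       REG_FIELD_PREP(GUC_LOG_BUF_ADDR, buf_page);
	}
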
diff --git a/drivers/gpu/drm/xe/xe_guc_log.c b/drivers/gpu/drm/xe/xe_guc_log.c
index 38039c411387..c01ccb35dc75 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.c
+++ b/drivers/gpu/drm/xe/xe_guc_log.c
@@ -79,7 +79,7 @@ static struct xe_guc_log_snapshot *xe_guc_log_snapshot_alloc(struct xe_guc_log *
* Also, can't use vmalloc as might be called from atomic context. So need
* to break the buffer up into smaller chunks that can be allocated.
*/
- snapshot->size = log->bo->size;
+ snapshot->size = xe_bo_size(log->bo);
snapshot->num_chunks = DIV_ROUND_UP(snapshot->size, GUC_LOG_CHUNK_SIZE);
snapshot->copy = kcalloc(snapshot->num_chunks, sizeof(*snapshot->copy),
diff --git a/drivers/gpu/drm/xe/xe_guc_log.h b/drivers/gpu/drm/xe/xe_guc_log.h
index 5b896f5fafaf..98a47ac42b08 100644
--- a/drivers/gpu/drm/xe/xe_guc_log.h
+++ b/drivers/gpu/drm/xe/xe_guc_log.h
@@ -12,12 +12,12 @@
struct drm_printer;
struct xe_device;
-#if IS_ENABLED(CONFIG_DRM_XE_LARGE_GUC_BUFFER)
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_GUC)
#define CRASH_BUFFER_SIZE SZ_1M
#define DEBUG_BUFFER_SIZE SZ_8M
#define CAPTURE_BUFFER_SIZE SZ_2M
#else
-#define CRASH_BUFFER_SIZE SZ_8K
+#define CRASH_BUFFER_SIZE SZ_16K
#define DEBUG_BUFFER_SIZE SZ_64K
#define CAPTURE_BUFFER_SIZE SZ_1M
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_log_types.h b/drivers/gpu/drm/xe/xe_guc_log_types.h
index b3d5c72ac752..02851b924aa4 100644
--- a/drivers/gpu/drm/xe/xe_guc_log_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_log_types.h
@@ -44,7 +44,7 @@ struct xe_guc_log_snapshot {
struct xe_guc_log {
/** @level: GuC log level */
u32 level;
- /** @bo: XE BO for GuC log */
+ /** @bo: Xe BO for GuC log */
struct xe_bo *bo;
/** @stats: logging related stats */
struct {
diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.c b/drivers/gpu/drm/xe/xe_guc_pagefault.c
new file mode 100644
index 000000000000..719a18187a31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pagefault.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "abi/guc_actions_abi.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_pagefault.h"
+#include "xe_pagefault.h"
+
+static void guc_ack_fault(struct xe_pagefault *pf, int err)
+{
+ u32 vfid = FIELD_GET(PFD_VFID, pf->producer.msg[2]);
+ u32 engine_instance = FIELD_GET(PFD_ENG_INSTANCE, pf->producer.msg[0]);
+ u32 engine_class = FIELD_GET(PFD_ENG_CLASS, pf->producer.msg[0]);
+ u32 pdata = FIELD_GET(PFD_PDATA_LO, pf->producer.msg[0]) |
+ (FIELD_GET(PFD_PDATA_HI, pf->producer.msg[1]) <<
+ PFD_PDATA_HI_SHIFT);
+ u32 action[] = {
+ XE_GUC_ACTION_PAGE_FAULT_RES_DESC,
+
+ FIELD_PREP(PFR_VALID, 1) |
+ FIELD_PREP(PFR_SUCCESS, !!err) |
+ FIELD_PREP(PFR_REPLY, PFR_ACCESS) |
+ FIELD_PREP(PFR_DESC_TYPE, FAULT_RESPONSE_DESC) |
+ FIELD_PREP(PFR_ASID, pf->consumer.asid),
+
+ FIELD_PREP(PFR_VFID, vfid) |
+ FIELD_PREP(PFR_ENG_INSTANCE, engine_instance) |
+ FIELD_PREP(PFR_ENG_CLASS, engine_class) |
+ FIELD_PREP(PFR_PDATA, pdata),
+ };
+ struct xe_guc *guc = pf->producer.private;
+
+ xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
+}
+
+static const struct xe_pagefault_ops guc_pagefault_ops = {
+ .ack_fault = guc_ack_fault,
+};
+
+/**
+ * xe_guc_pagefault_handler() - G2H page fault handler
+ * @guc: GuC object
+ * @msg: G2H message
+ * @len: Length of G2H message
+ *
+ * Parse GuC to host (G2H) message into a struct xe_pagefault and forward onto
+ * the Xe page fault layer.
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_pagefault pf;
+ int i;
+
+#define GUC_PF_MSG_LEN_DW \
+ (sizeof(struct xe_guc_pagefault_desc) / sizeof(u32))
+
+ BUILD_BUG_ON(GUC_PF_MSG_LEN_DW > XE_PAGEFAULT_PRODUCER_MSG_LEN_DW);
+
+ if (len != GUC_PF_MSG_LEN_DW)
+ return -EPROTO;
+
+ pf.gt = guc_to_gt(guc);
+
+ /*
+ * XXX: These values happen to match the enum in xe_pagefault_types.h.
+ * If that changes, we'll need to remap them here.
+ */
+ pf.consumer.page_addr = ((u64)FIELD_GET(PFD_VIRTUAL_ADDR_HI, msg[3])
+ << PFD_VIRTUAL_ADDR_HI_SHIFT) |
+ (FIELD_GET(PFD_VIRTUAL_ADDR_LO, msg[2]) <<
+ PFD_VIRTUAL_ADDR_LO_SHIFT);
+ pf.consumer.asid = FIELD_GET(PFD_ASID, msg[1]);
+ pf.consumer.access_type = FIELD_GET(PFD_ACCESS_TYPE, msg[2]);
+ pf.consumer.fault_type = FIELD_GET(PFD_FAULT_TYPE, msg[2]);
+ if (FIELD_GET(XE2_PFD_TRVA_FAULT, msg[0]))
+ pf.consumer.fault_level = XE_PAGEFAULT_LEVEL_NACK;
+ else
+ pf.consumer.fault_level = FIELD_GET(PFD_FAULT_LEVEL, msg[0]);
+ pf.consumer.engine_class = FIELD_GET(PFD_ENG_CLASS, msg[0]);
+ pf.consumer.engine_instance = FIELD_GET(PFD_ENG_INSTANCE, msg[0]);
+
+ pf.producer.private = guc;
+ pf.producer.ops = &guc_pagefault_ops;
+ for (i = 0; i < GUC_PF_MSG_LEN_DW; ++i)
+ pf.producer.msg[i] = msg[i];
+
+#undef GUC_PF_MSG_LEN_DW
+
+ return xe_pagefault_handler(guc_to_xe(guc), &pf);
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_pagefault.h b/drivers/gpu/drm/xe/xe_guc_pagefault.h
new file mode 100644
index 000000000000..3bd599e7207c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_pagefault.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GUC_PAGEFAULT_H_
+#define _XE_GUC_PAGEFAULT_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+
+int xe_guc_pagefault_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif
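
For context, the ack path above is only reached once the generic xe_pagefault layer has finished servicing the fault; a hypothetical consumer-side completion would call back through the producer ops, which for GuC-originated faults lands in guc_ack_fault():

	static void example_pagefault_done(struct xe_pagefault *pf, int err)
	{
		/* The error code is propagated into the PFR reply built by the producer. */
		pf->producer.ops->ack_fault(pf, err);
	}
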
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.c b/drivers/gpu/drm/xe/xe_guc_pc.c
index 18c623992035..951a49fb1d3e 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.c
+++ b/drivers/gpu/drm/xe/xe_guc_pc.c
@@ -5,11 +5,16 @@
#include "xe_guc_pc.h"
+#include <linux/cleanup.h>
#include <linux/delay.h>
+#include <linux/iopoll.h>
+#include <linux/jiffies.h>
#include <linux/ktime.h>
+#include <linux/wait_bit.h>
#include <drm/drm_managed.h>
#include <drm/drm_print.h>
+#include <generated/xe_device_wa_oob.h>
#include <generated/xe_wa_oob.h>
#include "abi/guc_actions_slpc_abi.h"
@@ -51,9 +56,12 @@
#define LNL_MERT_FREQ_CAP 800
#define BMG_MERT_FREQ_CAP 2133
+#define BMG_MIN_FREQ 1200
+#define BMG_MERT_FLUSH_FREQ_CAP 2600
#define SLPC_RESET_TIMEOUT_MS 5 /* roughly 5ms, but no need for precision */
#define SLPC_RESET_EXTENDED_TIMEOUT_MS 1000 /* To be used only at pc_start */
+#define SLPC_ACT_FREQ_TIMEOUT_MS 100
/**
* DOC: GuC Power Conservation (PC)
@@ -73,6 +81,11 @@
* Xe driver enables SLPC with all of its defaults features and frequency
* selection, which varies per platform.
*
+ * Power profiles add another level of control to SLPC. When the power saving
+ * profile is chosen, SLPC will use conservative thresholds to ramp frequency,
+ * thus saving power. The base profile is the default and ensures balanced
+ * performance for any workload.
+ *
* Render-C States:
* ================
*
@@ -119,26 +132,37 @@ static struct iosys_map *pc_to_maps(struct xe_guc_pc *pc)
FIELD_PREP(HOST2GUC_PC_SLPC_REQUEST_MSG_1_EVENT_ARGC, count))
static int wait_for_pc_state(struct xe_guc_pc *pc,
- enum slpc_global_state state,
+ enum slpc_global_state target_state,
int timeout_ms)
{
- int timeout_us = 1000 * timeout_ms;
- int slept, wait = 10;
+ enum slpc_global_state state;
xe_device_assert_mem_access(pc_to_xe(pc));
- for (slept = 0; slept < timeout_us;) {
- if (slpc_shared_data_read(pc, header.global_state) == state)
- return 0;
+ return poll_timeout_us(state = slpc_shared_data_read(pc, header.global_state),
+ state == target_state,
+ 20, timeout_ms * USEC_PER_MSEC, false);
+}
- usleep_range(wait, wait << 1);
- slept += wait;
- wait <<= 1;
- if (slept + wait > timeout_us)
- wait = timeout_us - slept;
- }
+static int wait_for_flush_complete(struct xe_guc_pc *pc)
+{
+ const unsigned long timeout = msecs_to_jiffies(30);
+
+ if (!wait_var_event_timeout(&pc->flush_freq_limit,
+ !atomic_read(&pc->flush_freq_limit),
+ timeout))
+ return -ETIMEDOUT;
+
+ return 0;
+}
+
+static int wait_for_act_freq_max_limit(struct xe_guc_pc *pc, u32 max_limit)
+{
+ u32 freq;
- return -ETIMEDOUT;
+ return poll_timeout_us(freq = xe_guc_pc_get_act_freq(pc),
+ freq <= max_limit,
+ 20, SLPC_ACT_FREQ_TIMEOUT_MS * USEC_PER_MSEC, false);
}
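
Both helpers above now lean on poll_timeout_us() from <linux/iopoll.h> (newly included in this patch) instead of an open-coded sleep loop; judging by the call sites, the arguments are (read expression, condition, sleep in us, timeout in us, sleep-before-read). A generic sketch under that assumption, with example_read_status() and EXAMPLE_READY as hypothetical placeholders:

	static int example_wait_ready(void)
	{
		u32 val;

		/* Re-evaluate the read roughly every 20us, give up after 10ms. */
		return poll_timeout_us(val = example_read_status(), val == EXAMPLE_READY,
				       20, 10 * USEC_PER_MSEC, false);
	}
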
static int pc_action_reset(struct xe_guc_pc *pc)
@@ -153,7 +177,7 @@ static int pc_action_reset(struct xe_guc_pc *pc)
int ret;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC reset failed: %pe\n",
ERR_PTR(ret));
@@ -177,7 +201,7 @@ static int pc_action_query_task_state(struct xe_guc_pc *pc)
/* Blocking here to ensure the results are ready before reading them */
ret = xe_guc_ct_send_block(ct, action, ARRAY_SIZE(action));
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC query task state failed: %pe\n",
ERR_PTR(ret));
@@ -200,7 +224,7 @@ static int pc_action_set_param(struct xe_guc_pc *pc, u8 id, u32 value)
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC set param[%u]=%u failed: %pe\n",
id, value, ERR_PTR(ret));
@@ -222,7 +246,7 @@ static int pc_action_unset_param(struct xe_guc_pc *pc, u8 id)
return -EAGAIN;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC PC unset param failed: %pe",
ERR_PTR(ret));
@@ -239,7 +263,7 @@ static int pc_action_setup_gucrc(struct xe_guc_pc *pc, u32 mode)
int ret;
ret = xe_guc_ct_send(ct, action, ARRAY_SIZE(action), 0, 0);
- if (ret)
+ if (ret && !(xe_device_wedged(pc_to_xe(pc)) && ret == -ECANCELED))
xe_gt_err(pc_to_gt(pc), "GuC RC enable mode=%u failed: %pe\n",
mode, ERR_PTR(ret));
return ret;
@@ -307,7 +331,7 @@ static int pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
* Our goal is to have the admin choices respected.
*/
pc_action_set_param(pc, SLPC_PARAM_IGNORE_EFFICIENT_FREQUENCY,
- freq < pc->rpe_freq);
+ freq < xe_guc_pc_get_rpe_freq(pc));
return pc_action_set_param(pc,
SLPC_PARAM_GLOBAL_MIN_GT_UNSLICE_FREQ_MHZ,
@@ -339,7 +363,7 @@ static int pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
freq);
}
-static void mtl_update_rpa_value(struct xe_guc_pc *pc)
+static u32 mtl_get_rpa_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
@@ -349,10 +373,10 @@ static void mtl_update_rpa_value(struct xe_guc_pc *pc)
else
reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPA_FREQUENCY);
- pc->rpa_freq = decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg));
+ return decode_freq(REG_FIELD_GET(MTL_RPA_MASK, reg));
}
-static void mtl_update_rpe_value(struct xe_guc_pc *pc)
+static u32 mtl_get_rpe_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
u32 reg;
@@ -362,68 +386,56 @@ static void mtl_update_rpe_value(struct xe_guc_pc *pc)
else
reg = xe_mmio_read32(&gt->mmio, MTL_GT_RPE_FREQUENCY);
- pc->rpe_freq = decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
+ return decode_freq(REG_FIELD_GET(MTL_RPE_MASK, reg));
}
-static void tgl_update_rpa_value(struct xe_guc_pc *pc)
+static u32 pvc_get_rpa_freq(struct xe_guc_pc *pc)
{
- struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
- u32 reg;
-
/*
* For PVC we still need to use fused RP0 as the approximation for RPa
* For other platforms than PVC we get the resolved RPa directly from
* PCODE at a different register
*/
- if (xe->info.platform == XE_PVC) {
- reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
- pc->rpa_freq = REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
- } else {
- reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
- pc->rpa_freq = REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
- }
+
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
+
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
+ return REG_FIELD_GET(RP0_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
-static void tgl_update_rpe_value(struct xe_guc_pc *pc)
+static u32 tgl_get_rpa_freq(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 reg;
+
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
+ return REG_FIELD_GET(RPA_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
+}
+
+static u32 pvc_get_rpe_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
u32 reg;
/*
* For PVC we still need to use fused RP1 as the approximation for RPe
- * For other platforms than PVC we get the resolved RPe directly from
- * PCODE at a different register
*/
- if (xe->info.platform == XE_PVC) {
- reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
- pc->rpe_freq = REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
- } else {
- reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
- pc->rpe_freq = REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
- }
+ reg = xe_mmio_read32(&gt->mmio, PVC_RP_STATE_CAP);
+ return REG_FIELD_GET(RP1_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
-static void pc_update_rp_values(struct xe_guc_pc *pc)
+static u32 tgl_get_rpe_freq(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
- struct xe_device *xe = gt_to_xe(gt);
-
- if (GRAPHICS_VERx100(xe) >= 1270) {
- mtl_update_rpa_value(pc);
- mtl_update_rpe_value(pc);
- } else {
- tgl_update_rpa_value(pc);
- tgl_update_rpe_value(pc);
- }
+ u32 reg;
/*
- * RPe is decided at runtime by PCODE. In the rare case where that's
- * smaller than the fused min, we will trust the PCODE and use that
- * as our minimum one.
+ * For platforms other than PVC, we get the resolved RPe directly from
+ * PCODE at a different register
*/
- pc->rpn_freq = min(pc->rpn_freq, pc->rpe_freq);
+ reg = xe_mmio_read32(&gt->mmio, FREQ_INFO_REC);
+ return REG_FIELD_GET(RPE_MASK, reg) * GT_FREQUENCY_MULTIPLIER;
}
/**
@@ -524,9 +536,15 @@ u32 xe_guc_pc_get_rp0_freq(struct xe_guc_pc *pc)
*/
u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc)
{
- pc_update_rp_values(pc);
+ struct xe_gt *gt = pc_to_gt(pc);
+ struct xe_device *xe = gt_to_xe(gt);
- return pc->rpa_freq;
+ if (GRAPHICS_VERx100(xe) == 1260)
+ return pvc_get_rpa_freq(pc);
+ else if (GRAPHICS_VERx100(xe) >= 1270)
+ return mtl_get_rpa_freq(pc);
+ else
+ return tgl_get_rpa_freq(pc);
}
/**
@@ -537,9 +555,17 @@ u32 xe_guc_pc_get_rpa_freq(struct xe_guc_pc *pc)
*/
u32 xe_guc_pc_get_rpe_freq(struct xe_guc_pc *pc)
{
- pc_update_rp_values(pc);
+ struct xe_device *xe = pc_to_xe(pc);
+ u32 freq;
+
+ if (GRAPHICS_VERx100(xe) == 1260)
+ freq = pvc_get_rpe_freq(pc);
+ else if (GRAPHICS_VERx100(xe) >= 1270)
+ freq = mtl_get_rpe_freq(pc);
+ else
+ freq = tgl_get_rpe_freq(pc);
- return pc->rpe_freq;
+ return freq;
}
/**
@@ -553,6 +579,25 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
return pc->rpn_freq;
}
+static int xe_guc_pc_get_min_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
+ int ret;
+
+ lockdep_assert_held(&pc->freq_lock);
+
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
+ if (ret)
+ return ret;
+
+ *freq = pc_get_min_freq(pc);
+
+ return 0;
+}
+
/**
* xe_guc_pc_get_min_freq - Get the min operational frequency
* @pc: The GuC PC
@@ -563,26 +608,28 @@ u32 xe_guc_pc_get_rpn_freq(struct xe_guc_pc *pc)
*/
int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_min_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- xe_device_assert_mem_access(pc_to_xe(pc));
+ lockdep_assert_held(&pc->freq_lock);
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
- ret = pc_action_query_task_state(pc);
+ ret = pc_set_min_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_min_freq(pc);
+ pc->user_requested_min = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -596,24 +643,28 @@ out:
*/
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_set_min_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_get_max_freq_locked(struct xe_guc_pc *pc, u32 *freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_set_min_freq(pc, freq);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_action_query_task_state(pc);
if (ret)
- goto out;
+ return ret;
- pc->user_requested_min = freq;
+ *freq = pc_get_max_freq(pc);
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -626,24 +677,28 @@ out:
*/
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq)
{
+ guard(mutex)(&pc->freq_lock);
+
+ return xe_guc_pc_get_max_freq_locked(pc, freq);
+}
+
+static int xe_guc_pc_set_max_freq_locked(struct xe_guc_pc *pc, u32 freq)
+{
int ret;
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
- }
+ lockdep_assert_held(&pc->freq_lock);
- ret = pc_action_query_task_state(pc);
+ /* Might be in the middle of a gt reset */
+ if (!pc->freq_ready)
+ return -EAGAIN;
+
+ ret = pc_set_max_freq(pc, freq);
if (ret)
- goto out;
+ return ret;
- *freq = pc_get_max_freq(pc);
+ pc->user_requested_max = freq;
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return 0;
}
/**
@@ -657,24 +712,14 @@ out:
*/
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq)
{
- int ret;
-
- mutex_lock(&pc->freq_lock);
- if (!pc->freq_ready) {
- /* Might be in the middle of a gt reset */
- ret = -EAGAIN;
- goto out;
+ if (XE_GT_WA(pc_to_gt(pc), 22019338487)) {
+ if (wait_for_flush_complete(pc) != 0)
+ return -EAGAIN;
}
- ret = pc_set_max_freq(pc, freq);
- if (ret)
- goto out;
+ guard(mutex)(&pc->freq_lock);
- pc->user_requested_max = freq;
-
-out:
- mutex_unlock(&pc->freq_lock);
- return ret;
+ return xe_guc_pc_set_max_freq_locked(pc, freq);
}
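
The rework above splits each accessor into a *_locked() body plus a thin wrapper that takes pc->freq_lock with guard(mutex)() from <linux/cleanup.h>, which releases the mutex automatically on every return path. A minimal sketch of the pattern:

	static int example_locked_query(struct xe_guc_pc *pc, u32 *out)
	{
		guard(mutex)(&pc->freq_lock);

		if (!pc->freq_ready)
			return -EAGAIN;	/* mutex dropped automatically here */

		*out = pc->rpn_freq;
		return 0;		/* ...and here as well */
	}
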
/**
@@ -780,7 +825,7 @@ static u32 pc_max_freq_cap(struct xe_guc_pc *pc)
{
struct xe_gt *gt = pc_to_gt(pc);
- if (XE_WA(gt, 22019338487)) {
+ if (XE_GT_WA(gt, 22019338487)) {
if (xe_gt_is_media_type(gt))
return min(LNL_MERT_FREQ_CAP, pc->rp0_freq);
else
@@ -817,6 +862,7 @@ void xe_guc_pc_init_early(struct xe_guc_pc *pc)
static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
{
+ struct xe_tile *tile = gt_to_tile(pc_to_gt(pc));
int ret;
lockdep_assert_held(&pc->freq_lock);
@@ -843,6 +889,9 @@ static int pc_adjust_freq_bounds(struct xe_guc_pc *pc)
if (pc_get_min_freq(pc) > pc->rp0_freq)
ret = pc_set_min_freq(pc, pc->rp0_freq);
+ if (XE_DEVICE_WA(tile_to_xe(tile), 14022085890))
+ ret = pc_set_min_freq(pc, max(BMG_MIN_FREQ, pc_get_min_freq(pc)));
+
out:
return ret;
}
@@ -868,30 +917,117 @@ static int pc_adjust_requested_freq(struct xe_guc_pc *pc)
return ret;
}
-static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+static bool needs_flush_freq_limit(struct xe_guc_pc *pc)
{
- int ret = 0;
+ struct xe_gt *gt = pc_to_gt(pc);
- if (XE_WA(pc_to_gt(pc), 22019338487)) {
- /*
- * Get updated min/max and stash them.
- */
- ret = xe_guc_pc_get_min_freq(pc, &pc->stashed_min_freq);
- if (!ret)
- ret = xe_guc_pc_get_max_freq(pc, &pc->stashed_max_freq);
- if (ret)
- return ret;
+ return XE_GT_WA(gt, 22019338487) &&
+ pc->rp0_freq > BMG_MERT_FLUSH_FREQ_CAP;
+}
+
+/**
+ * xe_guc_pc_apply_flush_freq_limit() - Limit max GT freq during L2 flush
+ * @pc: the xe_guc_pc object
+ *
+ * As per the WA, reduce max GT frequency during L2 cache flush
+ */
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ u32 max_freq;
+ int ret;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ guard(mutex)(&pc->freq_lock);
+
+ ret = xe_guc_pc_get_max_freq_locked(pc, &max_freq);
+ if (!ret && max_freq > BMG_MERT_FLUSH_FREQ_CAP) {
+ ret = pc_set_max_freq(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret) {
+ xe_gt_err_once(gt, "Failed to cap max freq on flush to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+ return;
+ }
+
+ atomic_set(&pc->flush_freq_limit, 1);
/*
- * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ * If the user has previously changed the max freq, stash that value
+ * to restore later; otherwise use the current max. New user requests
+ * wait on the flush.
*/
- mutex_lock(&pc->freq_lock);
- ret = pc_set_min_freq(pc, min(pc->rpe_freq, pc_max_freq_cap(pc)));
- if (!ret)
- ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
- mutex_unlock(&pc->freq_lock);
+ if (pc->user_requested_max != 0)
+ pc->stashed_max_freq = pc->user_requested_max;
+ else
+ pc->stashed_max_freq = max_freq;
}
+ /*
+ * Wait for actual freq to go below the flush cap: even if the previous
+ * max was below cap, the current one might still be above it
+ */
+ ret = wait_for_act_freq_max_limit(pc, BMG_MERT_FLUSH_FREQ_CAP);
+ if (ret)
+ xe_gt_err_once(gt, "Actual freq did not reduce to %u, %pe\n",
+ BMG_MERT_FLUSH_FREQ_CAP, ERR_PTR(ret));
+}
+
+/**
+ * xe_guc_pc_remove_flush_freq_limit() - Remove max GT freq limit after L2 flush completes.
+ * @pc: the xe_guc_pc object
+ *
+ * Restore the previously stashed GT max frequency value.
+ */
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc)
+{
+ struct xe_gt *gt = pc_to_gt(pc);
+ int ret = 0;
+
+ if (!needs_flush_freq_limit(pc))
+ return;
+
+ if (!atomic_read(&pc->flush_freq_limit))
+ return;
+
+ mutex_lock(&pc->freq_lock);
+
+ ret = pc_set_max_freq(&gt->uc.guc.pc, pc->stashed_max_freq);
+ if (ret)
+ xe_gt_err_once(gt, "Failed to restore max freq %u:%d",
+ pc->stashed_max_freq, ret);
+
+ atomic_set(&pc->flush_freq_limit, 0);
+ mutex_unlock(&pc->freq_lock);
+ wake_up_var(&pc->flush_freq_limit);
+}
+
+static int pc_set_mert_freq_cap(struct xe_guc_pc *pc)
+{
+ int ret;
+
+ if (!XE_GT_WA(pc_to_gt(pc), 22019338487))
+ return 0;
+
+ guard(mutex)(&pc->freq_lock);
+
+ /*
+ * Get updated min/max and stash them.
+ */
+ ret = xe_guc_pc_get_min_freq_locked(pc, &pc->stashed_min_freq);
+ if (!ret)
+ ret = xe_guc_pc_get_max_freq_locked(pc, &pc->stashed_max_freq);
+ if (ret)
+ return ret;
+
+ /*
+ * Ensure min and max are bound by MERT_FREQ_CAP until driver loads.
+ */
+ ret = pc_set_min_freq(pc, min(xe_guc_pc_get_rpe_freq(pc), pc_max_freq_cap(pc)));
+ if (!ret)
+ ret = pc_set_max_freq(pc, min(pc->rp0_freq, pc_max_freq_cap(pc)));
+
return ret;
}
@@ -930,7 +1066,6 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
{
struct xe_device *xe = pc_to_xe(pc);
struct xe_gt *gt = pc_to_gt(pc);
- unsigned int fw_ref;
int ret = 0;
if (xe->info.skip_guc_pc)
@@ -940,17 +1075,7 @@ int xe_guc_pc_gucrc_disable(struct xe_guc_pc *pc)
if (ret)
return ret;
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
- return -ETIMEDOUT;
- }
-
- xe_gt_idle_disable_c6(gt);
-
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
- return 0;
+ return xe_gt_idle_disable_c6(gt);
}
/**
@@ -1010,8 +1135,6 @@ static int pc_init_freqs(struct xe_guc_pc *pc)
if (ret)
goto out;
- pc_update_rp_values(pc);
-
pc_init_pcode_freq(pc);
/*
@@ -1036,6 +1159,61 @@ static int pc_action_set_strategy(struct xe_guc_pc *pc, u32 val)
return ret;
}
+static const char *power_profile_to_string(struct xe_guc_pc *pc)
+{
+ switch (pc->power_profile) {
+ case SLPC_POWER_PROFILE_BASE:
+ return "base";
+ case SLPC_POWER_PROFILE_POWER_SAVING:
+ return "power_saving";
+ default:
+ return "invalid";
+ }
+}
+
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile)
+{
+ switch (pc->power_profile) {
+ case SLPC_POWER_PROFILE_BASE:
+ sprintf(profile, "[%s] %s\n", "base", "power_saving");
+ break;
+ case SLPC_POWER_PROFILE_POWER_SAVING:
+ sprintf(profile, "%s [%s]\n", "base", "power_saving");
+ break;
+ default:
+ sprintf(profile, "invalid");
+ }
+}
+
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf)
+{
+ int ret = 0;
+ u32 val;
+
+ if (strncmp("base", buf, strlen("base")) == 0)
+ val = SLPC_POWER_PROFILE_BASE;
+ else if (strncmp("power_saving", buf, strlen("power_saving")) == 0)
+ val = SLPC_POWER_PROFILE_POWER_SAVING;
+ else
+ return -EINVAL;
+
+ guard(mutex)(&pc->freq_lock);
+ xe_pm_runtime_get_noresume(pc_to_xe(pc));
+
+ ret = pc_action_set_param(pc,
+ SLPC_PARAM_POWER_PROFILE,
+ val);
+ if (ret)
+ xe_gt_err_once(pc_to_gt(pc), "Failed to set power profile to %d: %pe\n",
+ val, ERR_PTR(ret));
+ else
+ pc->power_profile = val;
+
+ xe_pm_runtime_put(pc_to_xe(pc));
+
+ return ret;
+}
+
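
A hedged sketch of how a sysfs-style interface might sit on top of the new profile API; the attribute plumbing, buffer size and function names are assumptions, not part of this patch:

	static ssize_t example_profile_show(struct xe_guc_pc *pc, char *buf)
	{
		char tmp[32];

		xe_guc_pc_get_power_profile(pc, tmp);	/* e.g. "[base] power_saving\n" */

		return sysfs_emit(buf, "%s", tmp);
	}

	static ssize_t example_profile_store(struct xe_guc_pc *pc, const char *buf, size_t count)
	{
		int err = xe_guc_pc_set_power_profile(pc, buf);

		return err ?: count;
	}
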
/**
* xe_guc_pc_start - Start GuC's Power Conservation component
* @pc: Xe_GuC_PC instance
@@ -1068,7 +1246,7 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
goto out;
}
- memset(pc->bo->vmap.vaddr, 0, size);
+ xe_map_memset(xe, &pc->bo->vmap, 0, 0, size);
slpc_shared_data_write(pc, header.size, size);
earlier = ktime_get();
@@ -1114,6 +1292,11 @@ int xe_guc_pc_start(struct xe_guc_pc *pc)
/* Enable SLPC Optimized Strategy for compute */
ret = pc_action_set_strategy(pc, SLPC_OPTIMIZED_STRATEGY_COMPUTE);
+ /* Set cached value of power_profile */
+ ret = xe_guc_pc_set_power_profile(pc, power_profile_to_string(pc));
+ if (unlikely(ret))
+ xe_gt_err(gt, "Failed to set SLPC power profile: %pe\n", ERR_PTR(ret));
+
out:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
return ret;
@@ -1157,7 +1340,7 @@ static void xe_guc_pc_fini_hw(void *arg)
XE_WARN_ON(xe_guc_pc_stop(pc));
/* Bind requested freq to mert_freq_cap before unload */
- pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), pc->rpe_freq));
+ pc_set_cur_freq(pc, min(pc_max_freq_cap(pc), xe_guc_pc_get_rpe_freq(pc)));
xe_force_wake_put(gt_to_fw(pc_to_gt(pc)), fw_ref);
}
@@ -1192,6 +1375,8 @@ int xe_guc_pc_init(struct xe_guc_pc *pc)
pc->bo = bo;
+ pc->power_profile = SLPC_POWER_PROFILE_BASE;
+
return devm_add_action_or_reset(xe->drm.dev, xe_guc_pc_fini_hw, pc);
}
diff --git a/drivers/gpu/drm/xe/xe_guc_pc.h b/drivers/gpu/drm/xe/xe_guc_pc.h
index 0a2664d5c811..0e31396f103c 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc.h
@@ -31,6 +31,8 @@ int xe_guc_pc_get_min_freq(struct xe_guc_pc *pc, u32 *freq);
int xe_guc_pc_set_min_freq(struct xe_guc_pc *pc, u32 freq);
int xe_guc_pc_get_max_freq(struct xe_guc_pc *pc, u32 *freq);
int xe_guc_pc_set_max_freq(struct xe_guc_pc *pc, u32 freq);
+int xe_guc_pc_set_power_profile(struct xe_guc_pc *pc, const char *buf);
+void xe_guc_pc_get_power_profile(struct xe_guc_pc *pc, char *profile);
enum xe_gt_idle_state xe_guc_pc_c_status(struct xe_guc_pc *pc);
u64 xe_guc_pc_rc6_residency(struct xe_guc_pc *pc);
@@ -38,5 +40,7 @@ u64 xe_guc_pc_mc6_residency(struct xe_guc_pc *pc);
void xe_guc_pc_init_early(struct xe_guc_pc *pc);
int xe_guc_pc_restore_stashed_freq(struct xe_guc_pc *pc);
void xe_guc_pc_raise_unslice(struct xe_guc_pc *pc);
+void xe_guc_pc_apply_flush_freq_limit(struct xe_guc_pc *pc);
+void xe_guc_pc_remove_flush_freq_limit(struct xe_guc_pc *pc);
#endif /* _XE_GUC_PC_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_pc_types.h b/drivers/gpu/drm/xe/xe_guc_pc_types.h
index 2978ac9a249b..711bbcdcb0d3 100644
--- a/drivers/gpu/drm/xe/xe_guc_pc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_pc_types.h
@@ -15,12 +15,10 @@
struct xe_guc_pc {
/** @bo: GGTT buffer object that is shared with GuC PC */
struct xe_bo *bo;
+ /** @flush_freq_limit: 1 when max freq changes are limited by driver */
+ atomic_t flush_freq_limit;
/** @rp0_freq: HW RP0 frequency - The Maximum one */
u32 rp0_freq;
- /** @rpa_freq: HW RPa frequency - The Achievable one */
- u32 rpa_freq;
- /** @rpe_freq: HW RPe frequency - The Efficient one */
- u32 rpe_freq;
/** @rpn_freq: HW RPN frequency - The Minimum one */
u32 rpn_freq;
/** @user_requested_min: Stash the minimum requested freq by user */
@@ -35,6 +33,8 @@ struct xe_guc_pc {
struct mutex freq_lock;
/** @freq_ready: Only handle freq changes, if they are really ready */
bool freq_ready;
+ /** @power_profile: Base or power_saving profile */
+ u32 power_profile;
};
#endif /* _XE_GUC_PC_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_guc_relay.c b/drivers/gpu/drm/xe/xe_guc_relay.c
index e5dc94f3e618..0c0ff24ba62a 100644
--- a/drivers/gpu/drm/xe/xe_guc_relay.c
+++ b/drivers/gpu/drm/xe/xe_guc_relay.c
@@ -56,9 +56,19 @@ static struct xe_device *relay_to_xe(struct xe_guc_relay *relay)
return gt_to_xe(relay_to_gt(relay));
}
+#define XE_RELAY_DIAG_RATELIMIT_INTERVAL (10 * HZ)
+#define XE_RELAY_DIAG_RATELIMIT_BURST 10
+
+#define relay_ratelimit_printk(relay, _level, fmt...) ({ \
+ typeof(relay) _r = (relay); \
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG_SRIOV) || \
+ ___ratelimit(&_r->diag_ratelimit, "xe_guc_relay")) \
+ xe_gt_sriov_##_level(relay_to_gt(_r), "relay: " fmt); \
+})
+
#define relay_assert(relay, condition) xe_gt_assert(relay_to_gt(relay), condition)
-#define relay_notice(relay, msg...) xe_gt_sriov_notice(relay_to_gt(relay), "relay: " msg)
-#define relay_debug(relay, msg...) xe_gt_sriov_dbg_verbose(relay_to_gt(relay), "relay: " msg)
+#define relay_notice(relay, msg...) relay_ratelimit_printk((relay), notice, msg)
+#define relay_debug(relay, msg...) relay_ratelimit_printk((relay), dbg_verbose, msg)
static int relay_get_totalvfs(struct xe_guc_relay *relay)
{
@@ -345,6 +355,9 @@ int xe_guc_relay_init(struct xe_guc_relay *relay)
INIT_WORK(&relay->worker, relays_worker_fn);
INIT_LIST_HEAD(&relay->pending_relays);
INIT_LIST_HEAD(&relay->incoming_actions);
+ ratelimit_state_init(&relay->diag_ratelimit,
+ XE_RELAY_DIAG_RATELIMIT_INTERVAL,
+ XE_RELAY_DIAG_RATELIMIT_BURST);
err = mempool_init_kmalloc_pool(&relay->pool, XE_RELAY_MEMPOOL_MIN_NUM +
relay_get_totalvfs(relay),
diff --git a/drivers/gpu/drm/xe/xe_guc_relay_types.h b/drivers/gpu/drm/xe/xe_guc_relay_types.h
index 5999fcb77e96..20eee10856b2 100644
--- a/drivers/gpu/drm/xe/xe_guc_relay_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_relay_types.h
@@ -7,6 +7,7 @@
#define _XE_GUC_RELAY_TYPES_H_
#include <linux/mempool.h>
+#include <linux/ratelimit_types.h>
#include <linux/spinlock.h>
#include <linux/workqueue.h>
@@ -31,6 +32,9 @@ struct xe_guc_relay {
/** @last_rid: last Relay-ID used while sending a message. */
u32 last_rid;
+
+ /** @diag_ratelimit: ratelimit state used to throttle diagnostics messages. */
+ struct ratelimit_state diag_ratelimit;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 2ad38f6b103e..f6ba2b0f074d 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -32,6 +32,7 @@
#include "xe_guc_ct.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_guc_id_mgr.h"
+#include "xe_guc_klv_helpers.h"
#include "xe_guc_submit_types.h"
#include "xe_hw_engine.h"
#include "xe_hw_fence.h"
@@ -43,6 +44,7 @@
#include "xe_ring_ops_types.h"
#include "xe_sched_job.h"
#include "xe_trace.h"
+#include "xe_uc_fw.h"
#include "xe_vm.h"
static struct xe_guc *
@@ -68,6 +70,8 @@ exec_queue_to_guc(struct xe_exec_queue *q)
#define EXEC_QUEUE_STATE_BANNED (1 << 9)
#define EXEC_QUEUE_STATE_CHECK_TIMEOUT (1 << 10)
#define EXEC_QUEUE_STATE_EXTRA_REF (1 << 11)
+#define EXEC_QUEUE_STATE_PENDING_RESUME (1 << 12)
+#define EXEC_QUEUE_STATE_PENDING_TDR_EXIT (1 << 13)
static bool exec_queue_registered(struct xe_exec_queue *q)
{
@@ -139,6 +143,11 @@ static void set_exec_queue_destroyed(struct xe_exec_queue *q)
atomic_or(EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
}
+static void clear_exec_queue_destroyed(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_DESTROYED, &q->guc->state);
+}
+
static bool exec_queue_banned(struct xe_exec_queue *q)
{
return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_BANNED;
@@ -219,6 +228,41 @@ static void set_exec_queue_extra_ref(struct xe_exec_queue *q)
atomic_or(EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
}
+static void clear_exec_queue_extra_ref(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_EXTRA_REF, &q->guc->state);
+}
+
+static bool exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_RESUME;
+}
+
+static void set_exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+ atomic_or(EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_resume(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_PENDING_RESUME, &q->guc->state);
+}
+
+static bool exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+ return atomic_read(&q->guc->state) & EXEC_QUEUE_STATE_PENDING_TDR_EXIT;
+}
+
+static void set_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+ atomic_or(EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
+}
+
+static void clear_exec_queue_pending_tdr_exit(struct xe_exec_queue *q)
+{
+ atomic_and(~EXEC_QUEUE_STATE_PENDING_TDR_EXIT, &q->guc->state);
+}
+
static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
{
return (atomic_read(&q->guc->state) &
@@ -229,6 +273,17 @@ static bool exec_queue_killed_or_banned_or_wedged(struct xe_exec_queue *q)
static void guc_submit_fini(struct drm_device *drm, void *arg)
{
struct xe_guc *guc = arg;
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+ int ret;
+
+ ret = wait_event_timeout(guc->submission_state.fini_wq,
+ xa_empty(&guc->submission_state.exec_queue_lookup),
+ HZ * 5);
+
+ drain_workqueue(xe->destroy_wq);
+
+ xe_gt_assert(gt, ret);
xa_destroy(&guc->submission_state.exec_queue_lookup);
}
@@ -305,6 +360,71 @@ int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids)
return drmm_add_action_or_reset(&xe->drm, guc_submit_fini, guc);
}
+/*
+ * Given that we want to guarantee enough RCS throughput to avoid missing
+ * frames, we set the yield policy to 20% of each 80ms interval.
+ */
+#define RC_YIELD_DURATION 80 /* in ms */
+#define RC_YIELD_RATIO 20 /* in percent */
+static u32 *emit_render_compute_yield_klv(u32 *emit)
+{
+ *emit++ = PREP_GUC_KLV_TAG(SCHEDULING_POLICIES_RENDER_COMPUTE_YIELD);
+ *emit++ = RC_YIELD_DURATION;
+ *emit++ = RC_YIELD_RATIO;
+
+ return emit;
+}
+
+#define SCHEDULING_POLICY_MAX_DWORDS 16
+static int guc_init_global_schedule_policy(struct xe_guc *guc)
+{
+ u32 data[SCHEDULING_POLICY_MAX_DWORDS];
+ u32 *emit = data;
+ u32 count = 0;
+ int ret;
+
+ if (GUC_SUBMIT_VER(guc) < MAKE_GUC_VER(1, 1, 0))
+ return 0;
+
+ *emit++ = XE_GUC_ACTION_UPDATE_SCHEDULING_POLICIES_KLV;
+
+ if (CCS_MASK(guc_to_gt(guc)))
+ emit = emit_render_compute_yield_klv(emit);
+
+ count = emit - data;
+ if (count > 1) {
+ xe_assert(guc_to_xe(guc), count <= SCHEDULING_POLICY_MAX_DWORDS);
+
+ ret = xe_guc_ct_send_block(&guc->ct, data, count);
+ if (ret < 0) {
+ xe_gt_err(guc_to_gt(guc),
+ "failed to enable GuC scheduling policies: %pe\n",
+ ERR_PTR(ret));
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+int xe_guc_submit_enable(struct xe_guc *guc)
+{
+ int ret;
+
+ ret = guc_init_global_schedule_policy(guc);
+ if (ret)
+ return ret;
+
+ guc->submission_state.enabled = true;
+
+ return 0;
+}
+
+void xe_guc_submit_disable(struct xe_guc *guc)
+{
+ guc->submission_state.enabled = false;
+}
+
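
The call sites for the new enable/disable pair are outside this hunk; a rough, purely illustrative bracket of how they are meant to be used around GuC submission bring-up and teardown:

	static int example_guc_submission_bringup(struct xe_guc *guc)
	{
		int err;

		/* After the GuC is loaded and the CT channel is enabled. */
		err = xe_guc_submit_enable(guc);
		if (err)
			return err;

		/* ... submission runs ... */

		/* On GT reset, suspend or driver unload. */
		xe_guc_submit_disable(guc);

		return 0;
	}
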
static void __release_guc_id(struct xe_guc *guc, struct xe_exec_queue *q, u32 xa_count)
{
int i;
@@ -487,6 +607,15 @@ static void __register_mlrc_exec_queue(struct xe_guc *guc,
action[len++] = upper_32_bits(xe_lrc_descriptor(lrc));
}
+ /* Explicitly check some fields that we might fix up later */
+ xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
+ action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_5_WQ_DESC_ADDR_LOWER]);
+ xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
+ action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_7_WQ_BUF_BASE_LOWER]);
+ xe_gt_assert(guc_to_gt(guc), q->width ==
+ action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_10_NUM_CTXS]);
+ xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
+ action[XE_GUC_REGISTER_CONTEXT_MULTI_LRC_DATA_11_HW_LRC_ADDR]);
xe_gt_assert(guc_to_gt(guc), len <= MAX_MLRC_REG_SIZE);
#undef MAX_MLRC_REG_SIZE
@@ -511,10 +640,18 @@ static void __register_exec_queue(struct xe_guc *guc,
info->hwlrca_hi,
};
+ /* Explicitly check some fields that we might fix up later */
+ xe_gt_assert(guc_to_gt(guc), info->wq_desc_lo ==
+ action[XE_GUC_REGISTER_CONTEXT_DATA_5_WQ_DESC_ADDR_LOWER]);
+ xe_gt_assert(guc_to_gt(guc), info->wq_base_lo ==
+ action[XE_GUC_REGISTER_CONTEXT_DATA_7_WQ_BUF_BASE_LOWER]);
+ xe_gt_assert(guc_to_gt(guc), info->hwlrca_lo ==
+ action[XE_GUC_REGISTER_CONTEXT_DATA_10_HW_LRC_ADDR]);
+
xe_guc_ct_send(&guc->ct, action, ARRAY_SIZE(action), 0, 0);
}
-static void register_exec_queue(struct xe_exec_queue *q)
+static void register_exec_queue(struct xe_exec_queue *q, int ctx_type)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
@@ -522,6 +659,7 @@ static void register_exec_queue(struct xe_exec_queue *q)
struct guc_ctxt_registration_info info;
xe_gt_assert(guc_to_gt(guc), !exec_queue_registered(q));
+ xe_gt_assert(guc_to_gt(guc), ctx_type < GUC_CONTEXT_COUNT);
memset(&info, 0, sizeof(info));
info.context_idx = q->guc->id;
@@ -529,7 +667,8 @@ static void register_exec_queue(struct xe_exec_queue *q)
info.engine_submit_mask = q->logical_mask;
info.hwlrca_lo = lower_32_bits(xe_lrc_descriptor(lrc));
info.hwlrca_hi = upper_32_bits(xe_lrc_descriptor(lrc));
- info.flags = CONTEXT_REGISTRATION_FLAG_KMD;
+ info.flags = CONTEXT_REGISTRATION_FLAG_KMD |
+ FIELD_PREP(CONTEXT_REGISTRATION_FLAG_TYPE, ctx_type);
if (xe_exec_queue_is_parallel(q)) {
u64 ggtt_addr = xe_lrc_parallel_ggtt_addr(lrc);
@@ -573,26 +712,51 @@ static u32 wq_space_until_wrap(struct xe_exec_queue *q)
return (WQ_SIZE - q->guc->wqi_tail);
}
+static bool vf_recovery(struct xe_guc *guc)
+{
+ return xe_gt_recovery_pending(guc_to_gt(guc));
+}
+
+static inline void relaxed_ms_sleep(unsigned int delay_ms)
+{
+ unsigned long min_us, max_us;
+
+ if (!delay_ms)
+ return;
+
+ if (delay_ms > 20) {
+ msleep(delay_ms);
+ return;
+ }
+
+ min_us = mul_u32_u32(delay_ms, 1000);
+ max_us = min_us + 500;
+
+ usleep_range(min_us, max_us);
+}
+
static int wq_wait_for_space(struct xe_exec_queue *q, u32 wqi_size)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
struct iosys_map map = xe_lrc_parallel_map(q->lrc[0]);
- unsigned int sleep_period_ms = 1;
+ unsigned int sleep_period_ms = 1, sleep_total_ms = 0;
#define AVAILABLE_SPACE \
CIRC_SPACE(q->guc->wqi_tail, q->guc->wqi_head, WQ_SIZE)
- if (wqi_size > AVAILABLE_SPACE) {
+ if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
try_again:
q->guc->wqi_head = parallel_read(xe, map, wq_desc.head);
- if (wqi_size > AVAILABLE_SPACE) {
- if (sleep_period_ms == 1024) {
+ if (wqi_size > AVAILABLE_SPACE && !vf_recovery(guc)) {
+ if (sleep_total_ms > 2000) {
xe_gt_reset_async(q->gt);
return -ENODEV;
}
msleep(sleep_period_ms);
- sleep_period_ms <<= 1;
+ sleep_total_ms += sleep_period_ms;
+ if (sleep_period_ms < 64)
+ sleep_period_ms <<= 1;
goto try_again;
}
}
@@ -666,7 +830,7 @@ static void wq_item_append(struct xe_exec_queue *q)
}
#define RESUME_PENDING ~0x0ull
-static void submit_exec_queue(struct xe_exec_queue *q)
+static void submit_exec_queue(struct xe_exec_queue *q, struct xe_sched_job *job)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_lrc *lrc = q->lrc[0];
@@ -678,10 +842,13 @@ static void submit_exec_queue(struct xe_exec_queue *q)
xe_gt_assert(guc_to_gt(guc), exec_queue_registered(q));
- if (xe_exec_queue_is_parallel(q))
- wq_item_append(q);
- else
- xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
+ if (!job->restore_replay || job->last_replay) {
+ if (xe_exec_queue_is_parallel(q))
+ wq_item_append(q);
+ else
+ xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
+ job->last_replay = false;
+ }
if (exec_queue_suspended(q) && !xe_exec_queue_is_parallel(q))
return;
@@ -723,30 +890,33 @@ guc_exec_queue_run_job(struct drm_sched_job *drm_job)
struct xe_sched_job *job = to_xe_sched_job(drm_job);
struct xe_exec_queue *q = job->q;
struct xe_guc *guc = exec_queue_to_guc(q);
- struct dma_fence *fence = NULL;
- bool lr = xe_exec_queue_is_lr(q);
+ bool lr = xe_exec_queue_is_lr(q), killed_or_banned_or_wedged =
+ exec_queue_killed_or_banned_or_wedged(q);
xe_gt_assert(guc_to_gt(guc), !(exec_queue_destroyed(q) || exec_queue_pending_disable(q)) ||
exec_queue_banned(q) || exec_queue_suspended(q));
trace_xe_sched_job_run(job);
- if (!exec_queue_killed_or_banned_or_wedged(q) && !xe_sched_job_is_error(job)) {
+ if (!killed_or_banned_or_wedged && !xe_sched_job_is_error(job)) {
if (!exec_queue_registered(q))
- register_exec_queue(q);
- if (!lr) /* LR jobs are emitted in the exec IOCTL */
+ register_exec_queue(q, GUC_CONTEXT_NORMAL);
+ if (!job->restore_replay)
q->ring_ops->emit_job(job);
- submit_exec_queue(q);
+ submit_exec_queue(q, job);
+ job->restore_replay = false;
}
- if (lr) {
- xe_sched_job_set_error(job, -EOPNOTSUPP);
- dma_fence_put(job->fence); /* Drop ref from xe_sched_job_arm */
- } else {
- fence = job->fence;
- }
+ /*
+ * We don't care about job-fence ordering in LR VMs because these fences
+ * are never exported; they are used solely to keep jobs on the pending
+ * list. Once a queue enters an error state, there's no need to track
+ * them.
+ */
+ if (killed_or_banned_or_wedged && lr)
+ xe_sched_job_set_error(job, -ECANCELED);
- return fence;
+ return job->fence;
}
static void guc_exec_queue_free_job(struct drm_sched_job *drm_job)
@@ -780,15 +950,17 @@ static void disable_scheduling_deregister(struct xe_guc *guc,
ret = wait_event_timeout(guc->ct.wq,
(!exec_queue_pending_enable(q) &&
!exec_queue_pending_disable(q)) ||
- xe_guc_read_stopped(guc),
+ xe_guc_read_stopped(guc) ||
+ vf_recovery(guc),
HZ * 5);
- if (!ret) {
+ if (!ret && !vf_recovery(guc)) {
struct xe_gpu_scheduler *sched = &q->guc->sched;
xe_gt_warn(q->gt, "Pending enable/disable failed to respond\n");
xe_sched_submission_start(sched);
xe_gt_reset_async(q->gt);
- xe_sched_tdr_queue_imm(sched);
+ if (!xe_exec_queue_is_lr(q))
+ xe_sched_tdr_queue_imm(sched);
return;
}
@@ -880,12 +1052,18 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_gpu_scheduler *sched = &ge->sched;
- bool wedged;
+ struct xe_sched_job *job;
+ bool wedged = false;
xe_gt_assert(guc_to_gt(guc), xe_exec_queue_is_lr(q));
+
+ if (vf_recovery(guc))
+ return;
+
trace_xe_exec_queue_lr_cleanup(q);
- wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+ if (!exec_queue_killed(q))
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Kill the run_job / process_msg entry points */
xe_sched_submission_stop(sched);
@@ -914,7 +1092,11 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
*/
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- xe_guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc) ||
+ vf_recovery(guc), HZ * 5);
+ if (vf_recovery(guc))
+ return;
+
if (!ret) {
xe_gt_warn(q->gt, "Schedule disable failed to respond, guc_id=%d\n",
q->guc->id);
@@ -929,7 +1111,16 @@ static void xe_guc_exec_queue_lr_cleanup(struct work_struct *w)
if (!exec_queue_killed(q) && !xe_lrc_ring_is_idle(q->lrc[0]))
xe_devcoredump(q, NULL, "LR job cleanup, guc_id=%d", q->guc->id);
+ xe_hw_fence_irq_stop(q->fence_irq);
+
xe_sched_submission_start(sched);
+
+ spin_lock(&sched->base.job_list_lock);
+ list_for_each_entry(job, &sched->base.pending_list, drm.list)
+ xe_sched_job_set_error(job, -ECANCELED);
+ spin_unlock(&sched->base.job_list_lock);
+
+ xe_hw_fence_irq_start(q->fence_irq);
}
#define ADJUST_FIVE_PERCENT(__t) mul_u64_u32_div(__t, 105, 100)
@@ -959,10 +1150,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
*/
xe_gt_assert(gt, timeout_ms < 100 * MSEC_PER_SEC);
- if (ctx_timestamp < ctx_job_timestamp)
- diff = ctx_timestamp + U32_MAX - ctx_job_timestamp;
- else
- diff = ctx_timestamp - ctx_job_timestamp;
+ diff = ctx_timestamp - ctx_job_timestamp;
/*
* Ensure timeout is within 5% to account for an GuC scheduling latency
@@ -998,12 +1186,14 @@ static void enable_scheduling(struct xe_exec_queue *q)
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_enable(q) ||
- xe_guc_read_stopped(guc), HZ * 5);
- if (!ret || xe_guc_read_stopped(guc)) {
+ xe_guc_read_stopped(guc) ||
+ vf_recovery(guc), HZ * 5);
+ if ((!ret && !vf_recovery(guc)) || xe_guc_read_stopped(guc)) {
xe_gt_warn(guc_to_gt(guc), "Schedule enable failed to respond");
set_exec_queue_banned(q);
xe_gt_reset_async(q->gt);
- xe_sched_tdr_queue_imm(&q->guc->sched);
+ if (!xe_exec_queue_is_lr(q))
+ xe_sched_tdr_queue_imm(&q->guc->sched);
}
}
@@ -1059,7 +1249,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
int err = -ETIME;
pid_t pid = -1;
int i = 0;
- bool wedged, skip_timeout_check;
+ bool wedged = false, skip_timeout_check;
+
+ xe_gt_assert(guc_to_gt(guc), !xe_exec_queue_is_lr(q));
/*
* TDR has fired before free job worker. Common if exec queue
@@ -1067,12 +1259,9 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* list so job can be freed and kick scheduler ensuring free job is not
* lost.
*/
- if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags)) {
- xe_sched_add_pending_job(sched, job);
- xe_sched_submission_start(sched);
-
- return DRM_GPU_SCHED_STAT_NOMINAL;
- }
+ if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &job->fence->flags) ||
+ vf_recovery(guc))
+ return DRM_GPU_SCHED_STAT_NO_HANG;
/* Kill the run_job entry point */
xe_sched_submission_stop(sched);
@@ -1105,7 +1294,8 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
* doesn't work for SRIOV. For now assuming timeouts in wedged mode are
* genuine timeouts.
*/
- wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
+ if (!exec_queue_killed(q))
+ wedged = guc_submit_hint_wedged(exec_queue_to_guc(q));
/* Engine state now stable, disable scheduling to check timestamp */
if (!wedged && exec_queue_registered(q)) {
@@ -1122,7 +1312,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
ret = wait_event_timeout(guc->ct.wq,
(!exec_queue_pending_enable(q) &&
!exec_queue_pending_disable(q)) ||
- xe_guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc) ||
+ vf_recovery(guc), HZ * 5);
+ if (vf_recovery(guc))
+ goto handle_vf_resume;
if (!ret || xe_guc_read_stopped(guc))
goto trigger_reset;
@@ -1147,7 +1340,10 @@ guc_exec_queue_timedout_job(struct drm_sched_job *drm_job)
smp_rmb();
ret = wait_event_timeout(guc->ct.wq,
!exec_queue_pending_disable(q) ||
- xe_guc_read_stopped(guc), HZ * 5);
+ xe_guc_read_stopped(guc) ||
+ vf_recovery(guc), HZ * 5);
+ if (vf_recovery(guc))
+ goto handle_vf_resume;
if (!ret || xe_guc_read_stopped(guc)) {
trigger_reset:
if (!ret)
@@ -1240,9 +1436,10 @@ trigger_reset:
/* Start fence signaling */
xe_hw_fence_irq_start(q->fence_irq);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ return DRM_GPU_SCHED_STAT_RESET;
sched_enable:
+ set_exec_queue_pending_tdr_exit(q);
enable_scheduling(q);
rearm:
/*
@@ -1250,50 +1447,62 @@ rearm:
* but there is not currently an easy way to do in DRM scheduler. With
* some thought, do this in a follow up.
*/
- xe_sched_add_pending_job(sched, job);
xe_sched_submission_start(sched);
+handle_vf_resume:
+ return DRM_GPU_SCHED_STAT_NO_HANG;
+}
+
+static void guc_exec_queue_fini(struct xe_exec_queue *q)
+{
+ struct xe_guc_exec_queue *ge = q->guc;
+ struct xe_guc *guc = exec_queue_to_guc(q);
- return DRM_GPU_SCHED_STAT_NOMINAL;
+ release_guc_id(guc, q);
+ xe_sched_entity_fini(&ge->entity);
+ xe_sched_fini(&ge->sched);
+
+ /*
+ * RCU free due to the sched being exported via DRM scheduler fences
+ * (timeline name).
+ */
+ kfree_rcu(ge, rcu);
}
-static void __guc_exec_queue_fini_async(struct work_struct *w)
+static void __guc_exec_queue_destroy_async(struct work_struct *w)
{
struct xe_guc_exec_queue *ge =
- container_of(w, struct xe_guc_exec_queue, fini_async);
+ container_of(w, struct xe_guc_exec_queue, destroy_async);
struct xe_exec_queue *q = ge->q;
struct xe_guc *guc = exec_queue_to_guc(q);
xe_pm_runtime_get(guc_to_xe(guc));
trace_xe_exec_queue_destroy(q);
- release_guc_id(guc, q);
if (xe_exec_queue_is_lr(q))
cancel_work_sync(&ge->lr_tdr);
/* Confirm no work left behind accessing device structures */
cancel_delayed_work_sync(&ge->sched.base.work_tdr);
- xe_sched_entity_fini(&ge->entity);
- xe_sched_fini(&ge->sched);
- kfree(ge);
xe_exec_queue_fini(q);
+
xe_pm_runtime_put(guc_to_xe(guc));
}
-static void guc_exec_queue_fini_async(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy_async(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
struct xe_device *xe = guc_to_xe(guc);
- INIT_WORK(&q->guc->fini_async, __guc_exec_queue_fini_async);
+ INIT_WORK(&q->guc->destroy_async, __guc_exec_queue_destroy_async);
/* We must block on kernel engines so slabs are empty on driver unload */
if (q->flags & EXEC_QUEUE_FLAG_PERMANENT || exec_queue_wedged(q))
- __guc_exec_queue_fini_async(&q->guc->fini_async);
+ __guc_exec_queue_destroy_async(&q->guc->destroy_async);
else
- queue_work(xe->destroy_wq, &q->guc->fini_async);
+ queue_work(xe->destroy_wq, &q->guc->destroy_async);
}
-static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
+static void __guc_exec_queue_destroy(struct xe_guc *guc, struct xe_exec_queue *q)
{
/*
* Might be done from within the GPU scheduler, need to do async as we
@@ -1302,7 +1511,7 @@ static void __guc_exec_queue_fini(struct xe_guc *guc, struct xe_exec_queue *q)
* this we and don't really care when everything is fini'd, just that it
* is.
*/
- guc_exec_queue_fini_async(q);
+ guc_exec_queue_destroy_async(q);
}
static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
@@ -1313,10 +1522,20 @@ static void __guc_exec_queue_process_msg_cleanup(struct xe_sched_msg *msg)
xe_gt_assert(guc_to_gt(guc), !(q->flags & EXEC_QUEUE_FLAG_PERMANENT));
trace_xe_exec_queue_cleanup_entity(q);
- if (exec_queue_registered(q))
+ /*
+ * Expected state transitions for cleanup:
+ * - If the exec queue is registered and GuC firmware is running, we must first
+ * disable scheduling and deregister the queue to ensure proper teardown and
+ * resource release in the GuC, then destroy the exec queue on driver side.
+ * - If the GuC is already stopped (e.g., during driver unload or GPU reset),
+ * we cannot expect a response for the deregister request. In this case,
+ * it is safe to directly destroy the exec queue on driver side, as the GuC
+ * will not process further requests and all resources must be cleaned up locally.
+ */
+ if (exec_queue_registered(q) && xe_uc_fw_is_running(&guc->fw))
disable_scheduling_deregister(guc, q);
else
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
static bool guc_exec_queue_allowed_to_change_state(struct xe_exec_queue *q)
@@ -1336,11 +1555,24 @@ static void __guc_exec_queue_process_msg_set_sched_props(struct xe_sched_msg *ms
static void __suspend_fence_signal(struct xe_exec_queue *q)
{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+
if (!q->guc->suspend_pending)
return;
WRITE_ONCE(q->guc->suspend_pending, false);
- wake_up(&q->guc->suspend_wait);
+
+ /*
+ * We use a GuC shared wait queue for VFs because the VF resfix start
+ * interrupt must be able to wake all instances of suspend_wait. This
+ * prevents the VF migration worker from being starved during
+ * scheduling.
+ */
+ if (IS_SRIOV_VF(xe))
+ wake_up_all(&guc->ct.wq);
+ else
+ wake_up(&q->guc->suspend_wait);
}
static void suspend_fence_signal(struct xe_exec_queue *q)
@@ -1361,8 +1593,9 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
if (guc_exec_queue_allowed_to_change_state(q) && !exec_queue_suspended(q) &&
exec_queue_enabled(q)) {
- wait_event(guc->ct.wq, (q->guc->resume_time != RESUME_PENDING ||
- xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q));
+ wait_event(guc->ct.wq, vf_recovery(guc) ||
+ ((q->guc->resume_time != RESUME_PENDING ||
+ xe_guc_read_stopped(guc)) && !exec_queue_pending_disable(q)));
if (!xe_guc_read_stopped(guc)) {
s64 since_resume_ms =
@@ -1372,7 +1605,7 @@ static void __guc_exec_queue_process_msg_suspend(struct xe_sched_msg *msg)
since_resume_ms;
if (wait_ms > 0 && q->guc->resume_time)
- msleep(wait_ms);
+ relaxed_ms_sleep(wait_ms);
set_exec_queue_suspended(q);
disable_scheduling(q, false);
@@ -1391,6 +1624,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
clear_exec_queue_suspended(q);
if (!exec_queue_enabled(q)) {
q->guc->resume_time = RESUME_PENDING;
+ set_exec_queue_pending_resume(q);
enable_scheduling(q);
}
} else {
@@ -1404,6 +1638,7 @@ static void __guc_exec_queue_process_msg_resume(struct xe_sched_msg *msg)
#define RESUME 4
#define OPCODE_MASK 0xf
#define MSG_LOCKED BIT(8)
+#define MSG_HEAD BIT(9)
static void guc_exec_queue_process_msg(struct xe_sched_msg *msg)
{
@@ -1457,6 +1692,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
q->guc = ge;
ge->q = q;
+ init_rcu_head(&ge->rcu);
init_waitqueue_head(&ge->suspend_wait);
for (i = 0; i < MAX_STATIC_MSG_TYPE; ++i)
@@ -1465,7 +1701,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
timeout = (q->vm && xe_vm_in_lr_mode(q->vm)) ? MAX_SCHEDULE_TIMEOUT :
msecs_to_jiffies(q->sched_props.job_timeout_ms);
err = xe_sched_init(&ge->sched, &drm_sched_ops, &xe_sched_ops,
- NULL, q->lrc[0]->ring.size / MAX_JOB_SIZE_BYTES, 64,
+ NULL, xe_lrc_ring_size() / MAX_JOB_SIZE_BYTES, 64,
timeout, guc_to_gt(guc)->ordered_wq, NULL,
q->name, gt_to_xe(q->gt)->drm.dev);
if (err)
@@ -1487,7 +1723,7 @@ static int guc_exec_queue_init(struct xe_exec_queue *q)
q->entity = &ge->entity;
- if (xe_guc_read_stopped(guc))
+ if (xe_guc_read_stopped(guc) || vf_recovery(guc))
xe_sched_stop(sched);
mutex_unlock(&guc->submission_state.lock);
@@ -1527,12 +1763,24 @@ static void guc_exec_queue_add_msg(struct xe_exec_queue *q, struct xe_sched_msg
msg->private_data = q;
trace_xe_sched_msg_add(msg);
- if (opcode & MSG_LOCKED)
+ if (opcode & MSG_HEAD)
+ xe_sched_add_msg_head(&q->guc->sched, msg);
+ else if (opcode & MSG_LOCKED)
xe_sched_add_msg_locked(&q->guc->sched, msg);
else
xe_sched_add_msg(&q->guc->sched, msg);
}
+static void guc_exec_queue_try_add_msg_head(struct xe_exec_queue *q,
+ struct xe_sched_msg *msg,
+ u32 opcode)
+{
+ if (!list_empty(&msg->link))
+ return;
+
+ guc_exec_queue_add_msg(q, msg, opcode | MSG_LOCKED | MSG_HEAD);
+}
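The opcode word here packs a message type in the low nibble (OPCODE_MASK) with dispatch flags in higher bits (MSG_LOCKED, MSG_HEAD). A minimal standalone sketch of how those bits combine and steer the add path, using only the values visible in this hunk (RESUME and the two flag bits; other opcodes are elided) and a print-only stand-in for the dispatch logic:

#include <stdio.h>

#define RESUME		4
#define OPCODE_MASK	0xfu
#define MSG_LOCKED	(1u << 8)
#define MSG_HEAD	(1u << 9)

/* Print-only stand-in for guc_exec_queue_add_msg()'s dispatch choice. */
static void dispatch(unsigned int opcode)
{
	if (opcode & MSG_HEAD)
		printf("add to head of msg list, op=%u\n", opcode & OPCODE_MASK);
	else if (opcode & MSG_LOCKED)
		printf("add with msg lock held, op=%u\n", opcode & OPCODE_MASK);
	else
		printf("add to tail, op=%u\n", opcode & OPCODE_MASK);
}

int main(void)
{
	dispatch(RESUME | MSG_LOCKED | MSG_HEAD);	/* replay/unpause path */
	dispatch(RESUME);				/* normal path */
	return 0;
}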
+
static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
struct xe_sched_msg *msg,
u32 opcode)
@@ -1548,14 +1796,14 @@ static bool guc_exec_queue_try_add_msg(struct xe_exec_queue *q,
#define STATIC_MSG_CLEANUP 0
#define STATIC_MSG_SUSPEND 1
#define STATIC_MSG_RESUME 2
-static void guc_exec_queue_fini(struct xe_exec_queue *q)
+static void guc_exec_queue_destroy(struct xe_exec_queue *q)
{
struct xe_sched_msg *msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
if (!(q->flags & EXEC_QUEUE_FLAG_PERMANENT) && !exec_queue_wedged(q))
guc_exec_queue_add_msg(q, msg, CLEANUP);
else
- __guc_exec_queue_fini(exec_queue_to_guc(q), q);
+ __guc_exec_queue_destroy(exec_queue_to_guc(q), q);
}
static int guc_exec_queue_set_priority(struct xe_exec_queue *q,
@@ -1633,6 +1881,7 @@ static int guc_exec_queue_suspend(struct xe_exec_queue *q)
static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
{
struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
int ret;
/*
@@ -1640,11 +1889,21 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
* suspend_pending upon kill but, to be paranoid about races in which
* suspend_pending is set after kill, also check kill here.
*/
- ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
- !READ_ONCE(q->guc->suspend_pending) ||
- exec_queue_killed(q) ||
- xe_guc_read_stopped(guc),
- HZ * 5);
+#define WAIT_COND \
+ (!READ_ONCE(q->guc->suspend_pending) || exec_queue_killed(q) || \
+ xe_guc_read_stopped(guc))
+
+retry:
+ if (IS_SRIOV_VF(xe))
+ ret = wait_event_interruptible_timeout(guc->ct.wq, WAIT_COND ||
+ vf_recovery(guc),
+ HZ * 5);
+ else
+ ret = wait_event_interruptible_timeout(q->guc->suspend_wait,
+ WAIT_COND, HZ * 5);
+
+ if (vf_recovery(guc) && !xe_device_wedged((guc_to_xe(guc))))
+ return -EAGAIN;
if (!ret) {
xe_gt_warn(guc_to_gt(guc),
@@ -1652,8 +1911,13 @@ static int guc_exec_queue_suspend_wait(struct xe_exec_queue *q)
q->guc->id);
/* XXX: Trigger GT reset? */
return -ETIME;
+ } else if (IS_SRIOV_VF(xe) && !WAIT_COND) {
+ /* Corner case on RESFIX DONE where vf_recovery() changes */
+ goto retry;
}
+#undef WAIT_COND
+
return ret < 0 ? ret : 0;
}
@@ -1676,7 +1940,7 @@ static bool guc_exec_queue_reset_status(struct xe_exec_queue *q)
}
/*
- * All of these functions are an abstraction layer which other parts of XE can
+ * All of these functions are an abstraction layer which other parts of Xe can
* use to trap into the GuC backend. All of these functions, aside from init,
* really shouldn't do much other than trap into the DRM scheduler which
* synchronizes these operations.
@@ -1685,6 +1949,7 @@ static const struct xe_exec_queue_ops guc_exec_queue_ops = {
.init = guc_exec_queue_init,
.kill = guc_exec_queue_kill,
.fini = guc_exec_queue_fini,
+ .destroy = guc_exec_queue_destroy,
.set_priority = guc_exec_queue_set_priority,
.set_timeslice = guc_exec_queue_set_timeslice,
.set_preempt_timeout = guc_exec_queue_set_preempt_timeout,
@@ -1706,7 +1971,7 @@ static void guc_exec_queue_stop(struct xe_guc *guc, struct xe_exec_queue *q)
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
xe_exec_queue_put(q);
else if (exec_queue_destroyed(q))
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
if (q->guc->suspend_pending) {
set_exec_queue_suspended(q);
@@ -1751,6 +2016,12 @@ int xe_guc_submit_reset_prepare(struct xe_guc *guc)
{
int ret;
+ if (xe_gt_WARN_ON(guc_to_gt(guc), vf_recovery(guc)))
+ return 0;
+
+ if (!guc->submission_state.initialized)
+ return 0;
+
/*
* Using an atomic here rather than submission_state.lock as this
* function can be called while holding the CT lock (engine reset
@@ -1797,16 +2068,177 @@ void xe_guc_submit_stop(struct xe_guc *guc)
}
+static void guc_exec_queue_revert_pending_state_change(struct xe_guc *guc,
+ struct xe_exec_queue *q)
+{
+ bool pending_enable, pending_disable, pending_resume;
+
+ pending_enable = exec_queue_pending_enable(q);
+ pending_resume = exec_queue_pending_resume(q);
+
+ if (pending_enable && pending_resume) {
+ q->guc->needs_resume = true;
+ xe_gt_dbg(guc_to_gt(guc), "Replay RESUME - guc_id=%d",
+ q->guc->id);
+ }
+
+ if (pending_enable && !pending_resume &&
+ !exec_queue_pending_tdr_exit(q)) {
+ clear_exec_queue_registered(q);
+ if (xe_exec_queue_is_lr(q))
+ xe_exec_queue_put(q);
+ xe_gt_dbg(guc_to_gt(guc), "Replay REGISTER - guc_id=%d",
+ q->guc->id);
+ }
+
+ if (pending_enable) {
+ clear_exec_queue_enabled(q);
+ clear_exec_queue_pending_resume(q);
+ clear_exec_queue_pending_tdr_exit(q);
+ clear_exec_queue_pending_enable(q);
+ xe_gt_dbg(guc_to_gt(guc), "Replay ENABLE - guc_id=%d",
+ q->guc->id);
+ }
+
+ if (exec_queue_destroyed(q) && exec_queue_registered(q)) {
+ clear_exec_queue_destroyed(q);
+ if (exec_queue_extra_ref(q))
+ xe_exec_queue_put(q);
+ else
+ q->guc->needs_cleanup = true;
+ clear_exec_queue_extra_ref(q);
+ xe_gt_dbg(guc_to_gt(guc), "Replay CLEANUP - guc_id=%d",
+ q->guc->id);
+ }
+
+ pending_disable = exec_queue_pending_disable(q);
+
+ if (pending_disable && exec_queue_suspended(q)) {
+ clear_exec_queue_suspended(q);
+ q->guc->needs_suspend = true;
+ xe_gt_dbg(guc_to_gt(guc), "Replay SUSPEND - guc_id=%d",
+ q->guc->id);
+ }
+
+ if (pending_disable) {
+ if (!pending_enable)
+ set_exec_queue_enabled(q);
+ clear_exec_queue_pending_disable(q);
+ clear_exec_queue_check_timeout(q);
+ xe_gt_dbg(guc_to_gt(guc), "Replay DISABLE - guc_id=%d",
+ q->guc->id);
+ }
+
+ q->guc->resume_time = 0;
+}
+
+static void lrc_parallel_clear(struct xe_lrc *lrc)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ struct iosys_map map = xe_lrc_parallel_map(lrc);
+ int i;
+
+ for (i = 0; i < WQ_SIZE / sizeof(u32); ++i)
+ parallel_write(xe, map, wq[i],
+ FIELD_PREP(WQ_TYPE_MASK, WQ_TYPE_NOOP) |
+ FIELD_PREP(WQ_LEN_MASK, 0));
+}
+
+/*
+ * This function is quite complex, but it is the only real way to ensure no
+ * state is lost during VF resume flows. It scans the queue state, makes
+ * adjustments as needed, and queues jobs / messages which are replayed upon
+ * unpause.
+ */
+static void guc_exec_queue_pause(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ struct xe_sched_job *job;
+ int i;
+
+ lockdep_assert_held(&guc->submission_state.lock);
+
+ /* Stop scheduling + flush any DRM scheduler operations */
+ xe_sched_submission_stop(sched);
+ if (xe_exec_queue_is_lr(q))
+ cancel_work_sync(&q->guc->lr_tdr);
+ else
+ cancel_delayed_work_sync(&sched->base.work_tdr);
+
+ guc_exec_queue_revert_pending_state_change(guc, q);
+
+ if (xe_exec_queue_is_parallel(q)) {
+ /* Pairs with WRITE_ONCE in __xe_exec_queue_init */
+ struct xe_lrc *lrc = READ_ONCE(q->lrc[0]);
+
+ /*
+ * NOP existing WQ commands that may contain stale GGTT
+ * addresses. These will be replayed upon unpause. The hardware
+ * seems to get confused if the WQ head/tail pointers are
+ * adjusted.
+ */
+ if (lrc)
+ lrc_parallel_clear(lrc);
+ }
+
+ job = xe_sched_first_pending_job(sched);
+ if (job) {
+ job->restore_replay = true;
+
+ /*
+ * Adjust software tail so jobs submitted overwrite previous
+ * position in ring buffer with new GGTT addresses.
+ */
+ for (i = 0; i < q->width; ++i)
+ q->lrc[i]->ring.tail = job->ptrs[i].head;
+ }
+}
+
+/**
+ * xe_guc_submit_pause - Stop further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be disabled
+ */
+void xe_guc_submit_pause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ /* Prevent redundant attempts to stop parallel queues */
+ if (q->guc->id != index)
+ continue;
+
+ guc_exec_queue_pause(guc, q);
+ }
+ mutex_unlock(&guc->submission_state.lock);
+}
+
static void guc_exec_queue_start(struct xe_exec_queue *q)
{
struct xe_gpu_scheduler *sched = &q->guc->sched;
if (!exec_queue_killed_or_banned_or_wedged(q)) {
+ struct xe_sched_job *job = xe_sched_first_pending_job(sched);
int i;
trace_xe_exec_queue_resubmit(q);
- for (i = 0; i < q->width; ++i)
- xe_lrc_set_ring_head(q->lrc[i], q->lrc[i]->ring.tail);
+ if (job) {
+ for (i = 0; i < q->width; ++i) {
+ /*
+ * The GuC context is unregistered at this
+ * point; adjusting the software ring tail
+ * ensures jobs are rewritten in their
+ * original placement, and adjusting the
+ * LRC tail ensures the newly loaded GuC /
+ * contexts only ever see the LRC tail
+ * increasing as jobs are written out.
+ */
+ q->lrc[i]->ring.tail = job->ptrs[i].head;
+ xe_lrc_set_ring_tail(q->lrc[i],
+ xe_lrc_ring_head(q->lrc[i]));
+ }
+ }
xe_sched_resubmit_jobs(sched);
}
@@ -1837,6 +2269,152 @@ int xe_guc_submit_start(struct xe_guc *guc)
return 0;
}
+static void guc_exec_queue_unpause_prepare(struct xe_guc *guc,
+ struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ struct xe_sched_job *job = NULL, *__job;
+ bool restore_replay = false;
+
+ list_for_each_entry(__job, &sched->base.pending_list, drm.list) {
+ job = __job;
+ restore_replay |= job->restore_replay;
+ if (restore_replay) {
+ xe_gt_dbg(guc_to_gt(guc), "Replay JOB - guc_id=%d, seqno=%d",
+ q->guc->id, xe_sched_job_seqno(job));
+
+ q->ring_ops->emit_job(job);
+ job->restore_replay = true;
+ }
+ }
+
+ if (job)
+ job->last_replay = true;
+}
+
+/**
+ * xe_guc_submit_unpause_prepare - Prepare to unpause submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be prepared for unpause
+ */
+void xe_guc_submit_unpause_prepare(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ xe_gt_assert(guc_to_gt(guc), vf_recovery(guc));
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ /* Prevent redundant attempts to stop parallel queues */
+ if (q->guc->id != index)
+ continue;
+
+ guc_exec_queue_unpause_prepare(guc, q);
+ }
+ mutex_unlock(&guc->submission_state.lock);
+}
+
+static void guc_exec_queue_replay_pending_state_change(struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ struct xe_sched_msg *msg;
+
+ if (q->guc->needs_cleanup) {
+ msg = q->guc->static_msgs + STATIC_MSG_CLEANUP;
+
+ guc_exec_queue_add_msg(q, msg, CLEANUP);
+ q->guc->needs_cleanup = false;
+ }
+
+ if (q->guc->needs_suspend) {
+ msg = q->guc->static_msgs + STATIC_MSG_SUSPEND;
+
+ xe_sched_msg_lock(sched);
+ guc_exec_queue_try_add_msg_head(q, msg, SUSPEND);
+ xe_sched_msg_unlock(sched);
+
+ q->guc->needs_suspend = false;
+ }
+
+ /*
+ * The resume must be in the message queue before the suspend as it is
+ * not possible for a resume to be issued while a suspend is pending,
+ * but the inverse is possible.
+ */
+ if (q->guc->needs_resume) {
+ msg = q->guc->static_msgs + STATIC_MSG_RESUME;
+
+ xe_sched_msg_lock(sched);
+ guc_exec_queue_try_add_msg_head(q, msg, RESUME);
+ xe_sched_msg_unlock(sched);
+
+ q->guc->needs_resume = false;
+ }
+}
+
+static void guc_exec_queue_unpause(struct xe_guc *guc, struct xe_exec_queue *q)
+{
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+ bool needs_tdr = exec_queue_killed_or_banned_or_wedged(q);
+
+ lockdep_assert_held(&guc->submission_state.lock);
+
+ xe_sched_resubmit_jobs(sched);
+ guc_exec_queue_replay_pending_state_change(q);
+ xe_sched_submission_start(sched);
+ if (needs_tdr)
+ xe_guc_exec_queue_trigger_cleanup(q);
+ xe_sched_submission_resume_tdr(sched);
+}
+
+/**
+ * xe_guc_submit_unpause - Allow further runs of submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be enabled
+ */
+void xe_guc_submit_unpause(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ /*
+ * Prevent redundant attempts to stop parallel queues, or queues
+ * created after resfix done.
+ */
+ if (q->guc->id != index ||
+ !READ_ONCE(q->guc->sched.base.pause_submit))
+ continue;
+
+ guc_exec_queue_unpause(guc, q);
+ }
+ mutex_unlock(&guc->submission_state.lock);
+}
+
+/**
+ * xe_guc_submit_pause_abort - Abort all paused submission tasks on given GuC.
+ * @guc: the &xe_guc struct instance whose scheduler is to be aborted
+ */
+void xe_guc_submit_pause_abort(struct xe_guc *guc)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ struct xe_gpu_scheduler *sched = &q->guc->sched;
+
+ /* Prevent redundant attempts to stop parallel queues */
+ if (q->guc->id != index)
+ continue;
+
+ xe_sched_submission_start(sched);
+ if (exec_queue_killed_or_banned_or_wedged(q))
+ xe_guc_exec_queue_trigger_cleanup(q);
+ }
+ mutex_unlock(&guc->submission_state.lock);
+}
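Taken together, these four entry points bracket a VF post-migration fixup window. Below is a hedged, hypothetical sketch of the ordering a recovery path might follow; only the xe_guc_submit_* names come from this patch, the stubs merely print, and the fixup step is an assumption rather than real driver code:

#include <stdbool.h>
#include <stdio.h>

/* Print-only stubs standing in for the real xe_guc_submit_* functions. */
static void xe_guc_submit_pause(void)           { puts("pause: stop scheds, revert pending state"); }
static void xe_guc_submit_unpause_prepare(void) { puts("unpause_prepare: re-emit jobs w/ new GGTT"); }
static void xe_guc_submit_unpause(void)         { puts("unpause: resubmit jobs, replay msgs"); }
static void xe_guc_submit_pause_abort(void)     { puts("pause_abort: restart scheds, no replay"); }

/* Hypothetical fixup step (GGTT shift, HWSP rebase, ...). */
static bool post_migration_fixups(void)         { return true; }

int main(void)
{
	xe_guc_submit_pause();
	if (post_migration_fixups()) {
		xe_guc_submit_unpause_prepare();
		xe_guc_submit_unpause();
	} else {
		xe_guc_submit_pause_abort();
	}
	return 0;
}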
+
static struct xe_exec_queue *
g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
{
@@ -1850,7 +2428,7 @@ g2h_exec_queue_lookup(struct xe_guc *guc, u32 guc_id)
q = xa_load(&guc->submission_state.exec_queue_lookup, guc_id);
if (unlikely(!q)) {
- xe_gt_err(gt, "Not engine present for guc_id %u\n", guc_id);
+ xe_gt_err(gt, "No exec queue found for guc_id %u\n", guc_id);
return NULL;
}
@@ -1886,6 +2464,8 @@ static void handle_sched_done(struct xe_guc *guc, struct xe_exec_queue *q,
xe_gt_assert(guc_to_gt(guc), exec_queue_pending_enable(q));
q->guc->resume_time = ktime_get();
+ clear_exec_queue_pending_resume(q);
+ clear_exec_queue_pending_tdr_exit(q);
clear_exec_queue_pending_enable(q);
smp_wmb();
wake_up_all(&guc->ct.wq);
@@ -1960,7 +2540,7 @@ static void handle_deregister_done(struct xe_guc *guc, struct xe_exec_queue *q)
if (exec_queue_extra_ref(q) || xe_exec_queue_is_lr(q))
xe_exec_queue_put(q);
else
- __guc_exec_queue_fini(guc, q);
+ __guc_exec_queue_destroy(guc, q);
}
int xe_guc_deregister_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
@@ -2057,12 +2637,16 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
struct xe_gt *gt = guc_to_gt(guc);
struct xe_exec_queue *q;
u32 guc_id;
+ u32 type = XE_GUC_CAT_ERR_TYPE_INVALID;
- if (unlikely(len < 1))
+ if (unlikely(!len || len > 2))
return -EPROTO;
guc_id = msg[0];
+ if (len == 2)
+ type = msg[1];
+
if (guc_id == GUC_ID_UNKNOWN) {
/*
* GuC uses GUC_ID_UNKNOWN if it can not map the CAT fault to any PF/VF
@@ -2076,8 +2660,19 @@ int xe_guc_exec_queue_memory_cat_error_handler(struct xe_guc *guc, u32 *msg,
if (unlikely(!q))
return -EPROTO;
- xe_gt_dbg(gt, "Engine memory cat error: engine_class=%s, logical_mask: 0x%x, guc_id=%d",
- xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+ /*
+ * The type is HW-defined and changes based on platform, so we don't
+ * decode it in the kernel and only check if it is valid.
+ * See bspec 54047 and 72187 for details.
+ */
+ if (type != XE_GUC_CAT_ERR_TYPE_INVALID)
+ xe_gt_dbg(gt,
+ "Engine memory CAT error [%u]: class=%s, logical_mask: 0x%x, guc_id=%d",
+ type, xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
+ else
+ xe_gt_dbg(gt,
+ "Engine memory CAT error: class=%s, logical_mask: 0x%x, guc_id=%d",
+ xe_hw_engine_class_to_str(q->class), q->logical_mask, guc_id);
trace_xe_exec_queue_memory_cat_error(q);
@@ -2334,6 +2929,34 @@ static void guc_exec_queue_print(struct xe_exec_queue *q, struct drm_printer *p)
}
/**
+ * xe_guc_register_vf_exec_queue - Register exec queue for a given context type.
+ * @q: Execution queue
+ * @ctx_type: Type of the context
+ *
+ * This function registers the execution queue with the GuC. Special context
+ * types like GUC_CONTEXT_COMPRESSION_SAVE and GUC_CONTEXT_COMPRESSION_RESTORE
+ * are only applicable on integrated GPUs and only from a VF. After
+ * registration, scheduling is enabled on the queue.
+ *
+ * Return: none.
+ */
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type)
+{
+ struct xe_guc *guc = exec_queue_to_guc(q);
+ struct xe_device *xe = guc_to_xe(guc);
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ xe_gt_assert(gt, IS_SRIOV_VF(xe));
+ xe_gt_assert(gt, !IS_DGFX(xe));
+ xe_gt_assert(gt, ctx_type == GUC_CONTEXT_COMPRESSION_SAVE ||
+ ctx_type == GUC_CONTEXT_COMPRESSION_RESTORE);
+ xe_gt_assert(gt, GUC_SUBMIT_VER(guc) >= MAKE_GUC_VER(1, 23, 0));
+
+ register_exec_queue(q, ctx_type);
+ enable_scheduling(q);
+}
+
+/**
* xe_guc_submit_print - GuC Submit Print.
* @guc: GuC.
* @p: drm_printer where it will be printed out.
@@ -2353,3 +2976,32 @@ void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p)
guc_exec_queue_print(q, p);
mutex_unlock(&guc->submission_state.lock);
}
+
+/**
+ * xe_guc_contexts_hwsp_rebase - Re-compute GGTT references within all
+ * exec queues registered to given GuC.
+ * @guc: the &xe_guc struct instance
+ * @scratch: scratch buffer to be used as temporary storage
+ *
+ * Returns: zero on success, negative error code on failure.
+ */
+int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch)
+{
+ struct xe_exec_queue *q;
+ unsigned long index;
+ int err = 0;
+
+ mutex_lock(&guc->submission_state.lock);
+ xa_for_each(&guc->submission_state.exec_queue_lookup, index, q) {
+ /* Prevent redundant attempts to stop parallel queues */
+ if (q->guc->id != index)
+ continue;
+
+ err = xe_exec_queue_contexts_hwsp_rebase(q, scratch);
+ if (err)
+ break;
+ }
+ mutex_unlock(&guc->submission_state.lock);
+
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.h b/drivers/gpu/drm/xe/xe_guc_submit.h
index 9b71a986c6ca..b49a2748ec46 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.h
+++ b/drivers/gpu/drm/xe/xe_guc_submit.h
@@ -13,11 +13,17 @@ struct xe_exec_queue;
struct xe_guc;
int xe_guc_submit_init(struct xe_guc *guc, unsigned int num_ids);
+int xe_guc_submit_enable(struct xe_guc *guc);
+void xe_guc_submit_disable(struct xe_guc *guc);
int xe_guc_submit_reset_prepare(struct xe_guc *guc);
void xe_guc_submit_reset_wait(struct xe_guc *guc);
void xe_guc_submit_stop(struct xe_guc *guc);
int xe_guc_submit_start(struct xe_guc *guc);
+void xe_guc_submit_pause(struct xe_guc *guc);
+void xe_guc_submit_unpause(struct xe_guc *guc);
+void xe_guc_submit_unpause_prepare(struct xe_guc *guc);
+void xe_guc_submit_pause_abort(struct xe_guc *guc);
void xe_guc_submit_wedge(struct xe_guc *guc);
int xe_guc_read_stopped(struct xe_guc *guc);
@@ -39,5 +45,8 @@ xe_guc_exec_queue_snapshot_print(struct xe_guc_submit_exec_queue_snapshot *snaps
void
xe_guc_exec_queue_snapshot_free(struct xe_guc_submit_exec_queue_snapshot *snapshot);
void xe_guc_submit_print(struct xe_guc *guc, struct drm_printer *p);
+void xe_guc_register_vf_exec_queue(struct xe_exec_queue *q, int ctx_type);
+
+int xe_guc_contexts_hwsp_rebase(struct xe_guc *guc, void *scratch);
#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.c b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
new file mode 100644
index 000000000000..a80175c7c478
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "abi/guc_actions_abi.h"
+
+#include "xe_device.h"
+#include "xe_gt_stats.h"
+#include "xe_gt_types.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_tlb_inval.h"
+#include "xe_force_wake.h"
+#include "xe_mmio.h"
+#include "xe_tlb_inval.h"
+
+#include "regs/xe_guc_regs.h"
+
+/*
+ * XXX: The seqno algorithm relies on TLB invalidations being processed in
+ * order, which they currently are by the GuC; if that changes, the algorithm
+ * will need to be updated.
+ */
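As a minimal illustration of that in-order property, one completion can retire every older pending invalidation. The array-based bookkeeping below is hypothetical (the real fence handling lives in the TLB invalidation core) and seqno wrap is ignored:

#include <stdio.h>

/* Pending invalidation seqnos, oldest first, as issued in order. */
static unsigned int pending[] = { 5, 6, 7, 8 };
static unsigned int npending = 4, head;

/* On a G2H "done" for @seqno, everything issued earlier is also done. */
static void tlb_inval_done(unsigned int seqno)
{
	while (head < npending && pending[head] <= seqno) {
		printf("signal fence for seqno %u\n", pending[head]);
		head++;
	}
}

int main(void)
{
	tlb_inval_done(7);	/* retires 5, 6 and 7 */
	tlb_inval_done(8);	/* retires 8 */
	return 0;
}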
+
+static int send_tlb_inval(struct xe_guc *guc, const u32 *action, int len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ xe_gt_assert(gt, action[1]); /* Seqno */
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_TLB_INVAL, 1);
+ return xe_guc_ct_send(&guc->ct, action, len,
+ G2H_LEN_DW_TLB_INVALIDATE, 1);
+}
+
+#define MAKE_INVAL_OP(type) ((type << XE_GUC_TLB_INVAL_TYPE_SHIFT) | \
+ XE_GUC_TLB_INVAL_MODE_HEAVY << XE_GUC_TLB_INVAL_MODE_SHIFT | \
+ XE_GUC_TLB_INVAL_FLUSH_CACHE)
+
+static int send_tlb_inval_all(struct xe_tlb_inval *tlb_inval, u32 seqno)
+{
+ struct xe_guc *guc = tlb_inval->private;
+ u32 action[] = {
+ XE_GUC_ACTION_TLB_INVALIDATION_ALL,
+ seqno,
+ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL),
+ };
+
+ return send_tlb_inval(guc, action, ARRAY_SIZE(action));
+}
+
+static int send_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval, u32 seqno)
+{
+ struct xe_guc *guc = tlb_inval->private;
+ struct xe_gt *gt = guc_to_gt(guc);
+ struct xe_device *xe = guc_to_xe(guc);
+
+ /*
+ * A -ECANCELED return from this function is squashed at the caller
+ * and signals the waiters.
+ */
+
+ if (xe_guc_ct_enabled(&guc->ct) && guc->submission_state.enabled) {
+ u32 action[] = {
+ XE_GUC_ACTION_TLB_INVALIDATION,
+ seqno,
+ MAKE_INVAL_OP(XE_GUC_TLB_INVAL_GUC),
+ };
+
+ return send_tlb_inval(guc, action, ARRAY_SIZE(action));
+ } else if (xe_device_uc_enabled(xe) && !xe_device_wedged(xe)) {
+ struct xe_mmio *mmio = &gt->mmio;
+ unsigned int fw_ref;
+
+ if (IS_SRIOV_VF(xe))
+ return -ECANCELED;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (xe->info.platform == XE_PVC || GRAPHICS_VER(xe) >= 20) {
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC1,
+ PVC_GUC_TLB_INV_DESC1_INVALIDATE);
+ xe_mmio_write32(mmio, PVC_GUC_TLB_INV_DESC0,
+ PVC_GUC_TLB_INV_DESC0_VALID);
+ } else {
+ xe_mmio_write32(mmio, GUC_TLB_INV_CR,
+ GUC_TLB_INV_CR_INVALIDATE);
+ }
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ }
+
+ return -ECANCELED;
+}
+
+/*
+ * Ensure that roundup_pow_of_two(length) doesn't overflow.
+ * Note that roundup_pow_of_two() operates on unsigned long,
+ * not on u64.
+ */
+#define MAX_RANGE_TLB_INVALIDATION_LENGTH (rounddown_pow_of_two(ULONG_MAX))
+
+static int send_tlb_inval_ppgtt(struct xe_tlb_inval *tlb_inval, u32 seqno,
+ u64 start, u64 end, u32 asid)
+{
+#define MAX_TLB_INVALIDATION_LEN 7
+ struct xe_guc *guc = tlb_inval->private;
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 action[MAX_TLB_INVALIDATION_LEN];
+ u64 length = end - start;
+ int len = 0;
+
+ if (guc_to_xe(guc)->info.force_execlist)
+ return -ECANCELED;
+
+ action[len++] = XE_GUC_ACTION_TLB_INVALIDATION;
+ action[len++] = seqno;
+ if (!gt_to_xe(gt)->info.has_range_tlb_inval ||
+ length > MAX_RANGE_TLB_INVALIDATION_LENGTH) {
+ action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_FULL);
+ } else {
+ u64 orig_start = start;
+ u64 align;
+
+ if (length < SZ_4K)
+ length = SZ_4K;
+
+ /*
+ * We need to invalidate at a higher granularity if the start
+ * address is not aligned to the length. In that case, find a
+ * length large enough to create an address mask covering the
+ * required range.
+ */
+ align = roundup_pow_of_two(length);
+ start = ALIGN_DOWN(start, align);
+ end = ALIGN(end, align);
+ length = align;
+ while (start + length < end) {
+ length <<= 1;
+ start = ALIGN_DOWN(orig_start, length);
+ }
+
+ /*
+ * Minimum invalidation size for a 2MB page that the hardware
+ * expects is 16MB
+ */
+ if (length >= SZ_2M) {
+ length = max_t(u64, SZ_16M, length);
+ start = ALIGN_DOWN(orig_start, length);
+ }
+
+ xe_gt_assert(gt, length >= SZ_4K);
+ xe_gt_assert(gt, is_power_of_2(length));
+ xe_gt_assert(gt, !(length & GENMASK(ilog2(SZ_16M) - 1,
+ ilog2(SZ_2M) + 1)));
+ xe_gt_assert(gt, IS_ALIGNED(start, length));
+
+ action[len++] = MAKE_INVAL_OP(XE_GUC_TLB_INVAL_PAGE_SELECTIVE);
+ action[len++] = asid;
+ action[len++] = lower_32_bits(start);
+ action[len++] = upper_32_bits(start);
+ action[len++] = ilog2(length) - ilog2(SZ_4K);
+ }
+
+ xe_gt_assert(gt, len <= MAX_TLB_INVALIDATION_LEN);
+
+ return send_tlb_inval(guc, action, len);
+}
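To make the alignment logic concrete, here is a standalone userspace sketch of the same math run on a hypothetical unaligned 12 KiB range at 0x7000; the SZ_* constants and ALIGN helpers are re-defined locally and the example is illustrative only:

#include <stdint.h>
#include <stdio.h>

#define SZ_4K  0x1000ULL
#define SZ_2M  0x200000ULL
#define SZ_16M 0x1000000ULL
#define ALIGN_DOWN(x, a) ((x) & ~((a) - 1))
#define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

static uint64_t roundup_pow_of_two(uint64_t v)
{
	uint64_t r = 1;

	while (r < v)
		r <<= 1;
	return r;
}

int main(void)
{
	/* Hypothetical unaligned PPGTT range: 12 KiB starting at 0x7000 */
	uint64_t start = 0x7000, end = 0xa000;
	uint64_t orig_start = start;
	uint64_t length = end - start;
	uint64_t align;

	if (length < SZ_4K)
		length = SZ_4K;

	align = roundup_pow_of_two(length);
	start = ALIGN_DOWN(start, align);
	end = ALIGN_UP(end, align);
	length = align;
	while (start + length < end) {
		length <<= 1;
		start = ALIGN_DOWN(orig_start, length);
	}

	if (length >= SZ_2M) {
		length = length > SZ_16M ? length : SZ_16M;
		start = ALIGN_DOWN(orig_start, length);
	}

	/* Prints start=0x0 length=0x10000 for this example */
	printf("start=0x%llx length=0x%llx\n",
	       (unsigned long long)start, (unsigned long long)length);
	return 0;
}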
+
+static bool tlb_inval_initialized(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_guc *guc = tlb_inval->private;
+
+ return xe_guc_ct_initialized(&guc->ct);
+}
+
+static void tlb_inval_flush(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_guc *guc = tlb_inval->private;
+
+ LNL_FLUSH_WORK(&guc->ct.g2h_worker);
+}
+
+static long tlb_inval_timeout_delay(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_guc *guc = tlb_inval->private;
+
+ /* this reflects what the HW/GuC needs to process a TLB inv request */
+ const long hw_tlb_timeout = HZ / 4;
+
+ /* this estimates actual delay caused by the CTB transport */
+ long delay = xe_guc_ct_queue_proc_time_jiffies(&guc->ct);
+
+ return hw_tlb_timeout + 2 * delay;
+}
+
+static const struct xe_tlb_inval_ops guc_tlb_inval_ops = {
+ .all = send_tlb_inval_all,
+ .ggtt = send_tlb_inval_ggtt,
+ .ppgtt = send_tlb_inval_ppgtt,
+ .initialized = tlb_inval_initialized,
+ .flush = tlb_inval_flush,
+ .timeout_delay = tlb_inval_timeout_delay,
+};
+
+/**
+ * xe_guc_tlb_inval_init_early() - Init GuC TLB invalidation early
+ * @guc: GuC object
+ * @tlb_inval: TLB invalidation client
+ *
+ * Initialize GuC TLB invalidation by setting back pointer in TLB invalidation
+ * client to the GuC and setting GuC backend ops.
+ */
+void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
+ struct xe_tlb_inval *tlb_inval)
+{
+ tlb_inval->private = guc;
+ tlb_inval->ops = &guc_tlb_inval_ops;
+}
+
+/**
+ * xe_guc_tlb_inval_done_handler() - TLB invalidation done handler
+ * @guc: guc
+ * @msg: message indicating TLB invalidation done
+ * @len: length of message
+ *
+ * Parse the seqno of the TLB invalidation, wake any waiters for that seqno,
+ * and signal any invalidation fences for it. The algorithm depends on seqnos
+ * being received in order and asserts this assumption.
+ *
+ * Return: 0 on success, -EPROTO for malformed messages.
+ */
+int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+
+ if (unlikely(len != 1))
+ return -EPROTO;
+
+ xe_tlb_inval_done_handler(&gt->tlb_inval, msg[0]);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_guc_tlb_inval.h b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h
new file mode 100644
index 000000000000..07d668b02e3d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_guc_tlb_inval.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_GUC_TLB_INVAL_H_
+#define _XE_GUC_TLB_INVAL_H_
+
+#include <linux/types.h>
+
+struct xe_guc;
+struct xe_tlb_inval;
+
+void xe_guc_tlb_inval_init_early(struct xe_guc *guc,
+ struct xe_tlb_inval *tlb_inval);
+
+int xe_guc_tlb_inval_done_handler(struct xe_guc *guc, u32 *msg, u32 len);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_guc_types.h b/drivers/gpu/drm/xe/xe_guc_types.h
index 1fde7614fcc5..c7b9642b41ba 100644
--- a/drivers/gpu/drm/xe/xe_guc_types.h
+++ b/drivers/gpu/drm/xe/xe_guc_types.h
@@ -85,6 +85,12 @@ struct xe_guc {
struct xarray exec_queue_lookup;
/** @submission_state.stopped: submissions are stopped */
atomic_t stopped;
+ /**
+ * @submission_state.reset_blocked: reset attempts are blocked;
+ * blocking resets in order to delay them may be required when
+ * running an operation that is sensitive to resets.
+ */
+ atomic_t reset_blocked;
/** @submission_state.lock: protects submission state */
struct mutex lock;
/** @submission_state.enabled: submission is enabled */
diff --git a/drivers/gpu/drm/xe/xe_heci_gsc.c b/drivers/gpu/drm/xe/xe_heci_gsc.c
index 27d11e06a82b..495cdd4f948d 100644
--- a/drivers/gpu/drm/xe/xe_heci_gsc.c
+++ b/drivers/gpu/drm/xe/xe_heci_gsc.c
@@ -8,18 +8,17 @@
#include <linux/pci.h>
#include <linux/sizes.h>
+#include <drm/drm_print.h>
+
#include "xe_device_types.h"
#include "xe_drv.h"
#include "xe_heci_gsc.h"
+#include "regs/xe_gsc_regs.h"
#include "xe_platform_types.h"
#include "xe_survivability_mode.h"
#define GSC_BAR_LENGTH 0x00000FFC
-#define DG1_GSC_HECI2_BASE 0x259000
-#define PVC_GSC_HECI2_BASE 0x285000
-#define DG2_GSC_HECI2_BASE 0x374000
-
static void heci_gsc_irq_mask(struct irq_data *d)
{
/* generic irq handling */
@@ -200,7 +199,7 @@ int xe_heci_gsc_init(struct xe_device *xe)
if (ret)
return ret;
- if (!def->use_polling && !xe_survivability_mode_is_enabled(xe)) {
+ if (!def->use_polling && !xe_survivability_mode_is_boot_enabled(xe)) {
ret = heci_gsc_irq_setup(xe);
if (ret)
return ret;
@@ -224,7 +223,7 @@ void xe_heci_gsc_irq_handler(struct xe_device *xe, u32 iir)
if (xe->heci_gsc.irq < 0)
return;
- ret = generic_handle_irq(xe->heci_gsc.irq);
+ ret = generic_handle_irq_safe(xe->heci_gsc.irq);
if (ret)
drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
}
@@ -244,7 +243,7 @@ void xe_heci_csc_irq_handler(struct xe_device *xe, u32 iir)
if (xe->heci_gsc.irq < 0)
return;
- ret = generic_handle_irq(xe->heci_gsc.irq);
+ ret = generic_handle_irq_safe(xe->heci_gsc.irq);
if (ret)
drm_err_ratelimited(&xe->drm, "error handling GSC irq: %d\n", ret);
}
diff --git a/drivers/gpu/drm/xe/xe_hmm.c b/drivers/gpu/drm/xe/xe_hmm.c
deleted file mode 100644
index 57b71956ddf4..000000000000
--- a/drivers/gpu/drm/xe/xe_hmm.c
+++ /dev/null
@@ -1,325 +0,0 @@
-// SPDX-License-Identifier: MIT
-/*
- * Copyright © 2024 Intel Corporation
- */
-
-#include <linux/scatterlist.h>
-#include <linux/mmu_notifier.h>
-#include <linux/dma-mapping.h>
-#include <linux/memremap.h>
-#include <linux/swap.h>
-#include <linux/hmm.h>
-#include <linux/mm.h>
-#include "xe_hmm.h"
-#include "xe_vm.h"
-#include "xe_bo.h"
-
-static u64 xe_npages_in_range(unsigned long start, unsigned long end)
-{
- return (end - start) >> PAGE_SHIFT;
-}
-
-static int xe_alloc_sg(struct xe_device *xe, struct sg_table *st,
- struct hmm_range *range, struct rw_semaphore *notifier_sem)
-{
- unsigned long i, npages, hmm_pfn;
- unsigned long num_chunks = 0;
- int ret;
-
- /* HMM docs says this is needed. */
- ret = down_read_interruptible(notifier_sem);
- if (ret)
- return ret;
-
- if (mmu_interval_read_retry(range->notifier, range->notifier_seq)) {
- up_read(notifier_sem);
- return -EAGAIN;
- }
-
- npages = xe_npages_in_range(range->start, range->end);
- for (i = 0; i < npages;) {
- unsigned long len;
-
- hmm_pfn = range->hmm_pfns[i];
- xe_assert(xe, hmm_pfn & HMM_PFN_VALID);
-
- len = 1UL << hmm_pfn_to_map_order(hmm_pfn);
-
- /* If order > 0 the page may extend beyond range->start */
- len -= (hmm_pfn & ~HMM_PFN_FLAGS) & (len - 1);
- i += len;
- num_chunks++;
- }
- up_read(notifier_sem);
-
- return sg_alloc_table(st, num_chunks, GFP_KERNEL);
-}
-
-/**
- * xe_build_sg() - build a scatter gather table for all the physical pages/pfn
- * in a hmm_range. dma-map pages if necessary. dma-address is save in sg table
- * and will be used to program GPU page table later.
- * @xe: the xe device who will access the dma-address in sg table
- * @range: the hmm range that we build the sg table from. range->hmm_pfns[]
- * has the pfn numbers of pages that back up this hmm address range.
- * @st: pointer to the sg table.
- * @notifier_sem: The xe notifier lock.
- * @write: whether we write to this range. This decides dma map direction
- * for system pages. If write we map it bi-diretional; otherwise
- * DMA_TO_DEVICE
- *
- * All the contiguous pfns will be collapsed into one entry in
- * the scatter gather table. This is for the purpose of efficiently
- * programming GPU page table.
- *
- * The dma_address in the sg table will later be used by GPU to
- * access memory. So if the memory is system memory, we need to
- * do a dma-mapping so it can be accessed by GPU/DMA.
- *
- * FIXME: This function currently only support pages in system
- * memory. If the memory is GPU local memory (of the GPU who
- * is going to access memory), we need gpu dpa (device physical
- * address), and there is no need of dma-mapping. This is TBD.
- *
- * FIXME: dma-mapping for peer gpu device to access remote gpu's
- * memory. Add this when you support p2p
- *
- * This function allocates the storage of the sg table. It is
- * caller's responsibility to free it calling sg_free_table.
- *
- * Returns 0 if successful; -ENOMEM if fails to allocate memory
- */
-static int xe_build_sg(struct xe_device *xe, struct hmm_range *range,
- struct sg_table *st,
- struct rw_semaphore *notifier_sem,
- bool write)
-{
- unsigned long npages = xe_npages_in_range(range->start, range->end);
- struct device *dev = xe->drm.dev;
- struct scatterlist *sgl;
- struct page *page;
- unsigned long i, j;
-
- lockdep_assert_held(notifier_sem);
-
- i = 0;
- for_each_sg(st->sgl, sgl, st->nents, j) {
- unsigned long hmm_pfn, size;
-
- hmm_pfn = range->hmm_pfns[i];
- page = hmm_pfn_to_page(hmm_pfn);
- xe_assert(xe, !is_device_private_page(page));
-
- size = 1UL << hmm_pfn_to_map_order(hmm_pfn);
- size -= page_to_pfn(page) & (size - 1);
- i += size;
-
- if (unlikely(j == st->nents - 1)) {
- xe_assert(xe, i >= npages);
- if (i > npages)
- size -= (i - npages);
-
- sg_mark_end(sgl);
- } else {
- xe_assert(xe, i < npages);
- }
-
- sg_set_page(sgl, page, size << PAGE_SHIFT, 0);
- }
-
- return dma_map_sgtable(dev, st, write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE,
- DMA_ATTR_SKIP_CPU_SYNC | DMA_ATTR_NO_KERNEL_MAPPING);
-}
-
-static void xe_hmm_userptr_set_mapped(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
- lockdep_assert_held_write(&vm->lock);
- lockdep_assert_held(&vm->userptr.notifier_lock);
-
- mutex_lock(&userptr->unmap_mutex);
- xe_assert(vm->xe, !userptr->mapped);
- userptr->mapped = true;
- mutex_unlock(&userptr->unmap_mutex);
-}
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vma *vma = &uvma->vma;
- bool write = !xe_vma_read_only(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
-
- if (!lockdep_is_held_type(&vm->userptr.notifier_lock, 0) &&
- !lockdep_is_held_type(&vm->lock, 0) &&
- !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
- /* Don't unmap in exec critical section. */
- xe_vm_assert_held(vm);
- /* Don't unmap while mapping the sg. */
- lockdep_assert_held(&vm->lock);
- }
-
- mutex_lock(&userptr->unmap_mutex);
- if (userptr->sg && userptr->mapped)
- dma_unmap_sgtable(xe->drm.dev, userptr->sg,
- write ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE, 0);
- userptr->mapped = false;
- mutex_unlock(&userptr->unmap_mutex);
-}
-
-/**
- * xe_hmm_userptr_free_sg() - Free the scatter gather table of userptr
- * @uvma: the userptr vma which hold the scatter gather table
- *
- * With function xe_userptr_populate_range, we allocate storage of
- * the userptr sg table. This is a helper function to free this
- * sg table, and dma unmap the address in the table.
- */
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
-
- xe_assert(xe_vma_vm(&uvma->vma)->xe, userptr->sg);
- xe_hmm_userptr_unmap(uvma);
- sg_free_table(userptr->sg);
- userptr->sg = NULL;
-}
-
-/**
- * xe_hmm_userptr_populate_range() - Populate physical pages of a virtual
- * address range
- *
- * @uvma: userptr vma which has information of the range to populate.
- * @is_mm_mmap_locked: True if mmap_read_lock is already acquired by caller.
- *
- * This function populate the physical pages of a virtual
- * address range. The populated physical pages is saved in
- * userptr's sg table. It is similar to get_user_pages but call
- * hmm_range_fault.
- *
- * This function also read mmu notifier sequence # (
- * mmu_interval_read_begin), for the purpose of later
- * comparison (through mmu_interval_read_retry).
- *
- * This must be called with mmap read or write lock held.
- *
- * This function allocates the storage of the userptr sg table.
- * It is caller's responsibility to free it calling sg_free_table.
- *
- * returns: 0 for success; negative error no on failure
- */
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma,
- bool is_mm_mmap_locked)
-{
- unsigned long timeout =
- jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT);
- unsigned long *pfns;
- struct xe_userptr *userptr;
- struct xe_vma *vma = &uvma->vma;
- u64 userptr_start = xe_vma_userptr(vma);
- u64 userptr_end = userptr_start + xe_vma_size(vma);
- struct xe_vm *vm = xe_vma_vm(vma);
- struct hmm_range hmm_range = {
- .pfn_flags_mask = 0, /* ignore pfns */
- .default_flags = HMM_PFN_REQ_FAULT,
- .start = userptr_start,
- .end = userptr_end,
- .notifier = &uvma->userptr.notifier,
- .dev_private_owner = vm->xe,
- };
- bool write = !xe_vma_read_only(vma);
- unsigned long notifier_seq;
- u64 npages;
- int ret;
-
- userptr = &uvma->userptr;
-
- if (is_mm_mmap_locked)
- mmap_assert_locked(userptr->notifier.mm);
-
- if (vma->gpuva.flags & XE_VMA_DESTROYED)
- return 0;
-
- notifier_seq = mmu_interval_read_begin(&userptr->notifier);
- if (notifier_seq == userptr->notifier_seq)
- return 0;
-
- if (userptr->sg)
- xe_hmm_userptr_free_sg(uvma);
-
- npages = xe_npages_in_range(userptr_start, userptr_end);
- pfns = kvmalloc_array(npages, sizeof(*pfns), GFP_KERNEL);
- if (unlikely(!pfns))
- return -ENOMEM;
-
- if (write)
- hmm_range.default_flags |= HMM_PFN_REQ_WRITE;
-
- if (!mmget_not_zero(userptr->notifier.mm)) {
- ret = -EFAULT;
- goto free_pfns;
- }
-
- hmm_range.hmm_pfns = pfns;
-
- while (true) {
- hmm_range.notifier_seq = mmu_interval_read_begin(&userptr->notifier);
-
- if (!is_mm_mmap_locked)
- mmap_read_lock(userptr->notifier.mm);
-
- ret = hmm_range_fault(&hmm_range);
-
- if (!is_mm_mmap_locked)
- mmap_read_unlock(userptr->notifier.mm);
-
- if (ret == -EBUSY) {
- if (time_after(jiffies, timeout))
- break;
-
- continue;
- }
- break;
- }
-
- mmput(userptr->notifier.mm);
-
- if (ret)
- goto free_pfns;
-
- ret = xe_alloc_sg(vm->xe, &userptr->sgt, &hmm_range, &vm->userptr.notifier_lock);
- if (ret)
- goto free_pfns;
-
- ret = down_read_interruptible(&vm->userptr.notifier_lock);
- if (ret)
- goto free_st;
-
- if (mmu_interval_read_retry(hmm_range.notifier, hmm_range.notifier_seq)) {
- ret = -EAGAIN;
- goto out_unlock;
- }
-
- ret = xe_build_sg(vm->xe, &hmm_range, &userptr->sgt,
- &vm->userptr.notifier_lock, write);
- if (ret)
- goto out_unlock;
-
- userptr->sg = &userptr->sgt;
- xe_hmm_userptr_set_mapped(uvma);
- userptr->notifier_seq = hmm_range.notifier_seq;
- up_read(&vm->userptr.notifier_lock);
- kvfree(pfns);
- return 0;
-
-out_unlock:
- up_read(&vm->userptr.notifier_lock);
-free_st:
- sg_free_table(&userptr->sgt);
-free_pfns:
- kvfree(pfns);
- return ret;
-}
diff --git a/drivers/gpu/drm/xe/xe_hmm.h b/drivers/gpu/drm/xe/xe_hmm.h
deleted file mode 100644
index 0ea98d8e7bbc..000000000000
--- a/drivers/gpu/drm/xe/xe_hmm.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/* SPDX-License-Identifier: MIT
- *
- * Copyright © 2024 Intel Corporation
- */
-
-#ifndef _XE_HMM_H_
-#define _XE_HMM_H_
-
-#include <linux/types.h>
-
-struct xe_userptr_vma;
-
-int xe_hmm_userptr_populate_range(struct xe_userptr_vma *uvma, bool is_mm_mmap_locked);
-
-void xe_hmm_userptr_free_sg(struct xe_userptr_vma *uvma);
-
-void xe_hmm_userptr_unmap(struct xe_userptr_vma *uvma);
-#endif
diff --git a/drivers/gpu/drm/xe/xe_huc.c b/drivers/gpu/drm/xe/xe_huc.c
index 6a846e4cb221..0a70c8924582 100644
--- a/drivers/gpu/drm/xe/xe_huc.c
+++ b/drivers/gpu/drm/xe/xe_huc.c
@@ -66,14 +66,18 @@ static int huc_alloc_gsc_pkt(struct xe_huc *huc)
int xe_huc_init(struct xe_huc *huc)
{
struct xe_gt *gt = huc_to_gt(huc);
- struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = gt_to_xe(gt);
int ret;
huc->fw.type = XE_UC_FW_TYPE_HUC;
- /* On platforms with a media GT the HuC is only available there */
- if (tile->media_gt && (gt != tile->media_gt)) {
+ /*
+ * The HuC is only available on the media GT on most platforms. The
+ * exceptions to that rule are the old Xe1 platforms where there was
+ * no separate GT for media IP, so the HuC was part of the primary
+ * GT. Such platforms have graphics versions 12.55 and earlier.
+ */
+ if (!xe_gt_is_media_type(gt) && GRAPHICS_VERx100(xe) > 1255) {
xe_uc_fw_change_status(&huc->fw, XE_UC_FIRMWARE_NOT_SUPPORTED);
return 0;
}
@@ -171,7 +175,7 @@ static int huc_auth_via_gsccs(struct xe_huc *huc)
sizeof(struct pxp43_new_huc_auth_in));
wr_offset = huc_emit_pxp_auth_msg(xe, &pkt->vmap, wr_offset,
xe_bo_ggtt_addr(huc->fw.bo),
- huc->fw.bo->size);
+ xe_bo_size(huc->fw.bo));
do {
err = xe_gsc_pkt_submit_kernel(&gt->uc.gsc, ggtt_offset, wr_offset,
ggtt_offset + PXP43_HUC_AUTH_INOUT_SIZE,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index 93241fd0a4ba..6a9e2a4272dd 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -17,6 +17,7 @@
#include "regs/xe_irq_regs.h"
#include "xe_assert.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_execlist.h"
#include "xe_force_wake.h"
@@ -345,17 +346,26 @@ void xe_hw_engine_enable_ring(struct xe_hw_engine *hwe)
xe_hw_engine_mmio_read32(hwe, RING_MI_MODE(0));
}
-static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_gt *gt,
+static bool xe_hw_engine_match_fixed_cslice_mode(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
+ /*
+ * Xe3p no longer supports load balance mode, so "fixed cslice" mode
+ * is automatic and no RCU_MODE programming is required.
+ */
+ if (GRAPHICS_VER(gt_to_xe(gt)) >= 35)
+ return false;
+
return xe_gt_ccs_mode_enabled(gt) &&
- xe_rtp_match_first_render_or_compute(gt, hwe);
+ xe_rtp_match_first_render_or_compute(xe, gt, hwe);
}
-static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_gt *gt,
+static bool xe_rtp_cfeg_wmtp_disabled(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
- if (GRAPHICS_VER(gt_to_xe(gt)) < 20)
+ if (GRAPHICS_VER(xe) < 20)
return false;
if (hwe->class != XE_ENGINE_CLASS_COMPUTE &&
@@ -575,7 +585,7 @@ static void adjust_idledly(struct xe_hw_engine *hwe)
u32 maxcnt_units_ns = 640;
bool inhibit_switch = 0;
- if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_WA(gt, 16023105232)) {
+ if (!IS_SRIOV_VF(gt_to_xe(hwe->gt)) && XE_GT_WA(gt, 16023105232)) {
idledly = xe_mmio_read32(&gt->mmio, RING_IDLEDLY(hwe->mmio_base));
maxcnt = xe_mmio_read32(&gt->mmio, RING_PWRCTX_MAXCNT(hwe->mmio_base));
@@ -693,7 +703,7 @@ static void read_media_fuses(struct xe_gt *gt)
if (!(BIT(j) & vdbox_mask)) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "vcs%u fused off\n", j);
+ xe_gt_info(gt, "vcs%u fused off\n", j);
}
}
@@ -703,40 +713,63 @@ static void read_media_fuses(struct xe_gt *gt)
if (!(BIT(j) & vebox_mask)) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "vecs%u fused off\n", j);
+ xe_gt_info(gt, "vecs%u fused off\n", j);
}
}
}
+static u32 infer_svccopy_from_meml3(struct xe_gt *gt)
+{
+ u32 meml3 = REG_FIELD_GET(MEML3_EN_MASK,
+ xe_mmio_read32(&gt->mmio, MIRROR_FUSE3));
+ u32 svccopy_mask = 0;
+
+ /*
+ * Each of the four meml3 bits determines the fusing of two service
+ * copy engines.
+ */
+ for (int i = 0; i < 4; i++)
+ svccopy_mask |= (meml3 & BIT(i)) ? 0b11 << 2 * i : 0;
+
+ return svccopy_mask;
+}
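For example, with the two-engines-per-bit mapping above, a hypothetical meml3 fuse value of 0b1010 expands to a service-copy mask of 0b11001100 (standalone sketch, not driver code):

#include <stdio.h>

static unsigned int infer_svccopy(unsigned int meml3)
{
	unsigned int mask = 0;

	/* Each meml3 bit i enables service copy engines 2*i and 2*i + 1. */
	for (int i = 0; i < 4; i++)
		mask |= (meml3 & (1u << i)) ? 0x3u << (2 * i) : 0;
	return mask;
}

int main(void)
{
	printf("0x%x\n", infer_svccopy(0xa));	/* prints 0xcc */
	return 0;
}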
+
+static u32 read_svccopy_fuses(struct xe_gt *gt)
+{
+ return REG_FIELD_GET(FUSE_SERVICE_COPY_ENABLE_MASK,
+ xe_mmio_read32(&gt->mmio, SERVICE_COPY_ENABLE));
+}
+
static void read_copy_fuses(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
u32 bcs_mask;
- if (GRAPHICS_VERx100(xe) < 1260 || GRAPHICS_VERx100(xe) >= 1270)
- return;
-
xe_force_wake_assert_held(gt_to_fw(gt), XE_FW_GT);
- bcs_mask = xe_mmio_read32(&gt->mmio, MIRROR_FUSE3);
- bcs_mask = REG_FIELD_GET(MEML3_EN_MASK, bcs_mask);
+ if (GRAPHICS_VER(xe) >= 35)
+ bcs_mask = read_svccopy_fuses(gt);
+ else if (GRAPHICS_VERx100(xe) == 1260)
+ bcs_mask = infer_svccopy_from_meml3(gt);
+ else
+ return;
- /* BCS0 is always present; only BCS1-BCS8 may be fused off */
- for (int i = XE_HW_ENGINE_BCS1, j = 0; i <= XE_HW_ENGINE_BCS8; ++i, ++j) {
+ /* Only BCS1-BCS8 may be fused off */
+ bcs_mask <<= XE_HW_ENGINE_BCS1;
+ for (int i = XE_HW_ENGINE_BCS1; i <= XE_HW_ENGINE_BCS8; ++i) {
if (!(gt->info.engine_mask & BIT(i)))
continue;
- if (!(BIT(j / 2) & bcs_mask)) {
+ if (!(bcs_mask & BIT(i))) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "bcs%u fused off\n", j);
+ xe_gt_info(gt, "bcs%u fused off\n",
+ i - XE_HW_ENGINE_BCS0);
}
}
}
static void read_compute_fuses_from_dss(struct xe_gt *gt)
{
- struct xe_device *xe = gt_to_xe(gt);
-
/*
* CCS fusing based on DSS masks only applies to platforms that can
* have more than one CCS.
@@ -755,14 +788,13 @@ static void read_compute_fuses_from_dss(struct xe_gt *gt)
if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "ccs%u fused off\n", j);
+ xe_gt_info(gt, "ccs%u fused off\n", j);
}
}
}
static void read_compute_fuses_from_reg(struct xe_gt *gt)
{
- struct xe_device *xe = gt_to_xe(gt);
u32 ccs_mask;
ccs_mask = xe_mmio_read32(&gt->mmio, XEHP_FUSE4);
@@ -774,7 +806,7 @@ static void read_compute_fuses_from_reg(struct xe_gt *gt)
if ((ccs_mask & BIT(j)) == 0) {
gt->info.engine_mask &= ~BIT(i);
- drm_info(&xe->drm, "ccs%u fused off\n", j);
+ xe_gt_info(gt, "ccs%u fused off\n", j);
}
}
}
@@ -789,8 +821,6 @@ static void read_compute_fuses(struct xe_gt *gt)
static void check_gsc_availability(struct xe_gt *gt)
{
- struct xe_device *xe = gt_to_xe(gt);
-
if (!(gt->info.engine_mask & BIT(XE_HW_ENGINE_GSCCS0)))
return;
@@ -806,7 +836,25 @@ static void check_gsc_availability(struct xe_gt *gt)
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_ENABLE, 0);
xe_mmio_write32(&gt->mmio, GUNIT_GSC_INTR_MASK, ~0);
- drm_dbg(&xe->drm, "GSC FW not used, disabling gsccs\n");
+ xe_gt_dbg(gt, "GSC FW not used, disabling gsccs\n");
+ }
+}
+
+static void check_sw_disable(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ u64 sw_allowed = xe_configfs_get_engines_allowed(to_pci_dev(xe->drm.dev));
+ enum xe_hw_engine_id id;
+
+ for (id = 0; id < XE_NUM_HW_ENGINES; ++id) {
+ if (!(gt->info.engine_mask & BIT(id)))
+ continue;
+
+ if (!(sw_allowed & BIT(id))) {
+ gt->info.engine_mask &= ~BIT(id);
+ xe_gt_info(gt, "%s disabled via configfs\n",
+ engine_infos[id].name);
+ }
}
}
@@ -818,6 +866,7 @@ int xe_hw_engines_init_early(struct xe_gt *gt)
read_copy_fuses(gt);
read_compute_fuses(gt);
check_gsc_availability(gt);
+ check_sw_disable(gt);
BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT < XE_HW_ENGINE_PREEMPT_TIMEOUT_MIN);
BUILD_BUG_ON(XE_HW_ENGINE_PREEMPT_TIMEOUT > XE_HW_ENGINE_PREEMPT_TIMEOUT_MAX);
@@ -855,7 +904,7 @@ void xe_hw_engine_handle_irq(struct xe_hw_engine *hwe, u16 intr_vec)
if (hwe->irq_handler)
hwe->irq_handler(hwe, intr_vec);
- if (intr_vec & GT_RENDER_USER_INTERRUPT)
+ if (intr_vec & GT_MI_USER_INTERRUPT)
xe_hw_fence_irq_run(hwe->fence_irq);
}
@@ -1044,12 +1093,13 @@ struct xe_hw_engine *
xe_hw_engine_lookup(struct xe_device *xe,
struct drm_xe_engine_class_instance eci)
{
+ struct xe_gt *gt = xe_device_get_gt(xe, eci.gt_id);
unsigned int idx;
if (eci.engine_class >= ARRAY_SIZE(user_to_xe_engine_class))
return NULL;
- if (eci.gt_id >= xe->info.gt_count)
+ if (!gt)
return NULL;
idx = array_index_nospec(eci.engine_class,
diff --git a/drivers/gpu/drm/xe/xe_hw_engine_group.c b/drivers/gpu/drm/xe/xe_hw_engine_group.c
index 2d68c5b5262a..fa4db5f23342 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine_group.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine_group.c
@@ -13,15 +13,6 @@
#include "xe_vm.h"
static void
-hw_engine_group_free(struct drm_device *drm, void *arg)
-{
- struct xe_hw_engine_group *group = arg;
-
- destroy_workqueue(group->resume_wq);
- kfree(group);
-}
-
-static void
hw_engine_group_resume_lr_jobs_func(struct work_struct *w)
{
struct xe_exec_queue *q;
@@ -53,7 +44,7 @@ hw_engine_group_alloc(struct xe_device *xe)
struct xe_hw_engine_group *group;
int err;
- group = kzalloc(sizeof(*group), GFP_KERNEL);
+ group = drmm_kzalloc(&xe->drm, sizeof(*group), GFP_KERNEL);
if (!group)
return ERR_PTR(-ENOMEM);
@@ -61,14 +52,14 @@ hw_engine_group_alloc(struct xe_device *xe)
if (!group->resume_wq)
return ERR_PTR(-ENOMEM);
+ err = drmm_add_action_or_reset(&xe->drm, __drmm_workqueue_release, group->resume_wq);
+ if (err)
+ return ERR_PTR(err);
+
init_rwsem(&group->mode_sem);
INIT_WORK(&group->resume_work, hw_engine_group_resume_lr_jobs_func);
INIT_LIST_HEAD(&group->exec_queue_list);
- err = drmm_add_action_or_reset(&xe->drm, hw_engine_group_free, group);
- if (err)
- return ERR_PTR(err);
-
return group;
}
@@ -84,25 +75,18 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
enum xe_hw_engine_id id;
struct xe_hw_engine_group *group_rcs_ccs, *group_bcs, *group_vcs_vecs;
struct xe_device *xe = gt_to_xe(gt);
- int err;
group_rcs_ccs = hw_engine_group_alloc(xe);
- if (IS_ERR(group_rcs_ccs)) {
- err = PTR_ERR(group_rcs_ccs);
- goto err_group_rcs_ccs;
- }
+ if (IS_ERR(group_rcs_ccs))
+ return PTR_ERR(group_rcs_ccs);
group_bcs = hw_engine_group_alloc(xe);
- if (IS_ERR(group_bcs)) {
- err = PTR_ERR(group_bcs);
- goto err_group_bcs;
- }
+ if (IS_ERR(group_bcs))
+ return PTR_ERR(group_bcs);
group_vcs_vecs = hw_engine_group_alloc(xe);
- if (IS_ERR(group_vcs_vecs)) {
- err = PTR_ERR(group_vcs_vecs);
- goto err_group_vcs_vecs;
- }
+ if (IS_ERR(group_vcs_vecs))
+ return PTR_ERR(group_vcs_vecs);
for_each_hw_engine(hwe, gt, id) {
switch (hwe->class) {
@@ -119,21 +103,12 @@ int xe_hw_engine_setup_groups(struct xe_gt *gt)
break;
case XE_ENGINE_CLASS_OTHER:
break;
- default:
- drm_warn(&xe->drm, "NOT POSSIBLE");
+ case XE_ENGINE_CLASS_MAX:
+ xe_gt_assert(gt, false);
}
}
return 0;
-
-err_group_vcs_vecs:
- kfree(group_vcs_vecs);
-err_group_bcs:
- kfree(group_bcs);
-err_group_rcs_ccs:
- kfree(group_rcs_ccs);
-
- return err;
}
/**
@@ -238,17 +213,13 @@ static int xe_hw_engine_group_suspend_faulting_lr_jobs(struct xe_hw_engine_group
err = q->ops->suspend_wait(q);
if (err)
- goto err_suspend;
+ return err;
}
if (need_resume)
xe_hw_engine_group_resume_faulting_lr_jobs(group);
return 0;
-
-err_suspend:
- up_write(&group->mode_sem);
- return err;
}
/**
diff --git a/drivers/gpu/drm/xe/xe_hw_error.c b/drivers/gpu/drm/xe/xe_hw_error.c
new file mode 100644
index 000000000000..8c65291f36fc
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_error.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/fault-inject.h>
+
+#include "regs/xe_gsc_regs.h"
+#include "regs/xe_hw_error_regs.h"
+#include "regs/xe_irq_regs.h"
+
+#include "xe_device.h"
+#include "xe_hw_error.h"
+#include "xe_mmio.h"
+#include "xe_survivability_mode.h"
+
+#define HEC_UNCORR_FW_ERR_BITS 4
+extern struct fault_attr inject_csc_hw_error;
+
+/* Error categories reported by hardware */
+enum hardware_error {
+ HARDWARE_ERROR_CORRECTABLE = 0,
+ HARDWARE_ERROR_NONFATAL = 1,
+ HARDWARE_ERROR_FATAL = 2,
+ HARDWARE_ERROR_MAX,
+};
+
+static const char * const hec_uncorrected_fw_errors[] = {
+ "Fatal",
+ "CSE Disabled",
+ "FD Corruption",
+ "Data Corruption"
+};
+
+static const char *hw_error_to_str(const enum hardware_error hw_err)
+{
+ switch (hw_err) {
+ case HARDWARE_ERROR_CORRECTABLE:
+ return "CORRECTABLE";
+ case HARDWARE_ERROR_NONFATAL:
+ return "NONFATAL";
+ case HARDWARE_ERROR_FATAL:
+ return "FATAL";
+ default:
+ return "UNKNOWN";
+ }
+}
+
+static bool fault_inject_csc_hw_error(void)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) && should_fail(&inject_csc_hw_error, 1);
+}
+
+static void csc_hw_error_work(struct work_struct *work)
+{
+ struct xe_tile *tile = container_of(work, typeof(*tile), csc_hw_error_work);
+ struct xe_device *xe = tile_to_xe(tile);
+ int ret;
+
+ ret = xe_survivability_mode_runtime_enable(xe);
+ if (ret)
+ drm_err(&xe->drm, "Failed to enable runtime survivability mode\n");
+}
+
+static void csc_hw_error_handler(struct xe_tile *tile, const enum hardware_error hw_err)
+{
+ const char *hw_err_str = hw_error_to_str(hw_err);
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_mmio *mmio = &tile->mmio;
+ u32 base, err_bit, err_src;
+ unsigned long fw_err;
+
+ if (xe->info.platform != XE_BATTLEMAGE)
+ return;
+
+ base = BMG_GSC_HECI1_BASE;
+ lockdep_assert_held(&xe->irq.lock);
+ err_src = xe_mmio_read32(mmio, HEC_UNCORR_ERR_STATUS(base));
+ if (!err_src) {
+ drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported HEC_ERR_STATUS_%s blank\n",
+ tile->id, hw_err_str);
+ return;
+ }
+
+ if (err_src & UNCORR_FW_REPORTED_ERR) {
+ fw_err = xe_mmio_read32(mmio, HEC_UNCORR_FW_ERR_DW0(base));
+ for_each_set_bit(err_bit, &fw_err, HEC_UNCORR_FW_ERR_BITS) {
+ drm_err_ratelimited(&xe->drm, HW_ERR
+ "%s: HEC Uncorrected FW %s error reported, bit[%d] is set\n",
+ hw_err_str, hec_uncorrected_fw_errors[err_bit],
+ err_bit);
+
+ schedule_work(&tile->csc_hw_error_work);
+ }
+ }
+
+ xe_mmio_write32(mmio, HEC_UNCORR_ERR_STATUS(base), err_src);
+}
+
+static void hw_error_source_handler(struct xe_tile *tile, const enum hardware_error hw_err)
+{
+ const char *hw_err_str = hw_error_to_str(hw_err);
+ struct xe_device *xe = tile_to_xe(tile);
+ unsigned long flags;
+ u32 err_src;
+
+ if (xe->info.platform != XE_BATTLEMAGE)
+ return;
+
+ spin_lock_irqsave(&xe->irq.lock, flags);
+ err_src = xe_mmio_read32(&tile->mmio, DEV_ERR_STAT_REG(hw_err));
+ if (!err_src) {
+ drm_err_ratelimited(&xe->drm, HW_ERR "Tile%d reported DEV_ERR_STAT_%s blank!\n",
+ tile->id, hw_err_str);
+ goto unlock;
+ }
+
+ if (err_src & XE_CSC_ERROR)
+ csc_hw_error_handler(tile, hw_err);
+
+ xe_mmio_write32(&tile->mmio, DEV_ERR_STAT_REG(hw_err), err_src);
+
+unlock:
+ spin_unlock_irqrestore(&xe->irq.lock, flags);
+}
+
+/**
+ * xe_hw_error_irq_handler - irq handling for hw errors
+ * @tile: tile instance
+ * @master_ctl: value read from master interrupt register
+ *
+ * Xe platforms add three error bits to the master interrupt register to support error handling.
+ * These three bits are used to convey the class of error: FATAL, NONFATAL, or CORRECTABLE.
+ * To process the interrupt, determine the source of error by reading the Device Error Source
+ * Register that corresponds to the class of error being serviced.
+ */
+void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl)
+{
+ enum hardware_error hw_err;
+
+ if (fault_inject_csc_hw_error())
+ schedule_work(&tile->csc_hw_error_work);
+
+ for (hw_err = 0; hw_err < HARDWARE_ERROR_MAX; hw_err++)
+ if (master_ctl & ERROR_IRQ(hw_err))
+ hw_error_source_handler(tile, hw_err);
+}
+
+/*
+ * Process hardware errors during boot
+ */
+static void process_hw_errors(struct xe_device *xe)
+{
+ struct xe_tile *tile;
+ u32 master_ctl;
+ u8 id;
+
+ for_each_tile(tile, xe, id) {
+ master_ctl = xe_mmio_read32(&tile->mmio, GFX_MSTR_IRQ);
+ xe_hw_error_irq_handler(tile, master_ctl);
+ xe_mmio_write32(&tile->mmio, GFX_MSTR_IRQ, master_ctl);
+ }
+}
+
+/**
+ * xe_hw_error_init - Initialize hw errors
+ * @xe: xe device instance
+ *
+ * Initialize and check for errors that occurred during boot
+ * prior to driver load
+ */
+void xe_hw_error_init(struct xe_device *xe)
+{
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
+ return;
+
+ INIT_WORK(&tile->csc_hw_error_work, csc_hw_error_work);
+
+ process_hw_errors(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_hw_error.h b/drivers/gpu/drm/xe/xe_hw_error.h
new file mode 100644
index 000000000000..d86e28c5180c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_hw_error.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+#ifndef XE_HW_ERROR_H_
+#define XE_HW_ERROR_H_
+
+#include <linux/types.h>
+
+struct xe_tile;
+struct xe_device;
+
+void xe_hw_error_irq_handler(struct xe_tile *tile, const u32 master_ctl);
+void xe_hw_error_init(struct xe_device *xe);
+#endif
diff --git a/drivers/gpu/drm/xe/xe_hw_fence.c b/drivers/gpu/drm/xe/xe_hw_fence.c
index 0b4f12be3692..b2a0c46dfcd4 100644
--- a/drivers/gpu/drm/xe/xe_hw_fence.c
+++ b/drivers/gpu/drm/xe/xe_hw_fence.c
@@ -100,6 +100,9 @@ void xe_hw_fence_irq_finish(struct xe_hw_fence_irq *irq)
spin_unlock_irqrestore(&irq->lock, flags);
dma_fence_end_signalling(tmp);
}
+
+ /* Safe release of the irq->lock used in dma_fence_init. */
+ synchronize_rcu();
}
void xe_hw_fence_irq_run(struct xe_hw_fence_irq *irq)
@@ -165,7 +168,7 @@ static bool xe_hw_fence_signaled(struct dma_fence *dma_fence)
u32 seqno = xe_map_rd(xe, &fence->seqno_map, 0, u32);
return dma_fence->error ||
- !__dma_fence_is_later(dma_fence->seqno, seqno, dma_fence->ops);
+ !__dma_fence_is_later(dma_fence, dma_fence->seqno, seqno);
}
static bool xe_hw_fence_enable_signaling(struct dma_fence *dma_fence)
diff --git a/drivers/gpu/drm/xe/xe_hwmon.c b/drivers/gpu/drm/xe/xe_hwmon.c
index eb293aec36a0..97879daeefc1 100644
--- a/drivers/gpu/drm/xe/xe_hwmon.c
+++ b/drivers/gpu/drm/xe/xe_hwmon.c
@@ -20,6 +20,8 @@
#include "xe_pcode_api.h"
#include "xe_sriov.h"
#include "xe_pm.h"
+#include "xe_vsec.h"
+#include "regs/xe_pmt.h"
enum xe_hwmon_reg {
REG_TEMP,
@@ -51,6 +53,22 @@ enum xe_fan_channel {
FAN_MAX,
};
+/* Attribute index for powerX_xxx_interval sysfs entries */
+enum sensor_attr_power {
+ SENSOR_INDEX_PSYS_PL1,
+ SENSOR_INDEX_PKG_PL1,
+ SENSOR_INDEX_PSYS_PL2,
+ SENSOR_INDEX_PKG_PL2,
+};
+
+/*
+ * For platforms that support mailbox commands for power limits, REG_PKG_POWER_SKU_UNIT is
+ * not supported; the SKU unit values below are used instead.
+ */
+#define PWR_UNIT 0x3
+#define ENERGY_UNIT 0xe
+#define TIME_UNIT 0xa
+
/*
* SF_* - scale factors for particular quantities according to hwmon spec.
*/
@@ -60,6 +78,19 @@ enum xe_fan_channel {
#define SF_ENERGY 1000000 /* microjoules */
#define SF_TIME 1000 /* milliseconds */
+/*
+ * PL*_HWMON_ATTR - mapping of hardware power limits to corresponding hwmon power attribute.
+ */
+#define PL1_HWMON_ATTR hwmon_power_max
+#define PL2_HWMON_ATTR hwmon_power_cap
+
+#define PWR_ATTR_TO_STR(attr) (((attr) == hwmon_power_max) ? "PL1" : "PL2")
+
+/*
+ * Timeout for power limit write mailbox command.
+ */
+#define PL_WRITE_MBX_TIMEOUT_MS (1)
+
/**
* struct xe_hwmon_energy_info - to accumulate energy
*/
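
Note on the fixed SKU units above: on the mailbox platforms the shifts are hard-coded rather than read from REG_PKG_POWER_SKU_UNIT, so raw power values are in 1/8 W steps (PWR_UNIT = 3), energy in 1/16384 J steps and time in 1/1024 s steps, matching how the scl_shift_* values are applied later in this file. Below is a standalone sketch of the power conversion in both directions, mirroring the mul_u64_u32_shr()/DIV_ROUND_CLOSEST_ULL() arithmetic used further down (helper names and the 300 W figure are illustrative only; SF_POWER is the hwmon microwatt scale factor):

#include <stdint.h>
#include <stdio.h>

#define SF_POWER 1000000u	/* microwatts, per the hwmon spec */
#define PWR_UNIT 0x3		/* raw power fields are in 1/8 W steps */

/* raw field -> microwatts, same math as mul_u64_u32_shr(raw, SF_POWER, shift) */
static uint64_t raw_to_uw(uint32_t raw, unsigned int shift)
{
	return ((uint64_t)raw * SF_POWER) >> shift;
}

/* microwatts -> raw field, same math as DIV_ROUND_CLOSEST_ULL(uw << shift, SF_POWER) */
static uint32_t uw_to_raw(uint64_t uw, unsigned int shift)
{
	return (uint32_t)(((uw << shift) + SF_POWER / 2) / SF_POWER);
}

int main(void)
{
	uint32_t raw = uw_to_raw(300000000ull, PWR_UNIT);	/* 300 W request */

	/* prints raw=0x960, back=300000000 uW */
	printf("raw=0x%x, back=%llu uW\n", (unsigned)raw,
	       (unsigned long long)raw_to_uw(raw, PWR_UNIT));
	return 0;
}
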
@@ -100,8 +131,87 @@ struct xe_hwmon {
struct xe_hwmon_energy_info ei[CHANNEL_MAX];
/** @fi: Fan info for fanN_input */
struct xe_hwmon_fan_info fi[FAN_MAX];
+ /** @boot_power_limit_read: true if the boot power limits have been read */
+ bool boot_power_limit_read;
+ /** @pl1_on_boot: power limit PL1 on boot */
+ u32 pl1_on_boot[CHANNEL_MAX];
+ /** @pl2_on_boot: power limit PL2 on boot */
+ u32 pl2_on_boot[CHANNEL_MAX];
+
};
+static int xe_hwmon_pcode_read_power_limit(const struct xe_hwmon *hwmon, u32 attr, int channel,
+ u32 *uval)
+{
+ struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+ u32 val0 = 0, val1 = 0;
+ int ret = 0;
+
+ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP,
+ (channel == CHANNEL_CARD) ?
+ READ_PSYSGPU_POWER_LIMIT :
+ READ_PACKAGE_POWER_LIMIT,
+ hwmon->boot_power_limit_read ?
+ READ_PL_FROM_PCODE : READ_PL_FROM_FW),
+ &val0, &val1);
+
+ if (ret) {
+ drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n",
+ channel, val0, val1, ret);
+ *uval = 0;
+ return ret;
+ }
+
+ /* return the value only if limit is enabled */
+ if (attr == PL1_HWMON_ATTR)
+ *uval = (val0 & PWR_LIM_EN) ? val0 : 0;
+ else if (attr == PL2_HWMON_ATTR)
+ *uval = (val1 & PWR_LIM_EN) ? val1 : 0;
+ else if (attr == hwmon_power_label)
+ *uval = (val0 & PWR_LIM_EN) ? 1 : (val1 & PWR_LIM_EN) ? 1 : 0;
+ else
+ *uval = 0;
+
+ return ret;
+}
+
+static int xe_hwmon_pcode_rmw_power_limit(const struct xe_hwmon *hwmon, u32 attr, u8 channel,
+ u32 clr, u32 set)
+{
+ struct xe_tile *root_tile = xe_device_get_root_tile(hwmon->xe);
+ u32 val0 = 0, val1 = 0;
+ int ret = 0;
+
+ ret = xe_pcode_read(root_tile, PCODE_MBOX(PCODE_POWER_SETUP,
+ (channel == CHANNEL_CARD) ?
+ READ_PSYSGPU_POWER_LIMIT :
+ READ_PACKAGE_POWER_LIMIT,
+ hwmon->boot_power_limit_read ?
+ READ_PL_FROM_PCODE : READ_PL_FROM_FW),
+ &val0, &val1);
+
+ if (ret)
+ drm_dbg(&hwmon->xe->drm, "read failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n",
+ channel, val0, val1, ret);
+
+ if (attr == PL1_HWMON_ATTR)
+ val0 = (val0 & ~clr) | set;
+ else if (attr == PL2_HWMON_ATTR)
+ val1 = (val1 & ~clr) | set;
+ else
+ return -EIO;
+
+ ret = xe_pcode_write64_timeout(root_tile, PCODE_MBOX(PCODE_POWER_SETUP,
+ (channel == CHANNEL_CARD) ?
+ WRITE_PSYSGPU_POWER_LIMIT :
+ WRITE_PACKAGE_POWER_LIMIT, 0),
+ val0, val1, PL_WRITE_MBX_TIMEOUT_MS);
+ if (ret)
+ drm_dbg(&hwmon->xe->drm, "write failed ch %d val0 0x%08x, val1 0x%08x, ret %d\n",
+ channel, val0, val1, ret);
+ return ret;
+}
+
static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg hwmon_reg,
int channel)
{
@@ -122,29 +232,19 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
}
break;
case REG_PKG_RAPL_LIMIT:
- if (xe->info.platform == XE_BATTLEMAGE) {
- if (channel == CHANNEL_PKG)
- return BMG_PACKAGE_RAPL_LIMIT;
- else
- return BMG_PLATFORM_POWER_LIMIT;
- } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) {
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
return PVC_GT0_PACKAGE_RAPL_LIMIT;
- } else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) {
+ else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
return PCU_CR_PACKAGE_RAPL_LIMIT;
- }
break;
case REG_PKG_POWER_SKU:
- if (xe->info.platform == XE_BATTLEMAGE)
- return BMG_PACKAGE_POWER_SKU;
- else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG)
return PVC_GT0_PACKAGE_POWER_SKU;
else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG))
return PCU_CR_PACKAGE_POWER_SKU;
break;
case REG_PKG_POWER_SKU_UNIT:
- if (xe->info.platform == XE_BATTLEMAGE)
- return BMG_PACKAGE_POWER_SKU_UNIT;
- else if (xe->info.platform == XE_PVC)
+ if (xe->info.platform == XE_PVC)
return PVC_GT0_PACKAGE_POWER_SKU_UNIT;
else if (xe->info.platform == XE_DG2)
return PCU_CR_PACKAGE_POWER_SKU_UNIT;
@@ -154,12 +254,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
return GT_PERF_STATUS;
break;
case REG_PKG_ENERGY_STATUS:
- if (xe->info.platform == XE_BATTLEMAGE) {
- if (channel == CHANNEL_PKG)
- return BMG_PACKAGE_ENERGY_STATUS;
- else
- return BMG_PLATFORM_ENERGY_STATUS;
- } else if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) {
+ if (xe->info.platform == XE_PVC && channel == CHANNEL_PKG) {
return PVC_GT0_PLATFORM_ENERGY_STATUS;
} else if ((xe->info.platform == XE_DG2) && (channel == CHANNEL_PKG)) {
return PCU_CR_PACKAGE_ENERGY_STATUS;
@@ -181,7 +276,7 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
return XE_REG(0);
}
-#define PL1_DISABLE 0
+#define PL_DISABLE 0
/*
* HW allows arbitrary PL1 limits to be set but silently clamps these values to
@@ -189,94 +284,145 @@ static struct xe_reg xe_hwmon_get_reg(struct xe_hwmon *hwmon, enum xe_hwmon_reg
* same pattern for sysfs, allow arbitrary PL1 limits to be set but display
* clamped values when read.
*/
-static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, int channel, long *value)
+static void xe_hwmon_power_max_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *value)
{
- u64 reg_val, min, max;
+ u32 reg_val = 0;
struct xe_device *xe = hwmon->xe;
struct xe_reg rapl_limit, pkg_power_sku;
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
- rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
- pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+ mutex_lock(&hwmon->hwmon_lock);
- /*
- * Valid check of REG_PKG_RAPL_LIMIT is already done in xe_hwmon_power_is_visible.
- * So not checking it again here.
- */
- if (!xe_reg_is_valid(pkg_power_sku)) {
- drm_warn(&xe->drm, "pkg_power_sku invalid\n");
- *value = 0;
- return;
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val);
+ } else {
+ rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
+ pkg_power_sku = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+ reg_val = xe_mmio_read32(mmio, rapl_limit);
}
- mutex_lock(&hwmon->hwmon_lock);
-
- reg_val = xe_mmio_read32(mmio, rapl_limit);
- /* Check if PL1 limit is disabled */
- if (!(reg_val & PKG_PWR_LIM_1_EN)) {
- *value = PL1_DISABLE;
+ /* Check if PL limits are disabled. */
+ if (!(reg_val & PWR_LIM_EN)) {
+ *value = PL_DISABLE;
+ drm_info(&hwmon->xe->drm, "%s disabled for channel %d, val 0x%08x\n",
+ PWR_ATTR_TO_STR(attr), channel, reg_val);
goto unlock;
}
- reg_val = REG_FIELD_GET(PKG_PWR_LIM_1, reg_val);
- *value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
+ reg_val = REG_FIELD_GET(PWR_LIM_VAL, reg_val);
+ *value = mul_u32_u32(reg_val, SF_POWER) >> hwmon->scl_shift_power;
- reg_val = xe_mmio_read64_2x32(mmio, pkg_power_sku);
- min = REG_FIELD_GET(PKG_MIN_PWR, reg_val);
- min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
- max = REG_FIELD_GET(PKG_MAX_PWR, reg_val);
- max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+ /* For platforms with mailbox power limit support, clamping is done by pcode. */
+ if (!hwmon->xe->info.has_mbx_power_limits) {
+ u64 pkg_pwr, min, max;
- if (min && max)
- *value = clamp_t(u64, *value, min, max);
+ pkg_pwr = xe_mmio_read64_2x32(mmio, pkg_power_sku);
+ min = REG_FIELD_GET(PKG_MIN_PWR, pkg_pwr);
+ max = REG_FIELD_GET(PKG_MAX_PWR, pkg_pwr);
+ min = mul_u64_u32_shr(min, SF_POWER, hwmon->scl_shift_power);
+ max = mul_u64_u32_shr(max, SF_POWER, hwmon->scl_shift_power);
+ if (min && max)
+ *value = clamp_t(u64, *value, min, max);
+ }
unlock:
mutex_unlock(&hwmon->hwmon_lock);
}
-static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, int channel, long value)
+static int xe_hwmon_power_max_write(struct xe_hwmon *hwmon, u32 attr, int channel, long value)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
int ret = 0;
- u64 reg_val;
+ u32 reg_val, max;
struct xe_reg rapl_limit;
+ u64 max_supp_power_limit = 0;
+
+ mutex_lock(&hwmon->hwmon_lock);
rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
- mutex_lock(&hwmon->hwmon_lock);
+ /* Disable Power Limit and verify, as limit cannot be disabled on all platforms. */
+ if (value == PL_DISABLE) {
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ drm_dbg(&hwmon->xe->drm, "disabling %s on channel %d\n",
+ PWR_ATTR_TO_STR(attr), channel);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM_EN, 0);
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &reg_val);
+ } else {
+ reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM_EN, 0);
+ reg_val = xe_mmio_read32(mmio, rapl_limit);
+ }
- /* Disable PL1 limit and verify, as limit cannot be disabled on all platforms */
- if (value == PL1_DISABLE) {
- reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN, 0);
- reg_val = xe_mmio_read32(mmio, rapl_limit);
- if (reg_val & PKG_PWR_LIM_1_EN) {
- drm_warn(&hwmon->xe->drm, "PL1 disable is not supported!\n");
+ if (reg_val & PWR_LIM_EN) {
+ drm_warn(&hwmon->xe->drm, "Power limit disable is not supported!\n");
ret = -EOPNOTSUPP;
}
goto unlock;
}
+ /*
+ * If the sysfs value exceeds the maximum pcode supported power limit value, clamp it to
+ * the supported maximum (U12.3 format).
+ * This is to avoid truncation during the reg_val calculation below and ensures that a valid
+ * power limit is sent to pcode, which then clamps it to the card-supported value.
+ */
+ max_supp_power_limit = ((PWR_LIM_VAL) >> hwmon->scl_shift_power) * SF_POWER;
+ if (value > max_supp_power_limit) {
+ value = max_supp_power_limit;
+ drm_info(&hwmon->xe->drm,
+ "Power limit clamped as selected %s exceeds channel %d limit\n",
+ PWR_ATTR_TO_STR(attr), channel);
+ }
+
/* Computation in 64-bits to avoid overflow. Round to nearest. */
reg_val = DIV_ROUND_CLOSEST_ULL((u64)value << hwmon->scl_shift_power, SF_POWER);
- reg_val = PKG_PWR_LIM_1_EN | REG_FIELD_PREP(PKG_PWR_LIM_1, reg_val);
- reg_val = xe_mmio_rmw32(mmio, rapl_limit, PKG_PWR_LIM_1_EN | PKG_PWR_LIM_1, reg_val);
+ /*
+ * Clamp the power limit to the GPU firmware default as a maximum, as an additional
+ * protection on top of the pcode clamp.
+ */
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ max = (attr == PL1_HWMON_ATTR) ?
+ hwmon->pl1_on_boot[channel] : hwmon->pl2_on_boot[channel];
+ max = REG_FIELD_PREP(PWR_LIM_VAL, max);
+ if (reg_val > max) {
+ reg_val = max;
+ drm_dbg(&hwmon->xe->drm,
+ "Clamping power limit to GPU firmware default 0x%x\n",
+ reg_val);
+ }
+ }
+
+ reg_val = PWR_LIM_EN | REG_FIELD_PREP(PWR_LIM_VAL, reg_val);
+
+ if (hwmon->xe->info.has_mbx_power_limits)
+ ret = xe_hwmon_pcode_rmw_power_limit(hwmon, attr, channel, PWR_LIM, reg_val);
+ else
+ reg_val = xe_mmio_rmw32(mmio, rapl_limit, PWR_LIM, reg_val);
unlock:
mutex_unlock(&hwmon->hwmon_lock);
return ret;
}
-static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, int channel, long *value)
+static void xe_hwmon_power_rated_max_read(struct xe_hwmon *hwmon, u32 attr, int channel,
+ long *value)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
- struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
- u64 reg_val;
+ u32 reg_val;
+
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ /* PL1 is rated max if supported. */
+ xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, channel, &reg_val);
+ } else {
+ /*
+ * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so the validity
+ * check for this register can be skipped.
+ * See xe_hwmon_power_is_visible.
+ */
+ struct xe_reg reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+
+ reg_val = xe_mmio_read32(mmio, reg);
+ }
- /*
- * This sysfs file won't be visible if REG_PKG_POWER_SKU is invalid, so valid check
- * for this register can be skipped.
- * See xe_hwmon_power_is_visible.
- */
- reg_val = xe_mmio_read32(mmio, reg);
reg_val = REG_FIELD_GET(PKG_TDP, reg_val);
*value = mul_u64_u32_shr(reg_val, SF_POWER, hwmon->scl_shift_power);
}
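
The max_supp_power_limit bound used in the write path above depends on the width of the PWR_LIM_VAL field; assuming it is the 15-bit U12.3 field that the comment implies (12 integer bits plus 3 fractional bits, which line up with scl_shift_power = 3 on the mailbox platforms), the clamp works out to roughly 4095 W. A standalone sketch of that bound (the 0x7fff mask is an assumption for illustration, not taken from the register headers):

#include <stdint.h>
#include <stdio.h>

#define SF_POWER	1000000ull	/* microwatts */
#define PWR_LIM_VAL	0x7fffu		/* assumed 15-bit U12.3 limit field */
#define PWR_SHIFT	3		/* scl_shift_power on mailbox platforms */

int main(void)
{
	/* Same expression as the driver: drop the fractional bits, scale to microwatts. */
	uint64_t max_uw = (uint64_t)(PWR_LIM_VAL >> PWR_SHIFT) * SF_POWER;

	/* prints 4095000000 uW (~4095 W) */
	printf("max supported limit: %llu uW (~%llu W)\n",
	       (unsigned long long)max_uw, (unsigned long long)(max_uw / SF_POWER));
	return 0;
}
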
@@ -306,16 +452,37 @@ xe_hwmon_energy_get(struct xe_hwmon *hwmon, int channel, long *energy)
{
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
struct xe_hwmon_energy_info *ei = &hwmon->ei[channel];
- u64 reg_val;
+ u32 reg_val;
+ int ret = 0;
- reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
- channel));
+ /* Energy is supported only for card and pkg */
+ if (channel > CHANNEL_PKG) {
+ *energy = 0;
+ return;
+ }
- if (reg_val >= ei->reg_val_prev)
- ei->accum_energy += reg_val - ei->reg_val_prev;
- else
- ei->accum_energy += UINT_MAX - ei->reg_val_prev + reg_val;
+ if (hwmon->xe->info.platform == XE_BATTLEMAGE) {
+ u64 pmt_val;
+ ret = xe_pmt_telem_read(to_pci_dev(hwmon->xe->drm.dev),
+ xe_mmio_read32(mmio, PUNIT_TELEMETRY_GUID),
+ &pmt_val, BMG_ENERGY_STATUS_PMT_OFFSET, sizeof(pmt_val));
+ if (ret != sizeof(pmt_val)) {
+ drm_warn(&hwmon->xe->drm, "energy read from pmt failed, ret %d\n", ret);
+ *energy = 0;
+ return;
+ }
+
+ if (channel == CHANNEL_PKG)
+ reg_val = REG_FIELD_GET64(ENERGY_PKG, pmt_val);
+ else
+ reg_val = REG_FIELD_GET64(ENERGY_CARD, pmt_val);
+ } else {
+ reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
+ channel));
+ }
+
+ ei->accum_energy += reg_val - ei->reg_val_prev;
ei->reg_val_prev = reg_val;
*energy = mul_u64_u32_shr(ei->accum_energy, SF_ENERGY,
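
The accumulation above now relies on unsigned 32-bit arithmetic: reg_val - ei->reg_val_prev is computed modulo 2^32, so the delta stays correct across a counter wrap without the removed UINT_MAX branch (which, as written, was off by one on a wrap). A standalone illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t accum = 0;
	uint32_t prev = 0xfffffff0u;	/* counter just before wrapping */
	uint32_t now  = 0x00000010u;	/* counter after wrapping past 2^32 */

	/* Modular u32 subtraction gives the true delta across the wrap. */
	accum += (uint32_t)(now - prev);

	printf("delta = %llu\n", (unsigned long long)accum);	/* prints 32 */
	return 0;
}
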
@@ -328,25 +495,39 @@ xe_hwmon_power_max_interval_show(struct device *dev, struct device_attribute *at
{
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
- u32 x, y, x_w = 2; /* 2 bits */
- u64 r, tau4, out;
- int sensor_index = to_sensor_dev_attr(attr)->index;
+ u32 reg_val, x, y, x_w = 2; /* 2 bits */
+ u64 tau4, out;
+ int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
+ u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR;
+
+ int ret = 0;
xe_pm_runtime_get(hwmon->xe);
mutex_lock(&hwmon->hwmon_lock);
- r = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index));
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ ret = xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &reg_val);
+ if (ret) {
+ drm_err(&hwmon->xe->drm,
+ "power interval read fail, ch %d, attr %d, val 0x%08x, ret %d\n",
+ channel, power_attr, reg_val, ret);
+ reg_val = 0;
+ }
+ } else {
+ reg_val = xe_mmio_read32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
+ channel));
+ }
mutex_unlock(&hwmon->hwmon_lock);
xe_pm_runtime_put(hwmon->xe);
- x = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_X, r);
- y = REG_FIELD_GET(PKG_PWR_LIM_1_TIME_Y, r);
+ x = REG_FIELD_GET(PWR_LIM_TIME_X, reg_val);
+ y = REG_FIELD_GET(PWR_LIM_TIME_Y, reg_val);
/*
- * tau = 1.x * power(2,y), x = bits(23:22), y = bits(21:17)
+ * tau = (1 + (x / 4)) * power(2,y), x = bits(23:22), y = bits(21:17)
* = (4 | x) << (y - 2)
*
* Here (y - 2) ensures a 1.x fixed point representation of 1.x
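
With the default scl_shift_time of 0xa, the decode above gives 256 s for the largest encodable window (x = 0, y = 0x12), the same 256-second maximum mentioned in the store path below. A standalone sketch of the decode (the helper name is illustrative, not part of the driver):

#include <stdint.h>
#include <stdio.h>

#define SF_TIME 1000u	/* milliseconds */

/* tau = (1 + x/4) * 2^y in units of 1/2^scl_shift_time seconds, i.e. (4 | x) << (y - 2) */
static uint64_t interval_ms(unsigned int x, unsigned int y, unsigned int scl_shift_time)
{
	const unsigned int x_w = 2;			/* x is a 2-bit fraction */
	uint64_t tau4 = (uint64_t)((1 << x_w) | x) << y;

	return (tau4 * SF_TIME) >> (scl_shift_time + x_w);
}

int main(void)
{
	/* x = 0, y = 0x12, scl_shift_time = 0xa -> prints 256000 ms */
	printf("%llu ms\n", (unsigned long long)interval_ms(0, 0x12, 0xa));
	return 0;
}
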
@@ -372,15 +553,16 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
u32 x, y, rxy, x_w = 2; /* 2 bits */
u64 tau4, r, max_win;
unsigned long val;
+ int channel = (to_sensor_dev_attr(attr)->index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
+ u32 power_attr = (to_sensor_dev_attr(attr)->index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR;
int ret;
- int sensor_index = to_sensor_dev_attr(attr)->index;
ret = kstrtoul(buf, 0, &val);
if (ret)
return ret;
/*
- * Max HW supported tau in '1.x * power(2,y)' format, x = 0, y = 0x12.
+ * Max HW supported tau in '(1 + (x / 4)) * power(2,y)' format, x = 0, y = 0x12.
* The hwmon->scl_shift_time default of 0xa results in a max tau of 256 seconds.
*
* The ideal scenario is for PKG_MAX_WIN to be read from the PKG_PWR_SKU register.
@@ -404,7 +586,7 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
return -EINVAL;
/* val in hw units */
- val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME);
+ val = DIV_ROUND_CLOSEST_ULL((u64)val << hwmon->scl_shift_time, SF_TIME) + 1;
/*
* Convert val to 1.x * power(2,y)
@@ -419,14 +601,18 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
x = (val - (1ul << y)) << x_w >> y;
}
- rxy = REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_X, x) | REG_FIELD_PREP(PKG_PWR_LIM_1_TIME_Y, y);
+ rxy = REG_FIELD_PREP(PWR_LIM_TIME_X, x) |
+ REG_FIELD_PREP(PWR_LIM_TIME_Y, y);
xe_pm_runtime_get(hwmon->xe);
mutex_lock(&hwmon->hwmon_lock);
- r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, sensor_index),
- PKG_PWR_LIM_1_TIME, rxy);
+ if (hwmon->xe->info.has_mbx_power_limits)
+ xe_hwmon_pcode_rmw_power_limit(hwmon, power_attr, channel, PWR_LIM_TIME, rxy);
+ else
+ r = xe_mmio_rmw32(mmio, xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel),
+ PWR_LIM_TIME, rxy);
mutex_unlock(&hwmon->hwmon_lock);
@@ -435,17 +621,28 @@ xe_hwmon_power_max_interval_store(struct device *dev, struct device_attribute *a
return count;
}
+/* PSYS PL1 */
static SENSOR_DEVICE_ATTR(power1_max_interval, 0664,
xe_hwmon_power_max_interval_show,
- xe_hwmon_power_max_interval_store, CHANNEL_CARD);
-
+ xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL1);
+/* PKG PL1 */
static SENSOR_DEVICE_ATTR(power2_max_interval, 0664,
xe_hwmon_power_max_interval_show,
- xe_hwmon_power_max_interval_store, CHANNEL_PKG);
+ xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL1);
+/* PSYS PL2 */
+static SENSOR_DEVICE_ATTR(power1_cap_interval, 0664,
+ xe_hwmon_power_max_interval_show,
+ xe_hwmon_power_max_interval_store, SENSOR_INDEX_PSYS_PL2);
+/* PKG PL2 */
+static SENSOR_DEVICE_ATTR(power2_cap_interval, 0664,
+ xe_hwmon_power_max_interval_show,
+ xe_hwmon_power_max_interval_store, SENSOR_INDEX_PKG_PL2);
static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_power1_max_interval.dev_attr.attr,
&sensor_dev_attr_power2_max_interval.dev_attr.attr,
+ &sensor_dev_attr_power1_cap_interval.dev_attr.attr,
+ &sensor_dev_attr_power2_cap_interval.dev_attr.attr,
NULL
};
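
The new _cap_interval attributes reuse the same show/store callbacks; the sensor index is decoded as channel = (index % 2) ? PKG : CARD and limit = (index > 1) ? PL2 : PL1. A quick standalone check of that decode (the strings are illustrative labels only):

#include <stdio.h>

int main(void)
{
	static const char * const chan[] = { "card (power1)", "pkg (power2)" };
	static const char * const lim[]  = { "PL1 (_max_interval)", "PL2 (_cap_interval)" };

	/* index 0..3 correspond to SENSOR_INDEX_PSYS_PL1 .. SENSOR_INDEX_PKG_PL2 */
	for (int index = 0; index < 4; index++)
		printf("index %d -> %s, %s\n", index, chan[index % 2], lim[index > 1]);
	return 0;
}
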
@@ -455,12 +652,20 @@ static umode_t xe_hwmon_attributes_visible(struct kobject *kobj,
struct device *dev = kobj_to_dev(kobj);
struct xe_hwmon *hwmon = dev_get_drvdata(dev);
int ret = 0;
+ int channel = (index % 2) ? CHANNEL_PKG : CHANNEL_CARD;
+ u32 power_attr = (index > 1) ? PL2_HWMON_ATTR : PL1_HWMON_ATTR;
+ u32 uval = 0;
+ struct xe_reg rapl_limit;
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
- xe_pm_runtime_get(hwmon->xe);
-
- ret = xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, index)) ? attr->mode : 0;
-
- xe_pm_runtime_put(hwmon->xe);
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, power_attr, channel, &uval);
+ } else if (power_attr != PL2_HWMON_ATTR) {
+ rapl_limit = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
+ if (xe_reg_is_valid(rapl_limit))
+ uval = xe_mmio_read32(mmio, rapl_limit);
+ }
+ ret = (uval & PWR_LIM_EN) ? attr->mode : 0;
return ret;
}
@@ -478,8 +683,9 @@ static const struct attribute_group *hwmon_groups[] = {
static const struct hwmon_channel_info * const hwmon_info[] = {
HWMON_CHANNEL_INFO(temp, HWMON_T_LABEL, HWMON_T_INPUT | HWMON_T_LABEL,
HWMON_T_INPUT | HWMON_T_LABEL),
- HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL,
- HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_CRIT | HWMON_P_LABEL),
+ HWMON_CHANNEL_INFO(power, HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CRIT |
+ HWMON_P_CAP,
+ HWMON_P_MAX | HWMON_P_RATED_MAX | HWMON_P_LABEL | HWMON_P_CAP),
HWMON_CHANNEL_INFO(curr, HWMON_C_LABEL, HWMON_C_CRIT | HWMON_C_LABEL),
HWMON_CHANNEL_INFO(in, HWMON_I_INPUT | HWMON_I_LABEL, HWMON_I_INPUT | HWMON_I_LABEL),
HWMON_CHANNEL_INFO(energy, HWMON_E_INPUT | HWMON_E_LABEL, HWMON_E_INPUT | HWMON_E_LABEL),
@@ -527,7 +733,7 @@ static int xe_hwmon_power_curr_crit_read(struct xe_hwmon *hwmon, int channel,
long *value, u32 scale_factor)
{
int ret;
- u32 uval;
+ u32 uval = 0;
mutex_lock(&hwmon->hwmon_lock);
@@ -547,9 +753,23 @@ static int xe_hwmon_power_curr_crit_write(struct xe_hwmon *hwmon, int channel,
{
int ret;
u32 uval;
+ u64 max_crit_power_curr = 0;
mutex_lock(&hwmon->hwmon_lock);
+ /*
+ * If the sysfs value exceeds the pcode mailbox cmd POWER_SETUP_SUBCOMMAND_WRITE_I1
+ * max supported value, clamp it to the command's max (U10.6 format).
+ * This is to avoid truncation during the uval calculation below and ensures that a valid
+ * power limit is sent to pcode, which then clamps it to the card-supported value.
+ */
+ max_crit_power_curr = (POWER_SETUP_I1_DATA_MASK >> POWER_SETUP_I1_SHIFT) * scale_factor;
+ if (value > max_crit_power_curr) {
+ value = max_crit_power_curr;
+ drm_info(&hwmon->xe->drm,
+ "Power limit clamped as selected exceeds channel %d limit\n",
+ channel);
+ }
uval = DIV_ROUND_CLOSEST_ULL(value << POWER_SETUP_I1_SHIFT, scale_factor);
ret = xe_hwmon_pcode_write_i1(hwmon, uval);
@@ -600,23 +820,62 @@ xe_hwmon_temp_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
static umode_t
xe_hwmon_power_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
- u32 uval;
+ u32 uval = 0;
+ struct xe_reg reg;
+ struct xe_mmio *mmio = xe_root_tile_mmio(hwmon->xe);
switch (attr) {
case hwmon_power_max:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT,
- channel)) ? 0664 : 0;
+ case hwmon_power_cap:
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval);
+ } else if (attr != PL2_HWMON_ATTR) {
+ reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
+ if (xe_reg_is_valid(reg))
+ uval = xe_mmio_read32(mmio, reg);
+ }
+ if (uval & PWR_LIM_EN) {
+ drm_info(&hwmon->xe->drm, "%s is supported on channel %d\n",
+ PWR_ATTR_TO_STR(attr), channel);
+ return 0664;
+ }
+ drm_dbg(&hwmon->xe->drm, "%s is unsupported on channel %d\n",
+ PWR_ATTR_TO_STR(attr), channel);
+ return 0;
case hwmon_power_rated_max:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU,
- channel)) ? 0444 : 0;
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ return 0;
+ } else {
+ reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+ if (xe_reg_is_valid(reg))
+ uval = xe_mmio_read32(mmio, reg);
+ return uval ? 0444 : 0;
+ }
case hwmon_power_crit:
- if (channel == CHANNEL_PKG)
- return (xe_hwmon_pcode_read_i1(hwmon, &uval) ||
- !(uval & POWER_SETUP_I1_WATTS)) ? 0 : 0644;
+ if (channel == CHANNEL_CARD) {
+ xe_hwmon_pcode_read_i1(hwmon, &uval);
+ return (uval & POWER_SETUP_I1_WATTS) ? 0644 : 0;
+ }
break;
case hwmon_power_label:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT,
- channel)) ? 0444 : 0;
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ xe_hwmon_pcode_read_power_limit(hwmon, attr, channel, &uval);
+ } else {
+ reg = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU, channel);
+ if (xe_reg_is_valid(reg))
+ uval = xe_mmio_read32(mmio, reg);
+
+ if (!uval) {
+ reg = xe_hwmon_get_reg(hwmon, REG_PKG_RAPL_LIMIT, channel);
+ if (xe_reg_is_valid(reg))
+ uval = xe_mmio_read32(mmio, reg);
+ }
+ }
+ if ((!(uval & PWR_LIM_EN)) && channel == CHANNEL_CARD) {
+ xe_hwmon_pcode_read_i1(hwmon, &uval);
+ return (uval & POWER_SETUP_I1_WATTS) ? 0444 : 0;
+ }
+ return (uval) ? 0444 : 0;
default:
return 0;
}
@@ -628,10 +887,11 @@ xe_hwmon_power_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
{
switch (attr) {
case hwmon_power_max:
- xe_hwmon_power_max_read(hwmon, channel, val);
+ case hwmon_power_cap:
+ xe_hwmon_power_max_read(hwmon, attr, channel, val);
return 0;
case hwmon_power_rated_max:
- xe_hwmon_power_rated_max_read(hwmon, channel, val);
+ xe_hwmon_power_rated_max_read(hwmon, attr, channel, val);
return 0;
case hwmon_power_crit:
return xe_hwmon_power_curr_crit_read(hwmon, channel, val, SF_POWER);
@@ -644,8 +904,9 @@ static int
xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
{
switch (attr) {
+ case hwmon_power_cap:
case hwmon_power_max:
- return xe_hwmon_power_max_write(hwmon, channel, val);
+ return xe_hwmon_power_max_write(hwmon, attr, channel, val);
case hwmon_power_crit:
return xe_hwmon_power_curr_crit_write(hwmon, channel, val, SF_POWER);
default:
@@ -656,7 +917,7 @@ xe_hwmon_power_write(struct xe_hwmon *hwmon, u32 attr, int channel, long val)
static umode_t
xe_hwmon_curr_is_visible(const struct xe_hwmon *hwmon, u32 attr, int channel)
{
- u32 uval;
+ u32 uval = 0;
/* hwmon sysfs attribute of current available only for package */
if (channel != CHANNEL_PKG)
@@ -726,11 +987,18 @@ xe_hwmon_in_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
static umode_t
xe_hwmon_energy_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
+ long energy = 0;
+
switch (attr) {
case hwmon_energy_input:
case hwmon_energy_label:
- return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
- channel)) ? 0444 : 0;
+ if (hwmon->xe->info.platform == XE_BATTLEMAGE) {
+ xe_hwmon_energy_get(hwmon, channel, &energy);
+ return energy ? 0444 : 0;
+ } else {
+ return xe_reg_is_valid(xe_hwmon_get_reg(hwmon, REG_PKG_ENERGY_STATUS,
+ channel)) ? 0444 : 0;
+ }
default:
return 0;
}
@@ -751,7 +1019,7 @@ xe_hwmon_energy_read(struct xe_hwmon *hwmon, u32 attr, int channel, long *val)
static umode_t
xe_hwmon_fan_is_visible(struct xe_hwmon *hwmon, u32 attr, int channel)
{
- u32 uval;
+ u32 uval = 0;
if (!hwmon->xe->info.has_fan_control)
return 0;
@@ -824,8 +1092,6 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
struct xe_hwmon *hwmon = (struct xe_hwmon *)drvdata;
int ret;
- xe_pm_runtime_get(hwmon->xe);
-
switch (type) {
case hwmon_temp:
ret = xe_hwmon_temp_is_visible(hwmon, attr, channel);
@@ -850,8 +1116,6 @@ xe_hwmon_is_visible(const void *drvdata, enum hwmon_sensor_types type,
break;
}
- xe_pm_runtime_put(hwmon->xe);
-
return ret;
}
@@ -965,18 +1229,52 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
int channel;
struct xe_reg pkg_power_sku_unit;
- /*
- * The contents of register PKG_POWER_SKU_UNIT do not change,
- * so read it once and store the shift values.
- */
- pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0);
- if (xe_reg_is_valid(pkg_power_sku_unit)) {
- val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit);
- hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
- hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
- hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+ if (hwmon->xe->info.has_mbx_power_limits) {
+ /* Check if GPU firmware supports mailbox power limit commands. */
+ if (xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_CARD,
+ &hwmon->pl1_on_boot[CHANNEL_CARD]) |
+ xe_hwmon_pcode_read_power_limit(hwmon, PL1_HWMON_ATTR, CHANNEL_PKG,
+ &hwmon->pl1_on_boot[CHANNEL_PKG]) |
+ xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_CARD,
+ &hwmon->pl2_on_boot[CHANNEL_CARD]) |
+ xe_hwmon_pcode_read_power_limit(hwmon, PL2_HWMON_ATTR, CHANNEL_PKG,
+ &hwmon->pl2_on_boot[CHANNEL_PKG])) {
+ drm_warn(&hwmon->xe->drm,
+ "Failed to read power limits, check GPU firmware !\n");
+ } else {
+ drm_info(&hwmon->xe->drm, "Using mailbox commands for power limits\n");
+ /* Write back the default limits so that subsequent reads come from pcode. */
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR,
+ CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl1_on_boot[CHANNEL_CARD]);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL1_HWMON_ATTR,
+ CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl1_on_boot[CHANNEL_PKG]);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR,
+ CHANNEL_CARD, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl2_on_boot[CHANNEL_CARD]);
+ xe_hwmon_pcode_rmw_power_limit(hwmon, PL2_HWMON_ATTR,
+ CHANNEL_PKG, PWR_LIM | PWR_LIM_TIME,
+ hwmon->pl2_on_boot[CHANNEL_PKG]);
+ hwmon->scl_shift_power = PWR_UNIT;
+ hwmon->scl_shift_energy = ENERGY_UNIT;
+ hwmon->scl_shift_time = TIME_UNIT;
+ hwmon->boot_power_limit_read = true;
+ }
+ } else {
+ drm_info(&hwmon->xe->drm, "Using register for power limits\n");
+ /*
+ * The contents of register PKG_POWER_SKU_UNIT do not change,
+ * so read it once and store the shift values.
+ */
+ pkg_power_sku_unit = xe_hwmon_get_reg(hwmon, REG_PKG_POWER_SKU_UNIT, 0);
+ if (xe_reg_is_valid(pkg_power_sku_unit)) {
+ val_sku_unit = xe_mmio_read32(mmio, pkg_power_sku_unit);
+ hwmon->scl_shift_power = REG_FIELD_GET(PKG_PWR_UNIT, val_sku_unit);
+ hwmon->scl_shift_energy = REG_FIELD_GET(PKG_ENERGY_UNIT, val_sku_unit);
+ hwmon->scl_shift_time = REG_FIELD_GET(PKG_TIME_UNIT, val_sku_unit);
+ }
}
-
/*
* Initialize 'struct xe_hwmon_energy_info', i.e. set fields to the
* first value of the energy register read
@@ -991,13 +1289,6 @@ xe_hwmon_get_preregistration_info(struct xe_hwmon *hwmon)
xe_hwmon_fan_input_read(hwmon, channel, &fan_speed);
}
-static void xe_hwmon_mutex_destroy(void *arg)
-{
- struct xe_hwmon *hwmon = arg;
-
- mutex_destroy(&hwmon->hwmon_lock);
-}
-
int xe_hwmon_register(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
@@ -1016,8 +1307,7 @@ int xe_hwmon_register(struct xe_device *xe)
if (!hwmon)
return -ENOMEM;
- mutex_init(&hwmon->hwmon_lock);
- ret = devm_add_action_or_reset(dev, xe_hwmon_mutex_destroy, hwmon);
+ ret = devm_mutex_init(dev, &hwmon->hwmon_lock);
if (ret)
return ret;
@@ -1041,4 +1331,4 @@ int xe_hwmon_register(struct xe_device *xe)
return 0;
}
-
+MODULE_IMPORT_NS("INTEL_PMT_TELEMETRY");
diff --git a/drivers/gpu/drm/xe/xe_i2c.c b/drivers/gpu/drm/xe/xe_i2c.c
new file mode 100644
index 000000000000..0b5452be0c87
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_i2c.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/*
+ * Intel Xe I2C attached Microcontroller Units (MCU)
+ *
+ * Copyright (C) 2025 Intel Corporation.
+ */
+
+#include <linux/array_size.h>
+#include <linux/container_of.h>
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/i2c.h>
+#include <linux/ioport.h>
+#include <linux/irq.h>
+#include <linux/irqdomain.h>
+#include <linux/notifier.h>
+#include <linux/pci.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/sprintf.h>
+#include <linux/string.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+#include "regs/xe_i2c_regs.h"
+#include "regs/xe_irq_regs.h"
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_i2c.h"
+#include "xe_mmio.h"
+#include "xe_platform_types.h"
+
+/**
+ * DOC: Xe I2C devices
+ *
+ * Register a platform device for the I2C host controller (Synopsys DesignWare
+ * I2C) if the registers of that controller are mapped to the MMIO, and also the
+ * I2C client device for the Add-In Management Controller (the MCU) attached to
+ * the host controller.
+ *
+ * See drivers/i2c/busses/i2c-designware-* for more information on the I2C host
+ * controller.
+ */
+
+static const char adapter_name[] = "i2c_designware";
+
+static const struct property_entry xe_i2c_adapter_properties[] = {
+ PROPERTY_ENTRY_STRING("compatible", "intel,xe-i2c"),
+ PROPERTY_ENTRY_U32("clock-frequency", I2C_MAX_FAST_MODE_PLUS_FREQ),
+ { }
+};
+
+static inline void xe_i2c_read_endpoint(struct xe_mmio *mmio, void *ep)
+{
+ u32 *val = ep;
+
+ val[0] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_PREFIX);
+ val[1] = xe_mmio_read32(mmio, REG_SG_REMAP_ADDR_POSTFIX);
+}
+
+static void xe_i2c_client_work(struct work_struct *work)
+{
+ struct xe_i2c *i2c = container_of(work, struct xe_i2c, work);
+ struct i2c_board_info info = {
+ .type = "amc",
+ .flags = I2C_CLIENT_HOST_NOTIFY,
+ .addr = i2c->ep.addr[1],
+ };
+
+ i2c->client[0] = i2c_new_client_device(i2c->adapter, &info);
+}
+
+static int xe_i2c_notifier(struct notifier_block *nb, unsigned long action, void *data)
+{
+ struct xe_i2c *i2c = container_of(nb, struct xe_i2c, bus_notifier);
+ struct i2c_adapter *adapter = i2c_verify_adapter(data);
+ struct device *dev = data;
+
+ if (action == BUS_NOTIFY_ADD_DEVICE &&
+ adapter && dev->parent == &i2c->pdev->dev) {
+ i2c->adapter = adapter;
+ schedule_work(&i2c->work);
+ return NOTIFY_OK;
+ }
+
+ return NOTIFY_DONE;
+}
+
+static int xe_i2c_register_adapter(struct xe_i2c *i2c)
+{
+ struct pci_dev *pci = to_pci_dev(i2c->drm_dev);
+ struct platform_device *pdev;
+ struct fwnode_handle *fwnode;
+ int ret;
+
+ fwnode = fwnode_create_software_node(xe_i2c_adapter_properties, NULL);
+ if (IS_ERR(fwnode))
+ return PTR_ERR(fwnode);
+
+ /*
+ * Not using platform_device_register_full() here because we don't have
+ * a handle to the platform_device before it returns. xe_i2c_notifier()
+ * uses that handle, but it may be called before
+ * platform_device_register_full() is done.
+ */
+ pdev = platform_device_alloc(adapter_name, pci_dev_id(pci));
+ if (!pdev) {
+ ret = -ENOMEM;
+ goto err_fwnode_remove;
+ }
+
+ if (i2c->adapter_irq) {
+ struct resource res;
+
+ res = DEFINE_RES_IRQ_NAMED(i2c->adapter_irq, "xe_i2c");
+
+ ret = platform_device_add_resources(pdev, &res, 1);
+ if (ret)
+ goto err_pdev_put;
+ }
+
+ pdev->dev.parent = i2c->drm_dev;
+ pdev->dev.fwnode = fwnode;
+ i2c->adapter_node = fwnode;
+ i2c->pdev = pdev;
+
+ ret = platform_device_add(pdev);
+ if (ret)
+ goto err_pdev_put;
+
+ return 0;
+
+err_pdev_put:
+ platform_device_put(pdev);
+err_fwnode_remove:
+ fwnode_remove_software_node(fwnode);
+
+ return ret;
+}
+
+static void xe_i2c_unregister_adapter(struct xe_i2c *i2c)
+{
+ platform_device_unregister(i2c->pdev);
+ fwnode_remove_software_node(i2c->adapter_node);
+}
+
+/**
+ * xe_i2c_present - I2C controller is present and functional
+ * @xe: xe device instance
+ *
+ * Check whether the I2C controller is present and functioning with valid
+ * endpoint cookie.
+ *
+ * Return: %true if present, %false otherwise.
+ */
+bool xe_i2c_present(struct xe_device *xe)
+{
+ return xe->i2c && xe->i2c->ep.cookie == XE_I2C_EP_COOKIE_DEVICE;
+}
+
+static bool xe_i2c_irq_present(struct xe_device *xe)
+{
+ return xe->i2c && xe->i2c->adapter_irq;
+}
+
+/**
+ * xe_i2c_irq_handler - Handler for I2C interrupts
+ * @xe: xe device instance
+ * @master_ctl: interrupt register
+ *
+ * Forward interrupts generated by the I2C host adapter to the I2C host adapter
+ * driver.
+ */
+void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl)
+{
+ if (!xe_i2c_irq_present(xe))
+ return;
+
+ if (master_ctl & I2C_IRQ)
+ generic_handle_irq_safe(xe->i2c->adapter_irq);
+}
+
+void xe_i2c_irq_reset(struct xe_device *xe)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+
+ if (!xe_i2c_irq_present(xe))
+ return;
+
+ xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, ACPI_INTR_EN, 0);
+}
+
+void xe_i2c_irq_postinstall(struct xe_device *xe)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+
+ if (!xe_i2c_irq_present(xe))
+ return;
+
+ xe_mmio_rmw32(mmio, I2C_BRIDGE_PCICFGCTL, 0, ACPI_INTR_EN);
+}
+
+static int xe_i2c_irq_map(struct irq_domain *h, unsigned int virq,
+ irq_hw_number_t hw_irq_num)
+{
+ irq_set_chip_and_handler(virq, &dummy_irq_chip, handle_simple_irq);
+ return 0;
+}
+
+static const struct irq_domain_ops xe_i2c_irq_ops = {
+ .map = xe_i2c_irq_map,
+};
+
+static int xe_i2c_create_irq(struct xe_i2c *i2c)
+{
+ struct irq_domain *domain;
+
+ if (!(i2c->ep.capabilities & XE_I2C_EP_CAP_IRQ))
+ return 0;
+
+ domain = irq_domain_create_linear(dev_fwnode(i2c->drm_dev), 1, &xe_i2c_irq_ops, NULL);
+ if (!domain)
+ return -ENOMEM;
+
+ i2c->adapter_irq = irq_create_mapping(domain, 0);
+ i2c->irqdomain = domain;
+
+ return 0;
+}
+
+static void xe_i2c_remove_irq(struct xe_i2c *i2c)
+{
+ if (!i2c->irqdomain)
+ return;
+
+ irq_dispose_mapping(i2c->adapter_irq);
+ irq_domain_remove(i2c->irqdomain);
+}
+
+static int xe_i2c_read(void *context, unsigned int reg, unsigned int *val)
+{
+ struct xe_i2c *i2c = context;
+
+ *val = xe_mmio_read32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET));
+
+ return 0;
+}
+
+static int xe_i2c_write(void *context, unsigned int reg, unsigned int val)
+{
+ struct xe_i2c *i2c = context;
+
+ xe_mmio_write32(i2c->mmio, XE_REG(reg + I2C_MEM_SPACE_OFFSET), val);
+
+ return 0;
+}
+
+static const struct regmap_config i2c_regmap_config = {
+ .reg_bits = 32,
+ .val_bits = 32,
+ .reg_read = xe_i2c_read,
+ .reg_write = xe_i2c_write,
+ .fast_io = true,
+};
+
+void xe_i2c_pm_suspend(struct xe_device *xe)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+
+ if (!xe_i2c_present(xe))
+ return;
+
+ xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D3hot);
+ drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR));
+}
+
+void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+
+ if (!xe_i2c_present(xe))
+ return;
+
+ if (d3cold)
+ xe_mmio_rmw32(mmio, I2C_CONFIG_CMD, 0, PCI_COMMAND_MEMORY | PCI_COMMAND_MASTER);
+
+ xe_mmio_rmw32(mmio, I2C_CONFIG_PMCSR, PCI_PM_CTRL_STATE_MASK, (__force u32)PCI_D0);
+ drm_dbg(&xe->drm, "pmcsr: 0x%08x\n", xe_mmio_read32(mmio, I2C_CONFIG_PMCSR));
+}
+
+static void xe_i2c_remove(void *data)
+{
+ struct xe_i2c *i2c = data;
+ unsigned int i;
+
+ for (i = 0; i < XE_I2C_MAX_CLIENTS; i++)
+ i2c_unregister_device(i2c->client[i]);
+
+ bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier);
+ xe_i2c_unregister_adapter(i2c);
+ xe_i2c_remove_irq(i2c);
+}
+
+/**
+ * xe_i2c_probe - Probe the I2C host adapter and the I2C clients attached to it
+ * @xe: xe device instance
+ *
+ * Register all the I2C devices described in the I2C Endpoint data structure.
+ *
+ * Return: 0 on success, error code on failure
+ */
+int xe_i2c_probe(struct xe_device *xe)
+{
+ struct device *drm_dev = xe->drm.dev;
+ struct xe_i2c_endpoint ep;
+ struct regmap *regmap;
+ struct xe_i2c *i2c;
+ int ret;
+
+ if (xe->info.platform != XE_BATTLEMAGE)
+ return 0;
+
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ xe_i2c_read_endpoint(xe_root_tile_mmio(xe), &ep);
+ if (ep.cookie != XE_I2C_EP_COOKIE_DEVICE)
+ return 0;
+
+ i2c = devm_kzalloc(drm_dev, sizeof(*i2c), GFP_KERNEL);
+ if (!i2c)
+ return -ENOMEM;
+
+ INIT_WORK(&i2c->work, xe_i2c_client_work);
+ i2c->mmio = xe_root_tile_mmio(xe);
+ i2c->drm_dev = drm_dev;
+ i2c->ep = ep;
+ xe->i2c = i2c;
+
+ /* PCI PM isn't aware of this device, bring it up and match it with SGUnit state. */
+ xe_i2c_pm_resume(xe, true);
+
+ regmap = devm_regmap_init(drm_dev, NULL, i2c, &i2c_regmap_config);
+ if (IS_ERR(regmap))
+ return PTR_ERR(regmap);
+
+ i2c->bus_notifier.notifier_call = xe_i2c_notifier;
+ ret = bus_register_notifier(&i2c_bus_type, &i2c->bus_notifier);
+ if (ret)
+ return ret;
+
+ ret = xe_i2c_create_irq(i2c);
+ if (ret)
+ goto err_unregister_notifier;
+
+ ret = xe_i2c_register_adapter(i2c);
+ if (ret)
+ goto err_remove_irq;
+
+ xe_i2c_irq_postinstall(xe);
+ return devm_add_action_or_reset(drm_dev, xe_i2c_remove, i2c);
+
+err_remove_irq:
+ xe_i2c_remove_irq(i2c);
+
+err_unregister_notifier:
+ bus_unregister_notifier(&i2c_bus_type, &i2c->bus_notifier);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_i2c.h b/drivers/gpu/drm/xe/xe_i2c.h
new file mode 100644
index 000000000000..425d8160835f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_i2c.h
@@ -0,0 +1,68 @@
+/* SPDX-License-Identifier: MIT */
+#ifndef _XE_I2C_H_
+#define _XE_I2C_H_
+
+#include <linux/bits.h>
+#include <linux/notifier.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+
+struct device;
+struct fwnode_handle;
+struct i2c_adapter;
+struct i2c_client;
+struct irq_domain;
+struct platform_device;
+struct xe_device;
+struct xe_mmio;
+
+#define XE_I2C_MAX_CLIENTS 3
+
+#define XE_I2C_EP_COOKIE_DEVICE 0xde
+
+/* Endpoint Capabilities */
+#define XE_I2C_EP_CAP_IRQ BIT(0)
+
+struct xe_i2c_endpoint {
+ u8 cookie;
+ u8 capabilities;
+ u16 addr[XE_I2C_MAX_CLIENTS];
+};
+
+struct xe_i2c {
+ struct fwnode_handle *adapter_node;
+ struct platform_device *pdev;
+ struct i2c_adapter *adapter;
+ struct i2c_client *client[XE_I2C_MAX_CLIENTS];
+
+ struct notifier_block bus_notifier;
+ struct work_struct work;
+
+ struct irq_domain *irqdomain;
+ int adapter_irq;
+
+ struct xe_i2c_endpoint ep;
+ struct device *drm_dev;
+
+ struct xe_mmio *mmio;
+};
+
+#if IS_ENABLED(CONFIG_I2C)
+int xe_i2c_probe(struct xe_device *xe);
+bool xe_i2c_present(struct xe_device *xe);
+void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl);
+void xe_i2c_irq_postinstall(struct xe_device *xe);
+void xe_i2c_irq_reset(struct xe_device *xe);
+void xe_i2c_pm_suspend(struct xe_device *xe);
+void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold);
+#else
+static inline int xe_i2c_probe(struct xe_device *xe) { return 0; }
+static inline bool xe_i2c_present(struct xe_device *xe) { return false; }
+static inline void xe_i2c_irq_handler(struct xe_device *xe, u32 master_ctl) { }
+static inline void xe_i2c_irq_postinstall(struct xe_device *xe) { }
+static inline void xe_i2c_irq_reset(struct xe_device *xe) { }
+static inline void xe_i2c_pm_suspend(struct xe_device *xe) { }
+static inline void xe_i2c_pm_resume(struct xe_device *xe, bool d3cold) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_irq.c b/drivers/gpu/drm/xe/xe_irq.c
index 5362d3174b06..024e13e606ec 100644
--- a/drivers/gpu/drm/xe/xe_irq.c
+++ b/drivers/gpu/drm/xe/xe_irq.c
@@ -18,10 +18,13 @@
#include "xe_gt.h"
#include "xe_guc.h"
#include "xe_hw_engine.h"
+#include "xe_hw_error.h"
+#include "xe_i2c.h"
#include "xe_memirq.h"
#include "xe_mmio.h"
#include "xe_pxp.h"
#include "xe_sriov.h"
+#include "xe_tile.h"
/*
* Interrupt registers for a unit are always consecutive and ordered
@@ -136,68 +139,112 @@ void xe_irq_enable_hwe(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
struct xe_mmio *mmio = &gt->mmio;
- u32 ccs_mask, bcs_mask;
- u32 irqs, dmask, smask;
- u32 gsc_mask = 0;
- u32 heci_mask = 0;
+ u32 common_mask, val, gsc_mask = 0, heci_mask = 0,
+ rcs_mask = 0, bcs_mask = 0, vcs_mask = 0, vecs_mask = 0,
+ ccs_mask = 0;
if (xe_device_uses_memirq(xe))
return;
if (xe_device_uc_enabled(xe)) {
- irqs = GT_RENDER_USER_INTERRUPT |
- GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
+ common_mask = GT_MI_USER_INTERRUPT |
+ GT_FLUSH_COMPLETE_INTERRUPT;
+
+ /* Enable Compute Walker Interrupt for non-MSIX platforms */
+ if (GRAPHICS_VERx100(xe) >= 3511 && !xe_device_has_msix(xe)) {
+ rcs_mask |= GT_COMPUTE_WALKER_INTERRUPT;
+ ccs_mask |= GT_COMPUTE_WALKER_INTERRUPT;
+ }
} else {
- irqs = GT_RENDER_USER_INTERRUPT |
- GT_CS_MASTER_ERROR_INTERRUPT |
- GT_CONTEXT_SWITCH_INTERRUPT |
- GT_WAIT_SEMAPHORE_INTERRUPT;
+ common_mask = GT_MI_USER_INTERRUPT |
+ GT_CS_MASTER_ERROR_INTERRUPT |
+ GT_CONTEXT_SWITCH_INTERRUPT |
+ GT_WAIT_SEMAPHORE_INTERRUPT;
}
- ccs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
- bcs_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
+ rcs_mask |= common_mask;
+ bcs_mask |= common_mask;
+ vcs_mask |= common_mask;
+ vecs_mask |= common_mask;
+ ccs_mask |= common_mask;
- dmask = irqs << 16 | irqs;
- smask = irqs << 16;
+ if (xe_gt_is_main_type(gt)) {
+ /*
+ * For enabling the interrupts, the information about fused-off
+ * engines doesn't matter much, but it also lets us check whether
+ * the engine is architecturally available on the platform
+ */
+ u32 ccs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COMPUTE);
+ u32 bcs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_COPY);
- if (!xe_gt_is_media_type(gt)) {
/* Enable interrupts for each engine class */
- xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, dmask);
- if (ccs_mask)
- xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE, smask);
+ xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE,
+ REG_FIELD_PREP(ENGINE1_MASK, rcs_mask) |
+ REG_FIELD_PREP(ENGINE0_MASK, bcs_mask));
+ if (ccs_fuse_mask)
+ xe_mmio_write32(mmio, CCS_RSVD_INTR_ENABLE,
+ REG_FIELD_PREP(ENGINE1_MASK, ccs_mask));
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, ~smask);
- xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, ~smask);
- if (bcs_mask & (BIT(1)|BIT(2)))
- xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, ~dmask);
- if (bcs_mask & (BIT(3)|BIT(4)))
- xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, ~dmask);
- if (bcs_mask & (BIT(5)|BIT(6)))
- xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, ~dmask);
- if (bcs_mask & (BIT(7)|BIT(8)))
- xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, ~dmask);
- if (ccs_mask & (BIT(0)|BIT(1)))
- xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, ~dmask);
- if (ccs_mask & (BIT(2)|BIT(3)))
- xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, ~dmask);
+ val = ~REG_FIELD_PREP(ENGINE1_MASK, rcs_mask);
+ xe_mmio_write32(mmio, RCS0_RSVD_INTR_MASK, val);
+ val = ~REG_FIELD_PREP(ENGINE1_MASK, bcs_mask);
+ xe_mmio_write32(mmio, BCS_RSVD_INTR_MASK, val);
+
+ val = ~(REG_FIELD_PREP(ENGINE1_MASK, bcs_mask) |
+ REG_FIELD_PREP(ENGINE0_MASK, bcs_mask));
+ if (bcs_fuse_mask & (BIT(1)|BIT(2)))
+ xe_mmio_write32(mmio, XEHPC_BCS1_BCS2_INTR_MASK, val);
+ if (bcs_fuse_mask & (BIT(3)|BIT(4)))
+ xe_mmio_write32(mmio, XEHPC_BCS3_BCS4_INTR_MASK, val);
+ if (bcs_fuse_mask & (BIT(5)|BIT(6)))
+ xe_mmio_write32(mmio, XEHPC_BCS5_BCS6_INTR_MASK, val);
+ if (bcs_fuse_mask & (BIT(7)|BIT(8)))
+ xe_mmio_write32(mmio, XEHPC_BCS7_BCS8_INTR_MASK, val);
+
+ val = ~(REG_FIELD_PREP(ENGINE1_MASK, ccs_mask) |
+ REG_FIELD_PREP(ENGINE0_MASK, ccs_mask));
+ if (ccs_fuse_mask & (BIT(0)|BIT(1)))
+ xe_mmio_write32(mmio, CCS0_CCS1_INTR_MASK, val);
+ if (ccs_fuse_mask & (BIT(2)|BIT(3)))
+ xe_mmio_write32(mmio, CCS2_CCS3_INTR_MASK, val);
}
if (xe_gt_is_media_type(gt) || MEDIA_VER(xe) < 13) {
+ u32 vcs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_DECODE);
+ u32 vecs_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_VIDEO_ENHANCE);
+ u32 other_fuse_mask = xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER);
+
/* Enable interrupts for each engine class */
- xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE, dmask);
+ xe_mmio_write32(mmio, VCS_VECS_INTR_ENABLE,
+ REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) |
+ REG_FIELD_PREP(ENGINE0_MASK, vecs_mask));
/* Unmask interrupts for each engine instance */
- xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, ~dmask);
- xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, ~dmask);
- xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, ~dmask);
+ val = ~(REG_FIELD_PREP(ENGINE1_MASK, vcs_mask) |
+ REG_FIELD_PREP(ENGINE0_MASK, vcs_mask));
+ if (vcs_fuse_mask & (BIT(0) | BIT(1)))
+ xe_mmio_write32(mmio, VCS0_VCS1_INTR_MASK, val);
+ if (vcs_fuse_mask & (BIT(2) | BIT(3)))
+ xe_mmio_write32(mmio, VCS2_VCS3_INTR_MASK, val);
+ if (vcs_fuse_mask & (BIT(4) | BIT(5)))
+ xe_mmio_write32(mmio, VCS4_VCS5_INTR_MASK, val);
+ if (vcs_fuse_mask & (BIT(6) | BIT(7)))
+ xe_mmio_write32(mmio, VCS6_VCS7_INTR_MASK, val);
+
+ val = ~(REG_FIELD_PREP(ENGINE1_MASK, vecs_mask) |
+ REG_FIELD_PREP(ENGINE0_MASK, vecs_mask));
+ if (vecs_fuse_mask & (BIT(0) | BIT(1)))
+ xe_mmio_write32(mmio, VECS0_VECS1_INTR_MASK, val);
+ if (vecs_fuse_mask & (BIT(2) | BIT(3)))
+ xe_mmio_write32(mmio, VECS2_VECS3_INTR_MASK, val);
/*
* the heci2 interrupt is enabled via the same register as the
* GSCCS interrupts, but it has its own mask register.
*/
- if (xe_hw_engine_mask_per_class(gt, XE_ENGINE_CLASS_OTHER)) {
- gsc_mask = irqs | GSC_ER_COMPLETE;
+ if (other_fuse_mask) {
+ gsc_mask = common_mask | GSC_ER_COMPLETE;
heci_mask = GSC_IRQ_INTF(1);
} else if (xe->info.has_heci_gscfi) {
gsc_mask = GSC_IRQ_INTF(1);
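
The rework above replaces the hand-built "irqs << 16 | irqs" masks with per-class REG_FIELD_PREP(ENGINE1_MASK, ...) | REG_FIELD_PREP(ENGINE0_MASK, ...) values. The old expression suggests the two fields are the upper and lower 16-bit halves of each enable/mask register, which is the assumption behind this standalone sketch (the 0x3 interrupt bits are placeholders, not the real GT_* values):

#include <stdint.h>
#include <stdio.h>

/* Assumed layout: ENGINE1 field in bits 31:16, ENGINE0 field in bits 15:0. */
static uint32_t pack_classes(uint16_t engine1_bits, uint16_t engine0_bits)
{
	return ((uint32_t)engine1_bits << 16) | engine0_bits;
}

int main(void)
{
	uint16_t rcs_mask = 0x3;	/* placeholder interrupt bits for the render class */
	uint16_t bcs_mask = 0x3;	/* placeholder interrupt bits for the copy class */

	/* RENDER_COPY_INTR_ENABLE-style value: render class in the upper half, copy in the lower */
	uint32_t enable = pack_classes(rcs_mask, bcs_mask);
	/* RCS0_RSVD_INTR_MASK-style value: unmask by clearing only the render-class bits */
	uint32_t rcs_unmask = ~pack_classes(rcs_mask, 0);

	/* prints enable=0x00030003 rcs_unmask=0xfffcffff */
	printf("enable=0x%08x rcs_unmask=0x%08x\n", (unsigned)enable, (unsigned)rcs_unmask);
	return 0;
}
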
@@ -260,7 +307,7 @@ gt_engine_identity(struct xe_device *xe,
static void
gt_other_irq_handler(struct xe_gt *gt, const u8 instance, const u16 iir)
{
- if (instance == OTHER_GUC_INSTANCE && !xe_gt_is_media_type(gt))
+ if (instance == OTHER_GUC_INSTANCE && xe_gt_is_main_type(gt))
return xe_guc_irq_handler(&gt->uc.guc, iir);
if (instance == OTHER_MEDIA_GUC_INSTANCE && xe_gt_is_media_type(gt))
return xe_guc_irq_handler(&gt->uc.guc, iir);
@@ -466,6 +513,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
xe_mmio_write32(mmio, GFX_MSTR_IRQ, master_ctl);
gt_irq_handler(tile, master_ctl, intr_dw, identity);
+ xe_hw_error_irq_handler(tile, master_ctl);
/*
* Display interrupts (including display backlight operations
@@ -476,6 +524,7 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
if (xe->info.has_heci_cscfi)
xe_heci_csc_irq_handler(xe, master_ctl);
xe_display_irq_handler(xe, master_ctl);
+ xe_i2c_irq_handler(xe, master_ctl);
gu_misc_iir = gu_misc_irq_ack(xe, master_ctl);
}
}
@@ -489,11 +538,15 @@ static irqreturn_t dg1_irq_handler(int irq, void *arg)
static void gt_irq_reset(struct xe_tile *tile)
{
struct xe_mmio *mmio = &tile->mmio;
-
- u32 ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
- XE_ENGINE_CLASS_COMPUTE);
- u32 bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
- XE_ENGINE_CLASS_COPY);
+ u32 ccs_mask = ~0;
+ u32 bcs_mask = ~0;
+
+ if (tile->primary_gt) {
+ ccs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+ XE_ENGINE_CLASS_COMPUTE);
+ bcs_mask = xe_hw_engine_mask_per_class(tile->primary_gt,
+ XE_ENGINE_CLASS_COPY);
+ }
/* Disable RCS, BCS, VCS and VECS class engines. */
xe_mmio_write32(mmio, RENDER_COPY_INTR_ENABLE, 0);
@@ -550,7 +603,7 @@ static void xelp_irq_reset(struct xe_tile *tile)
static void dg1_irq_reset(struct xe_tile *tile)
{
- if (tile->id == 0)
+ if (xe_tile_is_root(tile))
dg1_intr_disable(tile_to_xe(tile));
gt_irq_reset(tile);
@@ -611,6 +664,7 @@ static void xe_irq_reset(struct xe_device *xe)
tile = xe_device_get_root_tile(xe);
mask_and_disable(tile, GU_MISC_IRQ_OFFSET);
xe_display_irq_reset(xe);
+ xe_i2c_irq_reset(xe);
/*
* The tile's top-level status register should be the last one
@@ -651,7 +705,8 @@ static void xe_irq_postinstall(struct xe_device *xe)
xe_memirq_postinstall(&tile->memirq);
}
- xe_display_irq_postinstall(xe, xe_root_mmio_gt(xe));
+ xe_display_irq_postinstall(xe);
+ xe_i2c_irq_postinstall(xe);
/*
* ASLE backlight operations are reported via GUnit GSE interrupts
@@ -753,6 +808,8 @@ int xe_irq_install(struct xe_device *xe)
int nvec = 1;
int err;
+ xe_hw_error_init(xe);
+
xe_irq_reset(xe);
if (xe_device_has_msix(xe)) {
@@ -840,22 +897,6 @@ static int xe_irq_msix_init(struct xe_device *xe)
return 0;
}
-static irqreturn_t guc2host_irq_handler(int irq, void *arg)
-{
- struct xe_device *xe = arg;
- struct xe_tile *tile;
- u8 id;
-
- if (!atomic_read(&xe->irq.enabled))
- return IRQ_NONE;
-
- for_each_tile(tile, xe, id)
- xe_guc_irq_handler(&tile->primary_gt->uc.guc,
- GUC_INTR_GUC2HOST);
-
- return IRQ_HANDLED;
-}
-
static irqreturn_t xe_irq_msix_default_hwe_handler(int irq, void *arg)
{
unsigned int tile_id, gt_id;
@@ -972,7 +1013,7 @@ int xe_irq_msix_request_irqs(struct xe_device *xe)
u16 msix;
msix = GUC2HOST_MSIX;
- err = xe_irq_msix_request_irq(xe, guc2host_irq_handler, xe,
+ err = xe_irq_msix_request_irq(xe, xe_irq_handler(xe), xe,
DRIVER_NAME "-guc2host", false, &msix);
if (err)
return err;
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
new file mode 100644
index 000000000000..768442ca7da6
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
@@ -0,0 +1,464 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/component.h>
+#include <linux/delay.h>
+#include <linux/firmware.h>
+
+#include <drm/drm_managed.h>
+#include <drm/intel/i915_component.h>
+#include <drm/intel/intel_lb_mei_interface.h>
+#include <drm/drm_print.h>
+
+#include "xe_device.h"
+#include "xe_late_bind_fw.h"
+#include "xe_pcode.h"
+#include "xe_pcode_api.h"
+#include "xe_pm.h"
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario
+ */
+#define LB_INIT_TIMEOUT_MS 20000
+
+/*
+ * Retry for up to 6 seconds, in steps of 200 ms, to allow time for
+ * other OS components to release the MEI CL handle
+ */
+#define LB_FW_LOAD_RETRY_MAXCOUNT 30
+#define LB_FW_LOAD_RETRY_PAUSE_MS 200
+
+static const u32 fw_id_to_type[] = {
+ [XE_LB_FW_FAN_CONTROL] = INTEL_LB_TYPE_FAN_CONTROL,
+ };
+
+static const char * const fw_id_to_name[] = {
+ [XE_LB_FW_FAN_CONTROL] = "fan_control",
+ };
+
+static struct xe_device *
+late_bind_to_xe(struct xe_late_bind *late_bind)
+{
+ return container_of(late_bind, struct xe_device, late_bind);
+}
+
+static struct xe_device *
+late_bind_fw_to_xe(struct xe_late_bind_fw *lb_fw)
+{
+ return container_of(lb_fw, struct xe_device, late_bind.late_bind_fw[lb_fw->id]);
+}
+
+/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */
+static int parse_cpd_header(struct xe_late_bind_fw *lb_fw,
+ const void *data, size_t size, const char *manifest_entry)
+{
+ struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+ const struct gsc_cpd_header_v2 *header = data;
+ const struct gsc_manifest_header *manifest;
+ const struct gsc_cpd_entry *entry;
+ size_t min_size = sizeof(*header);
+ u32 offset = 0;
+ int i;
+
+ /* manifest_entry is mandatory */
+ xe_assert(xe, manifest_entry);
+
+ if (size < min_size || header->header_marker != GSC_CPD_HEADER_MARKER)
+ return -ENOENT;
+
+ if (header->header_length < sizeof(struct gsc_cpd_header_v2)) {
+ drm_err(&xe->drm, "%s late binding fw: Invalid CPD header length %u!\n",
+ fw_id_to_name[lb_fw->id], header->header_length);
+ return -EINVAL;
+ }
+
+ min_size = header->header_length + sizeof(struct gsc_cpd_entry) * header->num_of_entries;
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ /* Look for the manifest first */
+ entry = (void *)header + header->header_length;
+ for (i = 0; i < header->num_of_entries; i++, entry++)
+ if (strcmp(entry->name, manifest_entry) == 0)
+ offset = entry->offset & GSC_CPD_ENTRY_OFFSET_MASK;
+
+ if (!offset) {
+ drm_err(&xe->drm, "%s late binding fw: Failed to find manifest_entry\n",
+ fw_id_to_name[lb_fw->id]);
+ return -ENODATA;
+ }
+
+ min_size = offset + sizeof(struct gsc_manifest_header);
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ manifest = data + offset;
+
+ lb_fw->version = manifest->fw_version;
+
+ return 0;
+}
+
+/* Refer to the "Late Bind based Firmware Layout" documentation entry for details */
+static int parse_lb_layout(struct xe_late_bind_fw *lb_fw,
+ const void *data, size_t size, const char *fpt_entry)
+{
+ struct xe_device *xe = late_bind_fw_to_xe(lb_fw);
+ const struct csc_fpt_header *header = data;
+ const struct csc_fpt_entry *entry;
+ size_t min_size = sizeof(*header);
+ u32 offset = 0;
+ int i;
+
+ /* fpt_entry is mandatory */
+ xe_assert(xe, fpt_entry);
+
+ if (size < min_size || header->header_marker != CSC_FPT_HEADER_MARKER)
+ return -ENOENT;
+
+ if (header->header_length < sizeof(struct csc_fpt_header)) {
+ drm_err(&xe->drm, "%s late binding fw: Invalid FPT header length %u!\n",
+ fw_id_to_name[lb_fw->id], header->header_length);
+ return -EINVAL;
+ }
+
+ min_size = header->header_length + sizeof(struct csc_fpt_entry) * header->num_of_entries;
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ /* Look for the cpd header first */
+ entry = (void *)header + header->header_length;
+ for (i = 0; i < header->num_of_entries; i++, entry++)
+ if (strcmp(entry->name, fpt_entry) == 0)
+ offset = entry->offset;
+
+ if (!offset) {
+ drm_err(&xe->drm, "%s late binding fw: Failed to find fpt_entry\n",
+ fw_id_to_name[lb_fw->id]);
+ return -ENODATA;
+ }
+
+ min_size = offset + sizeof(struct gsc_cpd_header_v2);
+ if (size < min_size) {
+ drm_err(&xe->drm, "%s late binding fw: too small! %zu < %zu\n",
+ fw_id_to_name[lb_fw->id], size, min_size);
+ return -ENODATA;
+ }
+
+ return parse_cpd_header(lb_fw, data + offset, size - offset, "LTES.man");
+}
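+
+/*
+ * Illustrative walk of the blob layout traversed above (a sketch derived from
+ * this file; only the "LTES" and "LTES.man" entry names come from the code):
+ *
+ *   csc_fpt_header
+ *     -> csc_fpt_entry "LTES" (entry->offset)
+ *       -> gsc_cpd_header_v2
+ *         -> gsc_cpd_entry "LTES.man" (entry->offset & GSC_CPD_ENTRY_OFFSET_MASK)
+ *           -> gsc_manifest_header.fw_version (copied to lb_fw->version)
+ */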
+
+static const char *xe_late_bind_parse_status(uint32_t status)
+{
+ switch (status) {
+ case INTEL_LB_STATUS_SUCCESS:
+ return "success";
+ case INTEL_LB_STATUS_4ID_MISMATCH:
+ return "4Id Mismatch";
+ case INTEL_LB_STATUS_ARB_FAILURE:
+ return "ARB Failure";
+ case INTEL_LB_STATUS_GENERAL_ERROR:
+ return "General Error";
+ case INTEL_LB_STATUS_INVALID_PARAMS:
+ return "Invalid Params";
+ case INTEL_LB_STATUS_INVALID_SIGNATURE:
+ return "Invalid Signature";
+ case INTEL_LB_STATUS_INVALID_PAYLOAD:
+ return "Invalid Payload";
+ case INTEL_LB_STATUS_TIMEOUT:
+ return "Timeout";
+ default:
+ return "Unknown error";
+ }
+}
+
+static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind, u32 *num_fans)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_tile *root_tile = xe_device_get_root_tile(xe);
+
+ return xe_pcode_read(root_tile,
+ PCODE_MBOX(FAN_SPEED_CONTROL, FSC_READ_NUM_FANS, 0), num_fans, NULL);
+}
+
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_late_bind_fw *lbfw;
+ int fw_id;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ lbfw = &late_bind->late_bind_fw[fw_id];
+ if (lbfw->payload && late_bind->wq) {
+ drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
+ fw_id_to_name[lbfw->id]);
+ flush_work(&lbfw->work);
+ }
+ }
+}
+
+static void xe_late_bind_work(struct work_struct *work)
+{
+ struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
+ struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
+ late_bind_fw[lbfw->id]);
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
+ int ret;
+ int slept;
+
+ xe_device_assert_mem_access(xe);
+
+ /* we can queue this before the component is bound */
+ for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
+ if (late_bind->component.ops)
+ break;
+ msleep(100);
+ }
+
+ if (!late_bind->component.ops) {
+ drm_err(&xe->drm, "Late bind component not bound\n");
+ /* Do not re-attempt fw load */
+ drmm_kfree(&xe->drm, (void *)lbfw->payload);
+ lbfw->payload = NULL;
+ goto out;
+ }
+
+ drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
+
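+ /* Retry only while the MEI CL handle is busy (-EBUSY); any other error is final */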
+ do {
+ ret = late_bind->component.ops->push_payload(late_bind->component.mei_dev,
+ lbfw->type,
+ lbfw->flags,
+ lbfw->payload,
+ lbfw->payload_size);
+ if (!ret)
+ break;
+ msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
+ } while (--retry && ret == -EBUSY);
+
+ if (!ret) {
+ drm_dbg(&xe->drm, "Load %s firmware successful\n",
+ fw_id_to_name[lbfw->id]);
+ goto out;
+ }
+
+ if (ret > 0)
+ drm_err(&xe->drm, "Load %s firmware failed with err %d, %s\n",
+ fw_id_to_name[lbfw->id], ret, xe_late_bind_parse_status(ret));
+ else
+ drm_err(&xe->drm, "Load %s firmware failed with err %d",
+ fw_id_to_name[lbfw->id], ret);
+ /* Do not re-attempt fw load */
+ drmm_kfree(&xe->drm, (void *)lbfw->payload);
+ lbfw->payload = NULL;
+
+out:
+ xe_pm_runtime_put(xe);
+}
+
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct xe_late_bind_fw *lbfw;
+ int fw_id;
+
+ if (!late_bind->component_added)
+ return -ENODEV;
+
+ if (late_bind->disable)
+ return 0;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ lbfw = &late_bind->late_bind_fw[fw_id];
+ if (lbfw->payload) {
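+ /*
+ * The worker queued below inherits this runtime PM reference and
+ * drops it via xe_pm_runtime_put() in xe_late_bind_work().
+ */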
+ xe_pm_runtime_get_noresume(xe);
+ queue_work(late_bind->wq, &lbfw->work);
+ }
+ }
+ return 0;
+}
+
+static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct xe_late_bind_fw *lb_fw;
+ const struct firmware *fw;
+ u32 num_fans;
+ int ret;
+
+ if (fw_id >= XE_LB_FW_MAX_ID)
+ return -EINVAL;
+
+ lb_fw = &late_bind->late_bind_fw[fw_id];
+
+ lb_fw->id = fw_id;
+ lb_fw->type = fw_id_to_type[lb_fw->id];
+ lb_fw->flags &= ~INTEL_LB_FLAG_IS_PERSISTENT;
+
+ if (lb_fw->type == INTEL_LB_TYPE_FAN_CONTROL) {
+ ret = xe_late_bind_fw_num_fans(late_bind, &num_fans);
+ if (ret) {
+ drm_dbg(&xe->drm, "Failed to read number of fans: %d\n", ret);
+ return 0; /* Not a fatal error, continue without fan control */
+ }
+ drm_dbg(&xe->drm, "Number of Fans: %d\n", num_fans);
+ if (!num_fans)
+ return 0;
+ }
+
+ snprintf(lb_fw->blob_path, sizeof(lb_fw->blob_path), "xe/%s_8086_%04x_%04x_%04x.bin",
+ fw_id_to_name[lb_fw->id], pdev->device,
+ pdev->subsystem_vendor, pdev->subsystem_device);
+
+ drm_dbg(&xe->drm, "Request late binding firmware %s\n", lb_fw->blob_path);
+ ret = firmware_request_nowarn(&fw, lb_fw->blob_path, xe->drm.dev);
+ if (ret) {
+ drm_dbg(&xe->drm, "%s late binding fw not available for current device",
+ fw_id_to_name[lb_fw->id]);
+ return 0;
+ }
+
+ if (fw->size > XE_LB_MAX_PAYLOAD_SIZE) {
+ drm_err(&xe->drm, "Firmware %s size %zu is larger than max pay load size %u\n",
+ lb_fw->blob_path, fw->size, XE_LB_MAX_PAYLOAD_SIZE);
+ release_firmware(fw);
+ return -ENODATA;
+ }
+
+ ret = parse_lb_layout(lb_fw, fw->data, fw->size, "LTES");
+ if (ret) {
+ release_firmware(fw);
+ return ret;
+ }
+
+ lb_fw->payload_size = fw->size;
+ lb_fw->payload = drmm_kzalloc(&xe->drm, lb_fw->payload_size, GFP_KERNEL);
+ if (!lb_fw->payload) {
+ release_firmware(fw);
+ return -ENOMEM;
+ }
+
+ drm_info(&xe->drm, "Using %s firmware from %s version %u.%u.%u.%u\n",
+ fw_id_to_name[lb_fw->id], lb_fw->blob_path,
+ lb_fw->version.major, lb_fw->version.minor,
+ lb_fw->version.hotfix, lb_fw->version.build);
+
+ memcpy((void *)lb_fw->payload, fw->data, lb_fw->payload_size);
+ release_firmware(fw);
+ INIT_WORK(&lb_fw->work, xe_late_bind_work);
+
+ return 0;
+}
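+
+/*
+ * Illustrative firmware path produced by the snprintf() above; the device and
+ * subsystem IDs below are hypothetical examples, not values from this patch:
+ *
+ *   xe/fan_control_8086_e20b_8086_1234.bin
+ *      <name>_8086_<PCI device>_<subsys vendor>_<subsys device>.bin
+ */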
+
+static int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
+{
+ int ret;
+ int fw_id;
+
+ late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
+ if (!late_bind->wq)
+ return -ENOMEM;
+
+ for (fw_id = 0; fw_id < XE_LB_FW_MAX_ID; fw_id++) {
+ ret = __xe_late_bind_fw_init(late_bind, fw_id);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+static int xe_late_bind_component_bind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+ struct xe_late_bind *late_bind = &xe->late_bind;
+
+ late_bind->component.ops = data;
+ late_bind->component.mei_dev = mei_kdev;
+
+ return 0;
+}
+
+static void xe_late_bind_component_unbind(struct device *xe_kdev,
+ struct device *mei_kdev, void *data)
+{
+ struct xe_device *xe = kdev_to_xe_device(xe_kdev);
+ struct xe_late_bind *late_bind = &xe->late_bind;
+
+ xe_late_bind_wait_for_worker_completion(late_bind);
+
+ late_bind->component.ops = NULL;
+}
+
+static const struct component_ops xe_late_bind_component_ops = {
+ .bind = xe_late_bind_component_bind,
+ .unbind = xe_late_bind_component_unbind,
+};
+
+static void xe_late_bind_remove(void *arg)
+{
+ struct xe_late_bind *late_bind = arg;
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+
+ xe_late_bind_wait_for_worker_completion(late_bind);
+
+ late_bind->component_added = false;
+
+ component_del(xe->drm.dev, &xe_late_bind_component_ops);
+ if (late_bind->wq) {
+ destroy_workqueue(late_bind->wq);
+ late_bind->wq = NULL;
+ }
+}
+
+/**
+ * xe_late_bind_init() - add xe mei late binding component
+ * @late_bind: pointer to late bind structure.
+ *
+ * Return: 0 if the initialization was successful, a negative errno otherwise.
+ */
+int xe_late_bind_init(struct xe_late_bind *late_bind)
+{
+ struct xe_device *xe = late_bind_to_xe(late_bind);
+ int err;
+
+ if (!xe->info.has_late_bind)
+ return 0;
+
+ if (!IS_ENABLED(CONFIG_INTEL_MEI_LB) || !IS_ENABLED(CONFIG_INTEL_MEI_GSC)) {
+ drm_info(&xe->drm, "Can't init xe mei late bind missing mei component\n");
+ return 0;
+ }
+
+ err = component_add_typed(xe->drm.dev, &xe_late_bind_component_ops,
+ INTEL_COMPONENT_LB);
+ if (err < 0) {
+ drm_err(&xe->drm, "Failed to add mei late bind component (%pe)\n", ERR_PTR(err));
+ return err;
+ }
+
+ late_bind->component_added = true;
+
+ err = devm_add_action_or_reset(xe->drm.dev, xe_late_bind_remove, late_bind);
+ if (err)
+ return err;
+
+ err = xe_late_bind_fw_init(late_bind);
+ if (err)
+ return err;
+
+ return xe_late_bind_fw_load(late_bind);
+}
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
new file mode 100644
index 000000000000..07e437390539
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_FW_H_
+#define _XE_LATE_BIND_FW_H_
+
+#include <linux/types.h>
+
+struct xe_late_bind;
+
+int xe_late_bind_init(struct xe_late_bind *late_bind);
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
+void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
new file mode 100644
index 000000000000..0f5da89ce98b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_LATE_BIND_TYPES_H_
+#define _XE_LATE_BIND_TYPES_H_
+
+#include <linux/iosys-map.h>
+#include <linux/mutex.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include "xe_uc_fw_abi.h"
+
+#define XE_LB_MAX_PAYLOAD_SIZE SZ_4K
+
+/**
+ * enum xe_late_bind_fw_id - late binding firmware index
+ */
+enum xe_late_bind_fw_id {
+ XE_LB_FW_FAN_CONTROL = 0,
+ XE_LB_FW_MAX_ID
+};
+
+/**
+ * struct xe_late_bind_fw - late binding firmware blob descriptor
+ */
+struct xe_late_bind_fw {
+ /** @id: firmware index */
+ u32 id;
+ /** @blob_path: firmware binary path */
+ char blob_path[PATH_MAX];
+ /** @type: firmware type */
+ u32 type;
+ /** @flags: firmware flags */
+ u32 flags;
+ /** @payload: to store the late binding blob */
+ const u8 *payload;
+ /** @payload_size: late binding blob payload_size */
+ size_t payload_size;
+ /** @work: worker to upload latebind blob */
+ struct work_struct work;
+ /** @version: late binding blob manifest version */
+ struct gsc_version version;
+};
+
+/**
+ * struct xe_late_bind_component - Late Binding services component
+ * @mei_dev: device that provide Late Binding service.
+ * @ops: Ops implemented by Late Binding driver, used by Xe driver.
+ *
+ * Communication between Xe and MEI drivers for Late Binding services
+ */
+struct xe_late_bind_component {
+ struct device *mei_dev;
+ const struct intel_lb_component_ops *ops;
+};
+
+/**
+ * struct xe_late_bind - late binding firmware handling state
+ */
+struct xe_late_bind {
+ /** @component: struct for communication with mei component */
+ struct xe_late_bind_component component;
+ /** @late_bind_fw: late binding firmware array */
+ struct xe_late_bind_fw late_bind_fw[XE_LB_FW_MAX_ID];
+ /** @wq: workqueue to submit request to download late bind blob */
+ struct workqueue_struct *wq;
+ /** @component_added: whether the component has been added */
+ bool component_added;
+ /** @disable: to block late binding reload during pm resume flow */
+ bool disable;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_lmtt.c b/drivers/gpu/drm/xe/xe_lmtt.c
index 63db66df064b..4dc1de482eee 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.c
+++ b/drivers/gpu/drm/xe/xe_lmtt.c
@@ -11,12 +11,13 @@
#include "xe_assert.h"
#include "xe_bo.h"
+#include "xe_tlb_inval.h"
#include "xe_lmtt.h"
#include "xe_map.h"
#include "xe_mmio.h"
#include "xe_res_cursor.h"
#include "xe_sriov.h"
-#include "xe_sriov_printk.h"
+#include "xe_tile_sriov_printk.h"
/**
* DOC: Local Memory Translation Table
@@ -31,7 +32,7 @@
*/
#define lmtt_assert(lmtt, condition) xe_tile_assert(lmtt_to_tile(lmtt), condition)
-#define lmtt_debug(lmtt, msg...) xe_sriov_dbg_verbose(lmtt_to_xe(lmtt), "LMTT: " msg)
+#define lmtt_debug(lmtt, msg...) xe_tile_sriov_dbg_verbose(lmtt_to_tile(lmtt), "LMTT: " msg)
static bool xe_has_multi_level_lmtt(struct xe_device *xe)
{
@@ -66,18 +67,21 @@ static struct xe_lmtt_pt *lmtt_pt_alloc(struct xe_lmtt *lmtt, unsigned int level
goto out;
}
- bo = xe_bo_create_pin_map(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt), NULL,
- PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
- lmtt->ops->lmtt_pte_num(level)),
- ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
- XE_BO_FLAG_NEEDS_64K);
+ bo = xe_bo_create_pin_map_novm(lmtt_to_xe(lmtt), lmtt_to_tile(lmtt),
+ PAGE_ALIGN(lmtt->ops->lmtt_pte_size(level) *
+ lmtt->ops->lmtt_pte_num(level)),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(lmtt_to_tile(lmtt)) |
+ XE_BO_FLAG_NEEDS_64K, false);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_free_pt;
}
lmtt_assert(lmtt, xe_bo_is_vram(bo));
+ lmtt_debug(lmtt, "level=%u addr=%#llx\n", level, (u64)xe_bo_main_addr(bo, XE_PAGE_SIZE));
+
+ xe_map_memset(lmtt_to_xe(lmtt), &bo->vmap, 0, 0, xe_bo_size(bo));
pt->level = level;
pt->bo = bo;
@@ -91,6 +95,9 @@ out:
static void lmtt_pt_free(struct xe_lmtt_pt *pt)
{
+ lmtt_debug(&pt->bo->tile->sriov.pf.lmtt, "level=%u addr=%llx\n",
+ pt->level, (u64)xe_bo_main_addr(pt->bo, XE_PAGE_SIZE));
+
xe_bo_unpin_map_no_vm(pt->bo);
kfree(pt);
}
@@ -188,14 +195,17 @@ static void lmtt_setup_dir_ptr(struct xe_lmtt *lmtt)
struct xe_tile *tile = lmtt_to_tile(lmtt);
struct xe_device *xe = tile_to_xe(tile);
dma_addr_t offset = xe_bo_main_addr(lmtt->pd->bo, XE_PAGE_SIZE);
+ struct xe_gt *gt;
+ u8 id;
lmtt_debug(lmtt, "DIR offset %pad\n", &offset);
lmtt_assert(lmtt, xe_bo_is_vram(lmtt->pd->bo));
lmtt_assert(lmtt, IS_ALIGNED(offset, SZ_64K));
- xe_mmio_write32(&tile->mmio,
- GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
- LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
+ for_each_gt_on_tile(gt, tile, id)
+ xe_mmio_write32(&gt->mmio,
+ GRAPHICS_VER(xe) >= 20 ? XE2_LMEM_CFG : LMEM_CFG,
+ LMEM_EN | REG_FIELD_PREP(LMTT_DIR_PTR, offset / SZ_64K));
}
/**
@@ -216,6 +226,57 @@ void xe_lmtt_init_hw(struct xe_lmtt *lmtt)
lmtt_setup_dir_ptr(lmtt);
}
+static int lmtt_invalidate_hw(struct xe_lmtt *lmtt)
+{
+ struct xe_tlb_inval_fence fences[XE_MAX_GT_PER_TILE];
+ struct xe_tlb_inval_fence *fence = fences;
+ struct xe_tile *tile = lmtt_to_tile(lmtt);
+ struct xe_gt *gt;
+ int result = 0;
+ int err;
+ u8 id;
+
+ for_each_gt_on_tile(gt, tile, id) {
+ xe_tlb_inval_fence_init(&gt->tlb_inval, fence, true);
+ err = xe_tlb_inval_all(&gt->tlb_inval, fence);
+ result = result ?: err;
+ fence++;
+ }
+
+ lmtt_debug(lmtt, "num_fences=%d err=%d\n", (int)(fence - fences), result);
+
+ /*
+ * It is fine to wait for all fences, even for those which cover the
+ * invalidation requests that failed, as such fences should already be
+ * marked as signaled.
+ */
+ fence = fences;
+ for_each_gt_on_tile(gt, tile, id)
+ xe_tlb_inval_fence_wait(fence++);
+
+ return result;
+}
+
+/**
+ * xe_lmtt_invalidate_hw - Invalidate LMTT hardware.
+ * @lmtt: the &xe_lmtt to invalidate
+ *
+ * Send requests to all GuCs on this tile to invalidate all TLBs.
+ *
+ * This function should be called only when running as a PF driver.
+ */
+void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt)
+{
+ int err;
+
+ lmtt_assert(lmtt, IS_SRIOV_PF(lmtt_to_xe(lmtt)));
+
+ err = lmtt_invalidate_hw(lmtt);
+ if (err)
+ xe_tile_sriov_err(lmtt_to_tile(lmtt), "LMTT invalidation failed (%pe)",
+ ERR_PTR(err));
+}
+
static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
u64 pte, unsigned int idx)
{
@@ -226,9 +287,14 @@ static void lmtt_write_pte(struct xe_lmtt *lmtt, struct xe_lmtt_pt *pt,
switch (lmtt->ops->lmtt_pte_size(level)) {
case sizeof(u32):
+ lmtt_assert(lmtt, !overflows_type(pte, u32));
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u32), u32));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u32), u32, pte);
break;
case sizeof(u64):
+ lmtt_assert(lmtt, !pte || !iosys_map_rd(&pt->bo->vmap, idx * sizeof(u64), u64));
+
xe_map_wr(lmtt_to_xe(lmtt), &pt->bo->vmap, idx * sizeof(u64), u64, pte);
break;
default:
@@ -265,6 +331,7 @@ static void lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid)
return;
lmtt_write_pte(lmtt, pd, LMTT_PTE_INVALID, vfid);
+ lmtt_invalidate_hw(lmtt);
lmtt_assert(lmtt, pd->level > 0);
lmtt_assert(lmtt, pt->level == pd->level - 1);
@@ -386,11 +453,11 @@ static void lmtt_insert_bo(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo
u64 addr, vram_offset;
lmtt_assert(lmtt, IS_ALIGNED(start, page_size));
- lmtt_assert(lmtt, IS_ALIGNED(bo->size, page_size));
+ lmtt_assert(lmtt, IS_ALIGNED(xe_bo_size(bo), page_size));
lmtt_assert(lmtt, xe_bo_is_vram(bo));
vram_offset = vram_region_gpu_offset(bo->ttm.resource);
- xe_res_first(bo->ttm.resource, 0, bo->size, &cur);
+ xe_res_first(bo->ttm.resource, 0, xe_bo_size(bo), &cur);
while (cur.remaining) {
addr = xe_res_dma(&cur);
addr += vram_offset; /* XXX */
diff --git a/drivers/gpu/drm/xe/xe_lmtt.h b/drivers/gpu/drm/xe/xe_lmtt.h
index cb10ef994db6..75a234fbf367 100644
--- a/drivers/gpu/drm/xe/xe_lmtt.h
+++ b/drivers/gpu/drm/xe/xe_lmtt.h
@@ -15,6 +15,7 @@ struct xe_lmtt_ops;
#ifdef CONFIG_PCI_IOV
int xe_lmtt_init(struct xe_lmtt *lmtt);
void xe_lmtt_init_hw(struct xe_lmtt *lmtt);
+void xe_lmtt_invalidate_hw(struct xe_lmtt *lmtt);
int xe_lmtt_prepare_pages(struct xe_lmtt *lmtt, unsigned int vfid, u64 range);
int xe_lmtt_populate_pages(struct xe_lmtt *lmtt, unsigned int vfid, struct xe_bo *bo, u64 offset);
void xe_lmtt_drop_pages(struct xe_lmtt *lmtt, unsigned int vfid);
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 61a2e87990a9..b5083c99dd50 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -8,6 +8,7 @@
#include <generated/xe_wa_oob.h>
#include <linux/ascii85.h>
+#include <linux/panic.h>
#include "instructions/xe_mi_commands.h"
#include "instructions/xe_gfxpipe_commands.h"
@@ -16,6 +17,7 @@
#include "regs/xe_lrc_layout.h"
#include "xe_bb.h"
#include "xe_bo.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue_types.h"
@@ -39,14 +41,56 @@
#define LRC_ENGINE_INSTANCE GENMASK_ULL(53, 48)
#define LRC_PPHWSP_SIZE SZ_4K
+#define LRC_INDIRECT_CTX_BO_SIZE SZ_4K
#define LRC_INDIRECT_RING_STATE_SIZE SZ_4K
+/*
+ * Layout of the LRC and associated data allocated as
+ * lrc->bo:
+ *
+ * Region Size
+ * +============================+=================================+ <- __xe_lrc_ring_offset()
+ * | Ring | ring_size, see |
+ * | | xe_lrc_init() |
+ * +============================+=================================+ <- __xe_lrc_pphwsp_offset()
+ * | PPHWSP (includes SW state) | 4K |
+ * +----------------------------+---------------------------------+ <- __xe_lrc_regs_offset()
+ * | Engine Context Image | n * 4K, see |
+ * | | xe_gt_lrc_size() |
+ * +----------------------------+---------------------------------+ <- __xe_lrc_indirect_ring_offset()
+ * | Indirect Ring State Page | 0 or 4k, see |
+ * | | XE_LRC_FLAG_INDIRECT_RING_STATE |
+ * +============================+=================================+ <- __xe_lrc_indirect_ctx_offset()
+ * | Indirect Context Page | 0 or 4k, see |
+ * | | XE_LRC_FLAG_INDIRECT_CTX |
+ * +============================+=================================+ <- __xe_lrc_wa_bb_offset()
+ * | WA BB Per Ctx | 4k |
+ * +============================+=================================+ <- xe_bo_size(lrc->bo)
+ */
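+
+/*
+ * Worked example of the layout above. All sizes are hypothetical, assuming a
+ * 16K ring, a 40K xe_gt_lrc_size() (covering the PPHWSP, the engine context
+ * image and the indirect ring state page) and an enabled indirect context page:
+ *
+ *   bo_size = 16K + 40K + 4K (indirect ctx) + 4K (WA BB) = 64K
+ *   __xe_lrc_pphwsp_offset()        = 16K
+ *   __xe_lrc_regs_offset()          = 20K
+ *   __xe_lrc_indirect_ring_offset() = 64K - 4K - 4K - 4K = 52K
+ *   __xe_lrc_indirect_ctx_offset()  = 64K - 4K - 4K = 56K
+ *   __xe_lrc_wa_bb_offset()         = 64K - 4K = 60K
+ */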
+
static struct xe_device *
lrc_to_xe(struct xe_lrc *lrc)
{
return gt_to_xe(lrc->fence_ctx.gt);
}
+static bool
+gt_engine_needs_indirect_ctx(struct xe_gt *gt, enum xe_engine_class class)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (XE_GT_WA(gt, 16010904313) &&
+ (class == XE_ENGINE_CLASS_RENDER ||
+ class == XE_ENGINE_CLASS_COMPUTE))
+ return true;
+
+ if (xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ class, NULL))
+ return true;
+
+ return false;
+}
+
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class)
{
struct xe_device *xe = gt_to_xe(gt);
@@ -581,8 +625,6 @@ static void set_context_control(u32 *regs, struct xe_hw_engine *hwe)
if (xe_gt_has_indirect_ring_state(hwe->gt))
regs[CTX_CONTEXT_CONTROL] |=
_MASKED_BIT_ENABLE(CTX_CTRL_INDIRECT_RING_STATE_ENABLE);
-
- /* TODO: Timestamp */
}
static void set_memory_based_intr(u32 *regs, struct xe_hw_engine *hwe)
@@ -654,15 +696,21 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_SEQNO_PPHWSP_OFFSET 512
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
+#define LRC_ENGINE_ID_PPHWSP_OFFSET 1024
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
-#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
{
return xe_lrc_pphwsp_offset(lrc) + LRC_PPHWSP_SIZE;
}
-static size_t lrc_reg_size(struct xe_device *xe)
+/**
+ * xe_lrc_reg_size() - Get size of the LRC registers area within queues
+ * @xe: the &xe_device struct instance
+ *
+ * Returns: Size of the LRC registers area for current platform
+ */
+size_t xe_lrc_reg_size(struct xe_device *xe)
{
if (GRAPHICS_VERx100(xe) >= 1250)
return 96 * sizeof(u32);
@@ -672,7 +720,7 @@ static size_t lrc_reg_size(struct xe_device *xe)
size_t xe_lrc_skip_size(struct xe_device *xe)
{
- return LRC_PPHWSP_SIZE + lrc_reg_size(xe);
+ return LRC_PPHWSP_SIZE + xe_lrc_reg_size(xe);
}
static inline u32 __xe_lrc_seqno_offset(struct xe_lrc *lrc)
@@ -716,8 +764,23 @@ static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
- /* Indirect ring state page is at the very end of LRC */
- return lrc->size - LRC_INDIRECT_RING_STATE_SIZE;
+ u32 offset = xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE -
+ LRC_INDIRECT_RING_STATE_SIZE;
+
+ if (lrc->flags & XE_LRC_FLAG_INDIRECT_CTX)
+ offset -= LRC_INDIRECT_CTX_BO_SIZE;
+
+ return offset;
+}
+
+static inline u32 __xe_lrc_indirect_ctx_offset(struct xe_lrc *lrc)
+{
+ return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE - LRC_INDIRECT_CTX_BO_SIZE;
+}
+
+static inline u32 __xe_lrc_wa_bb_offset(struct xe_lrc *lrc)
+{
+ return xe_bo_size(lrc->bo) - LRC_WA_BB_SIZE;
}
#define DECL_MAP_ADDR_HELPERS(elem) \
@@ -898,6 +961,47 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
return data;
}
+/**
+ * xe_default_lrc_update_memirq_regs_with_address - Re-compute GGTT references in default LRC
+ * of given engine.
+ * @hwe: the &xe_hw_engine struct instance
+ */
+void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe)
+{
+ struct xe_gt *gt = hwe->gt;
+ u32 *regs;
+
+ if (!gt->default_lrc[hwe->class])
+ return;
+
+ regs = gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE;
+ set_memory_based_intr(regs, hwe);
+}
+
+/**
+ * xe_lrc_update_memirq_regs_with_address - Re-compute GGTT references in mem interrupt data
+ * for given LRC.
+ * @lrc: the &xe_lrc struct instance
+ * @hwe: the &xe_hw_engine struct instance
+ * @regs: scratch buffer to be used as temporary storage
+ */
+void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *regs)
+{
+ struct xe_gt *gt = hwe->gt;
+ struct iosys_map map;
+ size_t regs_len;
+
+ if (!xe_device_uses_memirq(gt_to_xe(gt)))
+ return;
+
+ map = __xe_lrc_regs_map(lrc);
+ regs_len = xe_lrc_reg_size(gt_to_xe(gt));
+ xe_map_memcpy_from(gt_to_xe(gt), regs, &map, 0, regs_len);
+ set_memory_based_intr(regs, hwe);
+ xe_map_memcpy_to(gt_to_xe(gt), &map, 0, regs, regs_len);
+}
+
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
@@ -909,17 +1013,12 @@ static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
static void xe_lrc_finish(struct xe_lrc *lrc)
{
xe_hw_fence_ctx_finish(&lrc->fence_ctx);
- xe_bo_lock(lrc->bo, false);
- xe_bo_unpin(lrc->bo);
- xe_bo_unlock(lrc->bo);
- xe_bo_put(lrc->bo);
- xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+ xe_bo_unpin_map_no_vm(lrc->bo);
}
/*
- * xe_lrc_setup_utilization() - Setup wa bb to assist in calculating active
- * context run ticks.
- * @lrc: Pointer to the lrc.
+ * wa_bb_setup_utilization() - Write commands to wa bb to assist
+ * in calculating active context run ticks.
*
* Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
* context, but only gets updated when the context switches out. In order to
@@ -944,11 +1043,15 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
* store it in the PPHSWP.
*/
#define CONTEXT_ACTIVE 1ULL
-static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
+static ssize_t setup_utilization_wa(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch,
+ size_t max_len)
{
- u32 *cmd;
+ u32 *cmd = batch;
- cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+ if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
+ return -ENOSPC;
*cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
*cmd++ = ENGINE_ID(0).addr;
@@ -967,88 +1070,388 @@ static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
*cmd++ = upper_32_bits(CONTEXT_ACTIVE);
}
- *cmd++ = MI_BATCH_BUFFER_END;
+ return cmd - batch;
+}
+
+static ssize_t setup_timestamp_wa(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ const u32 ts_addr = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
+ u32 *cmd = batch;
+
+ if (!XE_GT_WA(lrc->gt, 16010904313) ||
+ !(hwe->class == XE_ENGINE_CLASS_RENDER ||
+ hwe->class == XE_ENGINE_CLASS_COMPUTE ||
+ hwe->class == XE_ENGINE_CLASS_COPY ||
+ hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE ||
+ hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE))
+ return 0;
+
+ if (xe_gt_WARN_ON(lrc->gt, max_len < 12))
+ return -ENOSPC;
+
+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
+ MI_LRM_ASYNC;
+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
+ *cmd++ = ts_addr;
+ *cmd++ = 0;
+
+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO |
+ MI_LRM_ASYNC;
+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
+ *cmd++ = ts_addr;
+ *cmd++ = 0;
+
+ *cmd++ = MI_LOAD_REGISTER_MEM | MI_LRM_USE_GGTT | MI_LRI_LRM_CS_MMIO;
+ *cmd++ = RING_CTX_TIMESTAMP(0).addr;
+ *cmd++ = ts_addr;
+ *cmd++ = 0;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_configfs_post_ctx_restore_bb(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ const u32 *user_batch;
+ u32 *cmd = batch;
+ u32 count;
+
+ count = xe_configfs_get_ctx_restore_post_bb(to_pci_dev(xe->drm.dev),
+ hwe->class, &user_batch);
+ if (!count)
+ return 0;
+
+ if (count > max_len)
+ return -ENOSPC;
+
+ /*
+ * This should be used only for tests and validation. Taint the kernel
+ * as anything could be submitted directly in context switches
+ */
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+ memcpy(cmd, user_batch, count * sizeof(u32));
+ cmd += count;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_configfs_mid_ctx_restore_bb(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ struct xe_device *xe = gt_to_xe(lrc->gt);
+ const u32 *user_batch;
+ u32 *cmd = batch;
+ u32 count;
+
+ count = xe_configfs_get_ctx_restore_mid_bb(to_pci_dev(xe->drm.dev),
+ hwe->class, &user_batch);
+ if (!count)
+ return 0;
+
+ if (count > max_len)
+ return -ENOSPC;
+
+ /*
+ * This should be used only for tests and validation. Taint the kernel
+ * as anything could be submitted directly in context switches
+ */
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK);
+
+ memcpy(cmd, user_batch, count * sizeof(u32));
+ cmd += count;
+
+ return cmd - batch;
+}
+
+static ssize_t setup_invalidate_state_cache_wa(struct xe_lrc *lrc,
+ struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_len)
+{
+ u32 *cmd = batch;
+
+ if (!XE_GT_WA(lrc->gt, 18022495364) ||
+ hwe->class != XE_ENGINE_CLASS_RENDER)
+ return 0;
+
+ if (xe_gt_WARN_ON(lrc->gt, max_len < 3))
+ return -ENOSPC;
+
+ *cmd++ = MI_LOAD_REGISTER_IMM | MI_LRI_NUM_REGS(1);
+ *cmd++ = CS_DEBUG_MODE1(0).addr;
+ *cmd++ = _MASKED_BIT_ENABLE(INSTRUCTION_STATE_CACHE_INVALIDATE);
+
+ return cmd - batch;
+}
+
+struct bo_setup {
+ ssize_t (*setup)(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *batch, size_t max_size);
+};
+
+struct bo_setup_state {
+ /* Input: */
+ struct xe_lrc *lrc;
+ struct xe_hw_engine *hwe;
+ size_t max_size;
+ size_t reserve_dw;
+ unsigned int offset;
+ const struct bo_setup *funcs;
+ unsigned int num_funcs;
+
+ /* State: */
+ u32 *buffer;
+ u32 *ptr;
+ unsigned int written;
+};
+
+static int setup_bo(struct bo_setup_state *state)
+{
+ ssize_t remain;
+
+ if (state->lrc->bo->vmap.is_iomem) {
+ xe_gt_assert(state->hwe->gt, state->buffer);
+ state->ptr = state->buffer;
+ } else {
+ state->ptr = state->lrc->bo->vmap.vaddr + state->offset;
+ }
+
+ remain = state->max_size / sizeof(u32);
+
+ for (size_t i = 0; i < state->num_funcs; i++) {
+ ssize_t len = state->funcs[i].setup(state->lrc, state->hwe,
+ state->ptr, remain);
+
+ remain -= len;
+
+ /*
+ * Caller has asked for at least reserve_dw to remain unused.
+ */
+ if (len < 0 ||
+ xe_gt_WARN_ON(state->lrc->gt, remain < state->reserve_dw))
+ goto fail;
+
+ state->ptr += len;
+ state->written += len;
+ }
+
+ return 0;
+
+fail:
+ return -ENOSPC;
+}
+
+static void finish_bo(struct bo_setup_state *state)
+{
+ if (!state->lrc->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_to(gt_to_xe(state->lrc->gt), &state->lrc->bo->vmap,
+ state->offset, state->buffer,
+ state->written * sizeof(u32));
+}
+
+/**
+ * xe_lrc_setup_wa_bb_with_scratch - Execute all wa bb setup callbacks.
+ * @lrc: the &xe_lrc struct instance
+ * @hwe: the &xe_hw_engine struct instance
+ * @scratch: preallocated scratch buffer for temporary storage
+ * Return: 0 on success, negative error code on failure
+ */
+int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe, u32 *scratch)
+{
+ static const struct bo_setup funcs[] = {
+ { .setup = setup_timestamp_wa },
+ { .setup = setup_invalidate_state_cache_wa },
+ { .setup = setup_utilization_wa },
+ { .setup = setup_configfs_post_ctx_restore_bb },
+ };
+ struct bo_setup_state state = {
+ .lrc = lrc,
+ .hwe = hwe,
+ .max_size = LRC_WA_BB_SIZE,
+ .buffer = scratch,
+ .reserve_dw = 1,
+ .offset = __xe_lrc_wa_bb_offset(lrc),
+ .funcs = funcs,
+ .num_funcs = ARRAY_SIZE(funcs),
+ };
+ int ret;
+
+ ret = setup_bo(&state);
+ if (ret)
+ return ret;
+
+ *state.ptr++ = MI_BATCH_BUFFER_END;
+ state.written++;
+
+ finish_bo(&state);
xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
- xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+ xe_bo_ggtt_addr(lrc->bo) + state.offset + 1);
+ return 0;
}
-#define PVC_CTX_ASID (0x2e + 1)
-#define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
+static int setup_wa_bb(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+{
+ u32 *buf = NULL;
+ int ret;
+
+ if (lrc->bo->vmap.is_iomem) {
+ buf = kmalloc(LRC_WA_BB_SIZE, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+ }
+
+ ret = xe_lrc_setup_wa_bb_with_scratch(lrc, hwe, buf);
+
+ kfree(buf);
+
+ return ret;
+}
+
+static int
+setup_indirect_ctx(struct xe_lrc *lrc, struct xe_hw_engine *hwe)
+{
+ static const struct bo_setup rcs_funcs[] = {
+ { .setup = setup_timestamp_wa },
+ { .setup = setup_configfs_mid_ctx_restore_bb },
+ };
+ static const struct bo_setup xcs_funcs[] = {
+ { .setup = setup_configfs_mid_ctx_restore_bb },
+ };
+ struct bo_setup_state state = {
+ .lrc = lrc,
+ .hwe = hwe,
+ .max_size = (63 * 64) /* max 63 cachelines */,
+ .buffer = NULL,
+ .offset = __xe_lrc_indirect_ctx_offset(lrc),
+ };
+ int ret;
+
+ if (!(lrc->flags & XE_LRC_FLAG_INDIRECT_CTX))
+ return 0;
+
+ if (hwe->class == XE_ENGINE_CLASS_RENDER ||
+ hwe->class == XE_ENGINE_CLASS_COMPUTE) {
+ state.funcs = rcs_funcs;
+ state.num_funcs = ARRAY_SIZE(rcs_funcs);
+ } else {
+ state.funcs = xcs_funcs;
+ state.num_funcs = ARRAY_SIZE(xcs_funcs);
+ }
+
+ if (xe_gt_WARN_ON(lrc->gt, !state.funcs))
+ return 0;
+
+ if (lrc->bo->vmap.is_iomem) {
+ state.buffer = kmalloc(state.max_size, GFP_KERNEL);
+ if (!state.buffer)
+ return -ENOMEM;
+ }
+
+ ret = setup_bo(&state);
+ if (ret) {
+ kfree(state.buffer);
+ return ret;
+ }
+
+ /*
+ * Align to 64B cacheline so there's no garbage at the end for CS to
+ * execute: size for indirect ctx must be a multiple of 64.
+ */
+ while (state.written & 0xf) {
+ *state.ptr++ = MI_NOOP;
+ state.written++;
+ }
+
+ finish_bo(&state);
+ kfree(state.buffer);
+
+ /*
+ * Enable INDIRECT_CTX leaving INDIRECT_CTX_OFFSET at its default: it
+ * varies per engine class, but the default is good enough
+ */
+ xe_lrc_write_ctx_reg(lrc,
+ CTX_CS_INDIRECT_CTX,
+ (xe_bo_ggtt_addr(lrc->bo) + state.offset) |
+ /* Size in CLs. */
+ (state.written * sizeof(u32) / 64));
+
+ return 0;
+}
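+
+/*
+ * Illustrative CTX_CS_INDIRECT_CTX value written above (the dword count is a
+ * hypothetical example): 32 written dwords = 128 bytes = 2 cachelines, so the
+ * register gets the GGTT address of the indirect context page with 2 encoded
+ * in the low bits as the size in 64B cachelines.
+ */
+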
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
struct xe_vm *vm, u32 ring_size, u16 msix_vec,
u32 init_flags)
{
struct xe_gt *gt = hwe->gt;
+ const u32 lrc_size = xe_gt_lrc_size(gt, hwe->class);
+ u32 bo_size = ring_size + lrc_size + LRC_WA_BB_SIZE;
struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = gt_to_xe(gt);
struct iosys_map map;
- void *init_data = NULL;
u32 arb_enable;
- u32 lrc_size;
u32 bo_flags;
int err;
kref_init(&lrc->refcount);
lrc->gt = gt;
+ lrc->size = lrc_size;
lrc->flags = 0;
- lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
+ lrc->ring.size = ring_size;
+ lrc->ring.tail = 0;
+
+ if (gt_engine_needs_indirect_ctx(gt, hwe->class)) {
+ lrc->flags |= XE_LRC_FLAG_INDIRECT_CTX;
+ bo_size += LRC_INDIRECT_CTX_BO_SIZE;
+ }
+
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
XE_BO_FLAG_GGTT_INVALIDATE;
- if (vm && vm->xef) /* userspace */
- bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
- /*
- * FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
- * via VM bind calls.
- */
- lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
- ttm_bo_type_kernel,
- bo_flags);
+ if ((vm && vm->xef) || init_flags & XE_LRC_CREATE_USER_CTX) /* userspace */
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM;
+
+ lrc->bo = xe_bo_create_pin_map_novm(xe, tile,
+ bo_size,
+ ttm_bo_type_kernel,
+ bo_flags, false);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
- lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
- ttm_bo_type_kernel,
- bo_flags);
- if (IS_ERR(lrc->bb_per_ctx_bo)) {
- err = PTR_ERR(lrc->bb_per_ctx_bo);
- goto err_lrc_finish;
- }
-
- lrc->size = lrc_size;
- lrc->ring.size = ring_size;
- lrc->ring.tail = 0;
-
xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
- if (!gt->default_lrc[hwe->class]) {
- init_data = empty_lrc_data(hwe);
- if (!init_data) {
- err = -ENOMEM;
- goto err_lrc_finish;
- }
- }
-
/*
* Init Per-Process of HW status Page, LRC / context state to known
- * values
+ * values. If there's already a primed default_lrc, just copy it, otherwise
+ * it's the early submission to record the lrc: build a new empty one from
+ * scratch.
*/
map = __xe_lrc_pphwsp_map(lrc);
- if (!init_data) {
+ if (gt->default_lrc[hwe->class]) {
xe_map_memset(xe, &map, 0, 0, LRC_PPHWSP_SIZE); /* PPHWSP */
xe_map_memcpy_to(xe, &map, LRC_PPHWSP_SIZE,
gt->default_lrc[hwe->class] + LRC_PPHWSP_SIZE,
- xe_gt_lrc_size(gt, hwe->class) - LRC_PPHWSP_SIZE);
+ lrc_size - LRC_PPHWSP_SIZE);
} else {
- xe_map_memcpy_to(xe, &map, 0, init_data,
- xe_gt_lrc_size(gt, hwe->class));
+ void *init_data = empty_lrc_data(hwe);
+
+ if (!init_data) {
+ err = -ENOMEM;
+ goto err_lrc_finish;
+ }
+
+ xe_map_memcpy_to(xe, &map, 0, init_data, lrc_size);
kfree(init_data);
}
@@ -1102,7 +1505,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
if (xe->info.has_asid && vm)
- xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
+ xe_lrc_write_ctx_reg(lrc, CTX_ASID, vm->usm.asid);
lrc->desc = LRC_VALID;
lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
@@ -1128,7 +1531,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
- xe_lrc_setup_utilization(lrc);
+ err = setup_wa_bb(lrc, hwe);
+ if (err)
+ goto err_lrc_finish;
+
+ err = setup_indirect_ctx(lrc, hwe);
+ if (err)
+ goto err_lrc_finish;
return 0;
@@ -1184,6 +1593,23 @@ void xe_lrc_destroy(struct kref *ref)
kfree(lrc);
}
+/**
+ * xe_lrc_update_hwctx_regs_with_address - Re-compute GGTT references within given LRC.
+ * @lrc: the &xe_lrc struct instance
+ */
+void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc)
+{
+ if (xe_lrc_has_indirect_ring_state(lrc)) {
+ xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
+ __xe_lrc_indirect_ring_ggtt_addr(lrc));
+
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
+ __xe_lrc_ring_ggtt_addr(lrc));
+ } else {
+ xe_lrc_write_ctx_reg(lrc, CTX_RING_START, __xe_lrc_ring_ggtt_addr(lrc));
+ }
+}
+
void xe_lrc_set_ring_tail(struct xe_lrc *lrc, u32 tail)
{
if (xe_lrc_has_indirect_ring_state(lrc))
@@ -1722,7 +2148,7 @@ static const struct instr_state xe_hpg_svg_state[] = {
{ .instr = CMD_3DSTATE_DRAWING_RECTANGLE, .num_dw = 4 },
};
-void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb)
+u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs)
{
struct xe_gt *gt = q->hwe->gt;
struct xe_device *xe = gt_to_xe(gt);
@@ -1749,7 +2175,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
* continue to emit all of the SVG state since it's best not to leak
* any of the state between contexts, even if that leakage is harmless.
*/
- if (XE_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
+ if (XE_GT_WA(gt, 14019789679) && q->hwe->class == XE_ENGINE_CLASS_RENDER) {
state_table = xe_hpg_svg_state;
state_table_size = ARRAY_SIZE(xe_hpg_svg_state);
}
@@ -1757,7 +2183,7 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
if (!state_table) {
xe_gt_dbg(gt, "No non-register state to emit on graphics ver %d.%02d\n",
GRAPHICS_VER(xe), GRAPHICS_VERx100(xe) % 100);
- return;
+ return cs;
}
for (int i = 0; i < state_table_size; i++) {
@@ -1780,12 +2206,14 @@ void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *b
instr == CMD_3DSTATE_DRAWING_RECTANGLE)
instr = CMD_3DSTATE_DRAWING_RECTANGLE_FAST;
- bb->cs[bb->len] = instr;
+ *cs = instr;
if (!is_single_dw)
- bb->cs[bb->len] |= (num_dw - 2);
+ *cs |= (num_dw - 2);
- bb->len += num_dw;
+ cs += num_dw;
}
+
+ return cs;
}
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
@@ -1795,9 +2223,6 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
if (!snapshot)
return NULL;
- if (lrc->bo->vm)
- xe_vm_get(lrc->bo->vm);
-
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
@@ -1809,7 +2234,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->seqno = xe_lrc_seqno(lrc);
snapshot->lrc_bo = xe_bo_get(lrc->bo);
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
- snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
+ snapshot->lrc_size = lrc->size;
snapshot->lrc_snapshot = NULL;
snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
@@ -1819,14 +2244,12 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
{
struct xe_bo *bo;
- struct xe_vm *vm;
struct iosys_map src;
if (!snapshot)
return;
bo = snapshot->lrc_bo;
- vm = bo->vm;
snapshot->lrc_bo = NULL;
snapshot->lrc_snapshot = kvmalloc(snapshot->lrc_size, GFP_KERNEL);
@@ -1846,8 +2269,6 @@ void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot)
xe_bo_unlock(bo);
put_bo:
xe_bo_put(bo);
- if (vm)
- xe_vm_put(vm);
}
void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer *p)
@@ -1900,14 +2321,9 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
return;
kvfree(snapshot->lrc_snapshot);
- if (snapshot->lrc_bo) {
- struct xe_vm *vm;
-
- vm = snapshot->lrc_bo->vm;
+ if (snapshot->lrc_bo)
xe_bo_put(snapshot->lrc_bo);
- if (vm)
- xe_vm_put(vm);
- }
+
kfree(snapshot);
}
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index eb6e8de8c939..2fb628da5c43 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -42,8 +42,12 @@ struct xe_lrc_snapshot {
#define LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR (0x34 * 4)
#define LRC_PPHWSP_PXP_INVAL_SCRATCH_ADDR (0x40 * 4)
-#define XE_LRC_CREATE_RUNALONE 0x1
-#define XE_LRC_CREATE_PXP 0x2
+#define LRC_WA_BB_SIZE SZ_4K
+
+#define XE_LRC_CREATE_RUNALONE BIT(0)
+#define XE_LRC_CREATE_PXP BIT(1)
+#define XE_LRC_CREATE_USER_CTX BIT(2)
+
struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
u32 ring_size, u16 msix_vec, u32 flags);
void xe_lrc_destroy(struct kref *ref);
@@ -72,6 +76,16 @@ static inline void xe_lrc_put(struct xe_lrc *lrc)
kref_put(&lrc->refcount, xe_lrc_destroy);
}
+/**
+ * xe_lrc_ring_size() - Xe LRC ring size
+ *
+ * Return: Size of LRC ring buffer
+ */
+static inline size_t xe_lrc_ring_size(void)
+{
+ return SZ_16K;
+}
+
size_t xe_gt_lrc_size(struct xe_gt *gt, enum xe_engine_class class);
u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc);
u32 xe_lrc_regs_offset(struct xe_lrc *lrc);
@@ -88,6 +102,10 @@ bool xe_lrc_ring_is_idle(struct xe_lrc *lrc);
u32 xe_lrc_indirect_ring_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ggtt_addr(struct xe_lrc *lrc);
u32 *xe_lrc_regs(struct xe_lrc *lrc);
+void xe_lrc_update_hwctx_regs_with_address(struct xe_lrc *lrc);
+void xe_default_lrc_update_memirq_regs_with_address(struct xe_hw_engine *hwe);
+void xe_lrc_update_memirq_regs_with_address(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *regs);
u32 xe_lrc_read_ctx_reg(struct xe_lrc *lrc, int reg_nr);
void xe_lrc_write_ctx_reg(struct xe_lrc *lrc, int reg_nr, u32 val);
@@ -106,13 +124,14 @@ s32 xe_lrc_start_seqno(struct xe_lrc *lrc);
u32 xe_lrc_parallel_ggtt_addr(struct xe_lrc *lrc);
struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc);
+size_t xe_lrc_reg_size(struct xe_device *xe);
size_t xe_lrc_skip_size(struct xe_device *xe);
void xe_lrc_dump_default(struct drm_printer *p,
struct xe_gt *gt,
enum xe_engine_class);
-void xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, struct xe_bb *bb);
+u32 *xe_lrc_emit_hwe_state_instructions(struct xe_exec_queue *q, u32 *cs);
struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc);
void xe_lrc_snapshot_capture_delayed(struct xe_lrc_snapshot *snapshot);
@@ -124,6 +143,8 @@ u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
+int xe_lrc_setup_wa_bb_with_scratch(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
+ u32 *scratch);
/**
* xe_lrc_update_timestamp - readout LRC timestamp and update cached value
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index ae24cf6f8dd9..e9883706e004 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -22,14 +22,15 @@ struct xe_lrc {
*/
struct xe_bo *bo;
- /** @size: size of lrc including any indirect ring state page */
+ /** @size: size of the lrc and optional indirect ring state */
u32 size;
/** @gt: gt which this LRC belongs to */
struct xe_gt *gt;
/** @flags: LRC flags */
-#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
+#define XE_LRC_FLAG_INDIRECT_CTX 0x1
+#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x2
u32 flags;
/** @refcount: ref count of this lrc */
@@ -53,9 +54,6 @@ struct xe_lrc {
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
u64 ctx_timestamp;
-
- /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
- struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_map.h b/drivers/gpu/drm/xe/xe_map.h
index f62e0c8b67ab..c44777125691 100644
--- a/drivers/gpu/drm/xe/xe_map.h
+++ b/drivers/gpu/drm/xe/xe_map.h
@@ -14,9 +14,9 @@
* DOC: Map layer
*
* All access to any memory shared with a device (both sysmem and vram) in the
- * XE driver should go through this layer (xe_map). This layer is built on top
+ * Xe driver should go through this layer (xe_map). This layer is built on top
* of :ref:`driver-api/device-io:Generalizing Access to System and I/O Memory`
- * and with extra hooks into the XE driver that allows adding asserts to memory
+ * and with extra hooks into the Xe driver that allows adding asserts to memory
* accesses (e.g. for blocking runtime_pm D3Cold on Discrete Graphics).
*/
diff --git a/drivers/gpu/drm/xe/xe_memirq.c b/drivers/gpu/drm/xe/xe_memirq.c
index 49c45ec3e83c..b0c7ce0a5d1e 100644
--- a/drivers/gpu/drm/xe/xe_memirq.c
+++ b/drivers/gpu/drm/xe/xe_memirq.c
@@ -14,16 +14,15 @@
#include "xe_device.h"
#include "xe_device_types.h"
#include "xe_gt.h"
-#include "xe_gt_printk.h"
#include "xe_guc.h"
#include "xe_hw_engine.h"
#include "xe_map.h"
#include "xe_memirq.h"
+#include "xe_tile_printk.h"
#define memirq_assert(m, condition) xe_tile_assert(memirq_to_tile(m), condition)
#define memirq_printk(m, _level, _fmt, ...) \
- drm_##_level(&memirq_to_xe(m)->drm, "MEMIRQ%u: " _fmt, \
- memirq_to_tile(m)->id, ##__VA_ARGS__)
+ xe_tile_##_level(memirq_to_tile(m), "MEMIRQ: " _fmt, ##__VA_ARGS__)
#ifdef CONFIG_DRM_XE_DEBUG_MEMIRQ
#define memirq_debug(m, _fmt, ...) memirq_printk(m, dbg, _fmt, ##__VA_ARGS__)
@@ -398,8 +397,9 @@ void xe_memirq_postinstall(struct xe_memirq *memirq)
memirq_set_enable(memirq, true);
}
-static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
- u16 offset, const char *name)
+static bool __memirq_received(struct xe_memirq *memirq,
+ struct iosys_map *vector, u16 offset,
+ const char *name, bool clear)
{
u8 value;
@@ -409,19 +409,33 @@ static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
memirq_err_ratelimited(memirq,
"Unexpected memirq value %#x from %s at %u\n",
value, name, offset);
- iosys_map_wr(vector, offset, u8, 0x00);
+ if (clear)
+ iosys_map_wr(vector, offset, u8, 0x00);
}
return value;
}
+static bool memirq_received_noclear(struct xe_memirq *memirq,
+ struct iosys_map *vector,
+ u16 offset, const char *name)
+{
+ return __memirq_received(memirq, vector, offset, name, false);
+}
+
+static bool memirq_received(struct xe_memirq *memirq, struct iosys_map *vector,
+ u16 offset, const char *name)
+{
+ return __memirq_received(memirq, vector, offset, name, true);
+}
+
static void memirq_dispatch_engine(struct xe_memirq *memirq, struct iosys_map *status,
struct xe_hw_engine *hwe)
{
memirq_debug(memirq, "STATUS %s %*ph\n", hwe->name, 16, status->vaddr);
- if (memirq_received(memirq, status, ilog2(GT_RENDER_USER_INTERRUPT), hwe->name))
- xe_hw_engine_handle_irq(hwe, GT_RENDER_USER_INTERRUPT);
+ if (memirq_received(memirq, status, ilog2(GT_MI_USER_INTERRUPT), hwe->name))
+ xe_hw_engine_handle_irq(hwe, GT_MI_USER_INTERRUPT);
}
static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *status,
@@ -434,8 +448,16 @@ static void memirq_dispatch_guc(struct xe_memirq *memirq, struct iosys_map *stat
if (memirq_received(memirq, status, ilog2(GUC_INTR_GUC2HOST), name))
xe_guc_irq_handler(guc, GUC_INTR_GUC2HOST);
- if (memirq_received(memirq, status, ilog2(GUC_INTR_SW_INT_0), name))
+ /*
+ * This is a software interrupt that must be cleared only after it has been
+ * consumed, to avoid a race where xe_gt_sriov_vf_recovery_pending() would
+ * return false while the interrupt is still being handled.
+ */
+ if (memirq_received_noclear(memirq, status, ilog2(GUC_INTR_SW_INT_0),
+ name)) {
xe_guc_irq_handler(guc, GUC_INTR_SW_INT_0);
+ iosys_map_wr(status, ilog2(GUC_INTR_SW_INT_0), u8, 0x00);
+ }
}
/**
@@ -461,6 +483,23 @@ void xe_memirq_hwe_handler(struct xe_memirq *memirq, struct xe_hw_engine *hwe)
}
/**
+ * xe_memirq_guc_sw_int_0_irq_pending() - SW_INT_0 IRQ is pending
+ * @memirq: the &xe_memirq
+ * @guc: the &xe_guc to check for IRQ
+ *
+ * Return: True if SW_INT_0 IRQ is pending on @guc, False otherwise
+ */
+bool xe_memirq_guc_sw_int_0_irq_pending(struct xe_memirq *memirq, struct xe_guc *guc)
+{
+ struct xe_gt *gt = guc_to_gt(guc);
+ u32 offset = xe_gt_is_media_type(gt) ? ilog2(INTR_MGUC) : ilog2(INTR_GUC);
+ struct iosys_map map = IOSYS_MAP_INIT_OFFSET(&memirq->status, offset * SZ_16);
+
+ return memirq_received_noclear(memirq, &map, ilog2(GUC_INTR_SW_INT_0),
+ guc_name(guc));
+}
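+
+/*
+ * Addressing sketch for the lookup above, as read from this file (not an
+ * authoritative HW description): each interrupt source owns a 16-byte slot in
+ * memirq->status, selected by ilog2(INTR_GUC or INTR_MGUC) * SZ_16, and each
+ * interrupt bit maps to one byte within that slot, indexed by ilog2() of the
+ * bit, e.g. GUC_INTR_SW_INT_0.
+ */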
+
+/**
* xe_memirq_handler - The `Memory Based Interrupts`_ Handler.
* @memirq: the &xe_memirq
*
diff --git a/drivers/gpu/drm/xe/xe_memirq.h b/drivers/gpu/drm/xe/xe_memirq.h
index 06130650e9d6..e25d2234ab87 100644
--- a/drivers/gpu/drm/xe/xe_memirq.h
+++ b/drivers/gpu/drm/xe/xe_memirq.h
@@ -25,4 +25,6 @@ void xe_memirq_handler(struct xe_memirq *memirq);
int xe_memirq_init_guc(struct xe_memirq *memirq, struct xe_guc *guc);
+bool xe_memirq_guc_sw_int_0_irq_pending(struct xe_memirq *memirq, struct xe_guc *guc);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 8f8e9fdfb2a8..5a95b08a4723 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -9,6 +9,7 @@
#include <linux/sizes.h>
#include <drm/drm_managed.h>
+#include <drm/drm_pagemap.h>
#include <drm/ttm/ttm_tt.h>
#include <uapi/drm/xe_drm.h>
@@ -28,12 +29,16 @@
#include "xe_lrc.h"
#include "xe_map.h"
#include "xe_mocs.h"
+#include "xe_printk.h"
#include "xe_pt.h"
#include "xe_res_cursor.h"
+#include "xe_sa.h"
#include "xe_sched_job.h"
#include "xe_sync.h"
#include "xe_trace_bo.h"
+#include "xe_validation.h"
#include "xe_vm.h"
+#include "xe_vram.h"
/**
* struct xe_migrate - migrate context.
@@ -53,6 +58,13 @@ struct xe_migrate {
u64 usm_batch_base_ofs;
/** @cleared_mem_ofs: VM offset of @cleared_bo. */
u64 cleared_mem_ofs;
+ /** @large_page_copy_ofs: VM offset of 2M pages used for large copies */
+ u64 large_page_copy_ofs;
+ /**
+ * @large_page_copy_pdes: BO offset at which to write out the 2M-page PDEs
+ * used for large copies
+ */
+ u64 large_page_copy_pdes;
/**
* @fence: dma-fence representing the last migration job batch.
* Protected by @job_mutex.
@@ -82,20 +94,7 @@ struct xe_migrate {
* of the instruction. Subtracting the instruction header (1 dword) and
* address (2 dwords), that leaves 0x3FD dwords (0x1FE qwords) for PTE values.
*/
-#define MAX_PTE_PER_SDI 0x1FE
-
-/**
- * xe_tile_migrate_exec_queue() - Get this tile's migrate exec queue.
- * @tile: The tile.
- *
- * Returns the default migrate exec queue of this tile.
- *
- * Return: The default migrate exec queue
- */
-struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile)
-{
- return tile->migrate->q;
-}
+#define MAX_PTE_PER_SDI 0x1FEU
static void xe_migrate_fini(void *arg)
{
@@ -130,38 +129,39 @@ static u64 xe_migrate_vram_ofs(struct xe_device *xe, u64 addr, bool is_comp_pte)
u64 identity_offset = IDENTITY_OFFSET;
if (GRAPHICS_VER(xe) >= 20 && is_comp_pte)
- identity_offset += DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G);
+ identity_offset += DIV_ROUND_UP_ULL(xe_vram_region_actual_physical_size
+ (xe->mem.vram), SZ_1G);
- addr -= xe->mem.vram.dpa_base;
+ addr -= xe_vram_region_dpa_base(xe->mem.vram);
return addr + (identity_offset << xe_pt_shift(2));
}
static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm, struct xe_bo *bo,
u64 map_ofs, u64 vram_offset, u16 pat_index, u64 pt_2m_ofs)
{
+ struct xe_vram_region *vram = xe->mem.vram;
+ resource_size_t dpa_base = xe_vram_region_dpa_base(vram);
u64 pos, ofs, flags;
u64 entry;
/* XXX: Unclear if this should be usable_size? */
- u64 vram_limit = xe->mem.vram.actual_physical_size +
- xe->mem.vram.dpa_base;
+ u64 vram_limit = xe_vram_region_actual_physical_size(vram) + dpa_base;
u32 level = 2;
ofs = map_ofs + XE_PAGE_SIZE * level + vram_offset * 8;
flags = vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level,
true, 0);
- xe_assert(xe, IS_ALIGNED(xe->mem.vram.usable_size, SZ_2M));
+ xe_assert(xe, IS_ALIGNED(xe_vram_region_usable_size(vram), SZ_2M));
/*
* Use 1GB pages when possible, last chunk always use 2M
* pages as mixing reserved memory (stolen, WOCPM) with a single
* mapping is not allowed on certain platforms.
*/
- for (pos = xe->mem.vram.dpa_base; pos < vram_limit;
+ for (pos = dpa_base; pos < vram_limit;
pos += SZ_1G, ofs += 8) {
if (pos + SZ_1G >= vram_limit) {
- entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs,
- pat_index);
+ entry = vm->pt_ops->pde_encode_bo(bo, pt_2m_ofs);
xe_map_wr(xe, &bo->vmap, ofs, u64, entry);
flags = vm->pt_ops->pte_encode_addr(xe, 0,
@@ -182,7 +182,7 @@ static void xe_migrate_program_identity(struct xe_device *xe, struct xe_vm *vm,
}
static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
- struct xe_vm *vm)
+ struct xe_vm *vm, struct drm_exec *exec)
{
struct xe_device *xe = tile_to_xe(tile);
u16 pat_index = xe->pat.idx[XE_CACHE_WB];
@@ -203,19 +203,19 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
/* Need to be sure everything fits in the first PT, or create more */
- xe_tile_assert(tile, m->batch_base_ofs + batch->size < SZ_2M);
+ xe_tile_assert(tile, m->batch_base_ofs + xe_bo_size(batch) < SZ_2M);
bo = xe_bo_create_pin_map(vm->xe, tile, vm,
num_entries * XE_PAGE_SIZE,
ttm_bo_type_kernel,
XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_PAGETABLE);
+ XE_BO_FLAG_PAGETABLE, exec);
if (IS_ERR(bo))
return PTR_ERR(bo);
/* PT30 & PT31 reserved for 2M identity map */
- pt29_ofs = bo->size - 3 * XE_PAGE_SIZE;
- entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs, pat_index);
+ pt29_ofs = xe_bo_size(bo) - 3 * XE_PAGE_SIZE;
+ entry = vm->pt_ops->pde_encode_bo(bo, pt29_ofs);
xe_pt_write(xe, &vm->pt_root[id]->bo->vmap, 0, entry);
map_ofs = (num_entries - num_setup) * XE_PAGE_SIZE;
@@ -236,7 +236,7 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (!IS_DGFX(xe)) {
/* Write out batch too */
m->batch_base_ofs = NUM_PT_SLOTS * XE_PAGE_SIZE;
- for (i = 0; i < batch->size;
+ for (i = 0; i < xe_bo_size(batch);
i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
XE_PAGE_SIZE) {
entry = vm->pt_ops->pte_encode_bo(batch, i,
@@ -247,13 +247,13 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
level++;
}
if (xe->info.has_usm) {
- xe_tile_assert(tile, batch->size == SZ_1M);
+ xe_tile_assert(tile, xe_bo_size(batch) == SZ_1M);
batch = tile->primary_gt->usm.bb_pool->bo;
m->usm_batch_base_ofs = m->batch_base_ofs + SZ_1M;
- xe_tile_assert(tile, batch->size == SZ_512K);
+ xe_tile_assert(tile, xe_bo_size(batch) == SZ_512K);
- for (i = 0; i < batch->size;
+ for (i = 0; i < xe_bo_size(batch);
i += vm->flags & XE_VM_FLAG_64K ? XE_64K_PAGE_SIZE :
XE_PAGE_SIZE) {
entry = vm->pt_ops->pte_encode_bo(batch, i,
@@ -283,20 +283,25 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
flags = XE_PDE_64K;
entry = vm->pt_ops->pde_encode_bo(bo, map_ofs + (u64)(level - 1) *
- XE_PAGE_SIZE, pat_index);
+ XE_PAGE_SIZE);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level, u64,
entry | flags);
}
/* Write PDE's that point to our BO. */
- for (i = 0; i < map_ofs / PAGE_SIZE; i++) {
- entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE,
- pat_index);
+ for (i = 0; i < map_ofs / XE_PAGE_SIZE; i++) {
+ entry = vm->pt_ops->pde_encode_bo(bo, (u64)i * XE_PAGE_SIZE);
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE +
(i + 1) * 8, u64, entry);
}
+ /* Reserve 2M PDEs */
+ level = 1;
+ m->large_page_copy_ofs = NUM_PT_SLOTS << xe_pt_shift(level);
+ m->large_page_copy_pdes = map_ofs + XE_PAGE_SIZE * level +
+ NUM_PT_SLOTS * 8;
+
/* Set up a 1GiB NULL mapping at 255GiB offset. */
level = 2;
xe_map_wr(xe, &bo->vmap, map_ofs + XE_PAGE_SIZE * level + 255 * 8, u64,
@@ -306,12 +311,12 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
/* Identity map the entire vram at 256GiB offset */
if (IS_DGFX(xe)) {
- u64 pt30_ofs = bo->size - 2 * XE_PAGE_SIZE;
+ u64 pt30_ofs = xe_bo_size(bo) - 2 * XE_PAGE_SIZE;
+ resource_size_t actual_phy_size = xe_vram_region_actual_physical_size(xe->mem.vram);
xe_migrate_program_identity(xe, vm, bo, map_ofs, IDENTITY_OFFSET,
pat_index, pt30_ofs);
- xe_assert(xe, xe->mem.vram.actual_physical_size <=
- (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G);
+ xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET) * SZ_1G);
/*
* Identity map the entire vram for compressed pat_index for xe2+
@@ -320,11 +325,11 @@ static int xe_migrate_prepare_vm(struct xe_tile *tile, struct xe_migrate *m,
if (GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe)) {
u16 comp_pat_index = xe->pat.idx[XE_CACHE_NONE_COMPRESSION];
u64 vram_offset = IDENTITY_OFFSET +
- DIV_ROUND_UP_ULL(xe->mem.vram.actual_physical_size, SZ_1G);
- u64 pt31_ofs = bo->size - XE_PAGE_SIZE;
+ DIV_ROUND_UP_ULL(actual_phy_size, SZ_1G);
+ u64 pt31_ofs = xe_bo_size(bo) - XE_PAGE_SIZE;
- xe_assert(xe, xe->mem.vram.actual_physical_size <= (MAX_NUM_PTE -
- IDENTITY_OFFSET - IDENTITY_OFFSET / 2) * SZ_1G);
+ xe_assert(xe, actual_phy_size <= (MAX_NUM_PTE - IDENTITY_OFFSET -
+ IDENTITY_OFFSET / 2) * SZ_1G);
xe_migrate_program_identity(xe, vm, bo, map_ofs, vram_offset,
comp_pat_index, pt31_ofs);
}
@@ -387,38 +392,63 @@ static bool xe_migrate_needs_ccs_emit(struct xe_device *xe)
}
/**
- * xe_migrate_init() - Initialize a migrate context
- * @tile: Back-pointer to the tile we're initializing for.
+ * xe_migrate_alloc - Allocate a migrate struct for a given &xe_tile
+ * @tile: &xe_tile
+ *
+ * Allocates a &xe_migrate for a given tile.
*
- * Return: Pointer to a migrate context on success. Error pointer on error.
+ * Return: &xe_migrate on success, or NULL when out of memory.
*/
-struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
+struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile)
+{
+ struct xe_migrate *m = drmm_kzalloc(&tile_to_xe(tile)->drm, sizeof(*m), GFP_KERNEL);
+
+ if (m)
+ m->tile = tile;
+ return m;
+}
+
+static int xe_migrate_lock_prepare_vm(struct xe_tile *tile, struct xe_migrate *m, struct xe_vm *vm)
{
struct xe_device *xe = tile_to_xe(tile);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ int err = 0;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ err = xe_migrate_prepare_vm(tile, m, vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ }
+
+ return err;
+}
+
+/**
+ * xe_migrate_init() - Initialize a migrate context
+ * @m: The migration context
+ *
+ * Return: 0 if successful, negative error code on failure
+ */
+int xe_migrate_init(struct xe_migrate *m)
+{
+ struct xe_tile *tile = m->tile;
struct xe_gt *primary_gt = tile->primary_gt;
- struct xe_migrate *m;
+ struct xe_device *xe = tile_to_xe(tile);
struct xe_vm *vm;
int err;
- m = devm_kzalloc(xe->drm.dev, sizeof(*m), GFP_KERNEL);
- if (!m)
- return ERR_PTR(-ENOMEM);
-
- m->tile = tile;
-
/* Special layout, prepared below.. */
vm = xe_vm_create(xe, XE_VM_FLAG_MIGRATION |
- XE_VM_FLAG_SET_TILE_ID(tile));
+ XE_VM_FLAG_SET_TILE_ID(tile), NULL);
if (IS_ERR(vm))
- return ERR_CAST(vm);
+ return PTR_ERR(vm);
- xe_vm_lock(vm, false);
- err = xe_migrate_prepare_vm(tile, m, vm);
- xe_vm_unlock(vm);
- if (err) {
- xe_vm_close_and_put(vm);
- return ERR_PTR(err);
- }
+ err = xe_migrate_lock_prepare_vm(tile, m, vm);
+ if (err)
+ goto err_out;
if (xe->info.has_usm) {
struct xe_hw_engine *hwe = xe_gt_hw_engine(primary_gt,
@@ -427,8 +457,10 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
false);
u32 logical_mask = xe_migrate_usm_logical_mask(primary_gt);
- if (!hwe || !logical_mask)
- return ERR_PTR(-EINVAL);
+ if (!hwe || !logical_mask) {
+ err = -EINVAL;
+ goto err_out;
+ }
/*
* XXX: Currently only reserving 1 (likely slow) BCS instance on
@@ -437,16 +469,18 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
EXEC_QUEUE_FLAG_KERNEL |
EXEC_QUEUE_FLAG_PERMANENT |
- EXEC_QUEUE_FLAG_HIGH_PRIORITY, 0);
+ EXEC_QUEUE_FLAG_HIGH_PRIORITY |
+ EXEC_QUEUE_FLAG_MIGRATE, 0);
} else {
m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
XE_ENGINE_CLASS_COPY,
EXEC_QUEUE_FLAG_KERNEL |
- EXEC_QUEUE_FLAG_PERMANENT, 0);
+ EXEC_QUEUE_FLAG_PERMANENT |
+ EXEC_QUEUE_FLAG_MIGRATE, 0);
}
if (IS_ERR(m->q)) {
- xe_vm_close_and_put(vm);
- return ERR_CAST(m->q);
+ err = PTR_ERR(m->q);
+ goto err_out;
}
mutex_init(&m->job_mutex);
@@ -456,7 +490,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
err = devm_add_action_or_reset(xe->drm.dev, xe_migrate_fini, m);
if (err)
- return ERR_PTR(err);
+ return err;
if (IS_DGFX(xe)) {
if (xe_migrate_needs_ccs_emit(xe))
@@ -471,7 +505,12 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
(unsigned long long)m->min_chunk_size);
}
- return m;
+ return err;
+
+err_out:
+ xe_vm_close_and_put(vm);
+ return err;
+
}
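xe_migrate_init() now returns an int and expects a pre-allocated context, so callers follow a two-phase pattern. A minimal sketch of that flow, assuming a hypothetical probe-time helper (the function name and error handling are illustrative, not part of the patch):

	static int example_tile_migrate_setup(struct xe_tile *tile)
	{
		struct xe_migrate *m;

		/* Phase 1: drmm-managed allocation of the bare context. */
		m = xe_migrate_alloc(tile);
		if (!m)
			return -ENOMEM;
		tile->migrate = m;

		/* Phase 2: build the migrate VM and exec queue once the GT is ready. */
		return xe_migrate_init(m);
	}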
static u64 max_mem_transfer_per_pass(struct xe_device *xe)
@@ -661,9 +700,9 @@ static void emit_copy_ccs(struct xe_gt *gt, struct xe_bb *bb,
}
#define EMIT_COPY_DW 10
-static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
- u64 src_ofs, u64 dst_ofs, unsigned int size,
- unsigned int pitch)
+static void emit_xy_fast_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u64 dst_ofs, unsigned int size,
+ unsigned int pitch)
{
struct xe_device *xe = gt_to_xe(gt);
u32 mocs = 0;
@@ -692,6 +731,61 @@ static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
bb->cs[bb->len++] = upper_32_bits(src_ofs);
}
+#define PAGE_COPY_MODE_PS SZ_256 /* hw uses 256 bytes as the page-size */
+static void emit_mem_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
+ u64 dst_ofs, unsigned int size, unsigned int pitch)
+{
+ u32 mode, copy_type, width;
+
+ xe_gt_assert(gt, IS_ALIGNED(size, pitch));
+ xe_gt_assert(gt, pitch <= U16_MAX);
+ xe_gt_assert(gt, pitch);
+ xe_gt_assert(gt, size);
+
+ if (IS_ALIGNED(size, PAGE_COPY_MODE_PS) &&
+ IS_ALIGNED(lower_32_bits(src_ofs), PAGE_COPY_MODE_PS) &&
+ IS_ALIGNED(lower_32_bits(dst_ofs), PAGE_COPY_MODE_PS)) {
+ mode = MEM_COPY_PAGE_COPY_MODE;
+ copy_type = 0; /* linear copy */
+ width = size / PAGE_COPY_MODE_PS;
+ } else if (pitch > 1) {
+ xe_gt_assert(gt, size / pitch <= U16_MAX);
+ mode = 0; /* BYTE_COPY */
+ copy_type = MEM_COPY_MATRIX_COPY;
+ width = pitch;
+ } else {
+ mode = 0; /* BYTE_COPY */
+ copy_type = 0; /* linear copy */
+ width = size;
+ }
+
+ xe_gt_assert(gt, width <= U16_MAX);
+
+ bb->cs[bb->len++] = MEM_COPY_CMD | mode | copy_type;
+ bb->cs[bb->len++] = width - 1;
+ bb->cs[bb->len++] = size / pitch - 1; /* ignored by hw for page-copy/linear above */
+ bb->cs[bb->len++] = pitch - 1;
+ bb->cs[bb->len++] = pitch - 1;
+ bb->cs[bb->len++] = lower_32_bits(src_ofs);
+ bb->cs[bb->len++] = upper_32_bits(src_ofs);
+ bb->cs[bb->len++] = lower_32_bits(dst_ofs);
+ bb->cs[bb->len++] = upper_32_bits(dst_ofs);
+ bb->cs[bb->len++] = FIELD_PREP(MEM_COPY_SRC_MOCS_INDEX_MASK, gt->mocs.uc_index) |
+ FIELD_PREP(MEM_COPY_DST_MOCS_INDEX_MASK, gt->mocs.uc_index);
+}
+
+static void emit_copy(struct xe_gt *gt, struct xe_bb *bb,
+ u64 src_ofs, u64 dst_ofs, unsigned int size,
+ unsigned int pitch)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ if (xe->info.has_mem_copy_instr)
+ emit_mem_copy(gt, bb, src_ofs, dst_ofs, size, pitch);
+ else
+ emit_xy_fast_copy(gt, bb, src_ofs, dst_ofs, size, pitch);
+}
+
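As a worked reading of the MEM_COPY dispatch above (numbers only, not extra driver code): a 4 KiB copy whose size and both offsets are 256-byte aligned takes the page-copy path with width = 4096 / 256 = 16; the same 4 KiB with pitch 64 but an unaligned offset becomes a matrix copy with width = 64 and 4096 / 64 = 64 rows; anything else falls back to a linear byte copy with width = size.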
static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
{
return usm ? m->usm_batch_base_ofs : m->batch_base_ofs;
@@ -768,7 +862,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
struct dma_fence *fence = NULL;
- u64 size = src_bo->size;
+ u64 size = xe_bo_size(src_bo);
struct xe_res_cursor src_it, dst_it, ccs_it;
u64 src_L0_ofs, dst_L0_ofs;
u32 src_L0_pt, dst_L0_pt;
@@ -791,7 +885,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (XE_WARN_ON(copy_ccs && src_bo != dst_bo))
return ERR_PTR(-EINVAL);
- if (src_bo != dst_bo && XE_WARN_ON(src_bo->size != dst_bo->size))
+ if (src_bo != dst_bo && XE_WARN_ON(xe_bo_size(src_bo) != xe_bo_size(dst_bo)))
return ERR_PTR(-EINVAL);
if (!src_is_vram)
@@ -809,7 +903,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
&ccs_it);
while (size) {
- u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+ u32 batch_size = 1; /* MI_BATCH_BUFFER_END */
struct xe_sched_job *job;
struct xe_bb *bb;
u32 flush_flags = 0;
@@ -834,11 +928,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
batch_size += pte_update_size(m, pte_flags, src, &src_it, &src_L0,
&src_L0_ofs, &src_L0_pt, 0, 0,
avail_pts);
-
- pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
- batch_size += pte_update_size(m, pte_flags, dst, &dst_it, &src_L0,
- &dst_L0_ofs, &dst_L0_pt, 0,
- avail_pts, avail_pts);
+ if (copy_only_ccs) {
+ dst_L0_ofs = src_L0_ofs;
+ } else {
+ pte_flags = dst_is_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
+ batch_size += pte_update_size(m, pte_flags, dst,
+ &dst_it, &src_L0,
+ &dst_L0_ofs, &dst_L0_pt,
+ 0, avail_pts, avail_pts);
+ }
if (copy_system_ccs) {
xe_assert(xe, type_device);
@@ -863,12 +961,12 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
if (src_is_vram && xe_migrate_allow_identity(src_L0, &src_it))
xe_res_next(&src_it, src_L0);
else
- emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs,
+ emit_pte(m, bb, src_L0_pt, src_is_vram, copy_system_ccs || use_comp_pat,
&src_it, src_L0, src);
if (dst_is_vram && xe_migrate_allow_identity(src_L0, &dst_it))
xe_res_next(&dst_it, src_L0);
- else
+ else if (!copy_only_ccs)
emit_pte(m, bb, dst_L0_pt, dst_is_vram, copy_system_ccs,
&dst_it, src_L0, dst);
@@ -896,11 +994,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
goto err;
}
- xe_sched_job_add_migrate_flush(job, flush_flags);
+ xe_sched_job_add_migrate_flush(job, flush_flags | MI_INVALIDATE_TLB);
if (!fence) {
err = xe_sched_job_add_deps(job, src_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
- if (!err && src_bo != dst_bo)
+ if (!err && src_bo->ttm.base.resv != dst_bo->ttm.base.resv)
err = xe_sched_job_add_deps(job, dst_bo->ttm.base.resv,
DMA_RESV_USAGE_BOOKKEEP);
if (err)
@@ -940,6 +1038,301 @@ err_sync:
return fence;
}
+/**
+ * xe_migrate_lrc() - Get the LRC from the migrate context.
+ * @migrate: Migrate context.
+ *
+ * Return: Pointer to the migrate context's LRC.
+ */
+struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate)
+{
+ return migrate->q->lrc[0];
+}
+
+static u64 migrate_vm_ppgtt_addr_tlb_inval(void)
+{
+ /*
+ * The migrate VM is self-referential so it can modify its own PTEs (see
+ * pte_update_size() or emit_pte() functions). We reserve NUM_KERNEL_PDE
+ * entries for kernel operations (copies, clears, CCS migrate), and
+ * suballocate the rest to user operations (binds/unbinds). With
+ * NUM_KERNEL_PDE = 15, NUM_KERNEL_PDE - 1 is already used for PTE updates,
+ * so assign NUM_KERNEL_PDE - 2 for TLB invalidation.
+ */
+ return (NUM_KERNEL_PDE - 2) * XE_PAGE_SIZE;
+}
+
+static int emit_flush_invalidate(u32 *dw, int i, u32 flags)
+{
+ u64 addr = migrate_vm_ppgtt_addr_tlb_inval();
+
+ dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
+ MI_FLUSH_IMM_DW | flags;
+ dw[i++] = lower_32_bits(addr);
+ dw[i++] = upper_32_bits(addr);
+ dw[i++] = MI_NOOP;
+ dw[i++] = MI_NOOP;
+
+ return i;
+}
+
+/**
+ * xe_migrate_ccs_rw_copy() - Copy content of TTM resources.
+ * @tile: Tile whose migration context is to be used.
+ * @q: Exec queue to be used along with the migration context.
+ * @src_bo: The source buffer object.
+ * @read_write: Whether to create BB commands for a CCS read or a CCS write.
+ *
+ * Creates batch buffer instructions to copy CCS metadata from the CCS pool to
+ * memory and vice versa.
+ *
+ * This function should only be called for IGPU.
+ *
+ * Return: 0 if successful, negative error code on failure.
+ */
+int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
+ struct xe_bo *src_bo,
+ enum xe_sriov_vf_ccs_rw_ctxs read_write)
+{
+ bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX;
+ bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX;
+ struct ttm_resource *src = src_bo->ttm.resource;
+ struct xe_migrate *m = tile->migrate;
+ struct xe_gt *gt = tile->primary_gt;
+ u32 batch_size, batch_size_allocated;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_res_cursor src_it, ccs_it;
+ u64 size = xe_bo_size(src_bo);
+ struct xe_bb *bb = NULL;
+ u64 src_L0, src_L0_ofs;
+ u32 src_L0_pt;
+ int err;
+
+ xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+
+ xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
+ PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+ &ccs_it);
+
+ /* Calculate Batch buffer size */
+ batch_size = 0;
+ while (size) {
+ batch_size += 10; /* Flush + ggtt addr + 2 NOP */
+ u64 ccs_ofs, ccs_size;
+ u32 ccs_pt;
+
+ u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+ src_L0 = min_t(u64, max_mem_transfer_per_pass(xe), size);
+
+ batch_size += pte_update_size(m, false, src, &src_it, &src_L0,
+ &src_L0_ofs, &src_L0_pt, 0, 0,
+ avail_pts);
+
+ ccs_size = xe_device_ccs_bytes(xe, src_L0);
+ batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs,
+ &ccs_pt, 0, avail_pts, avail_pts);
+ xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+
+ /* Add copy commands size here */
+ batch_size += EMIT_COPY_CCS_DW;
+
+ size -= src_L0;
+ }
+
+ bb = xe_bb_ccs_new(gt, batch_size, read_write);
+ if (IS_ERR(bb)) {
+ drm_err(&xe->drm, "BB allocation failed.\n");
+ err = PTR_ERR(bb);
+ goto err_ret;
+ }
+
+ batch_size_allocated = batch_size;
+ size = xe_bo_size(src_bo);
+ batch_size = 0;
+
+ /*
+ * Emit PTE and copy commands here.
+ * The CCS copy command only supports a limited size. If the size to be
+ * copied exceeds that limit, split the copy into chunks. So, calculate the
+ * sizes here again before the copy commands are emitted.
+ */
+ while (size) {
+ batch_size += 10; /* Flush + ggtt addr + 2 NOP */
+ u32 flush_flags = 0;
+ u64 ccs_ofs, ccs_size;
+ u32 ccs_pt;
+
+ u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+ src_L0 = xe_migrate_res_sizes(m, &src_it);
+
+ batch_size += pte_update_size(m, false, src, &src_it, &src_L0,
+ &src_L0_ofs, &src_L0_pt, 0, 0,
+ avail_pts);
+
+ ccs_size = xe_device_ccs_bytes(xe, src_L0);
+ batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs,
+ &ccs_pt, 0, avail_pts, avail_pts);
+ xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+ batch_size += EMIT_COPY_CCS_DW;
+
+ emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
+
+ emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+
+ bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
+ flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_pltt,
+ src_L0_ofs, dst_is_pltt,
+ src_L0, ccs_ofs, true);
+ bb->len = emit_flush_invalidate(bb->cs, bb->len, flush_flags);
+
+ size -= src_L0;
+ }
+
+ xe_assert(xe, (batch_size_allocated == bb->len));
+ src_bo->bb_ccs[read_write] = bb;
+
+ return 0;
+
+err_ret:
+ return err;
+}
+
+/**
+ * xe_migrate_exec_queue() - Get the exec queue from the migrate context.
+ * @migrate: Migrate context.
+ *
+ * Return: Pointer to the migrate context's exec queue.
+ */
+struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate)
+{
+ return migrate->q;
+}
+
+/**
+ * xe_migrate_vram_copy_chunk() - Copy a chunk of a VRAM buffer object.
+ * @vram_bo: The VRAM buffer object.
+ * @vram_offset: The VRAM offset.
+ * @sysmem_bo: The sysmem buffer object.
+ * @sysmem_offset: The sysmem offset.
+ * @size: The size of VRAM chunk to copy.
+ * @dir: The direction of the copy operation.
+ *
+ * Copies a portion of a buffer object between VRAM and system memory.
+ * On Xe2 platforms that support flat CCS, VRAM data is decompressed when
+ * copying to system memory.
+ *
+ * Return: Pointer to a dma_fence representing the last copy batch, or
+ * an error pointer on failure. If there is a failure, any copy operation
+ * started by the function call has been synced.
+ */
+struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
+ struct xe_bo *sysmem_bo, u64 sysmem_offset,
+ u64 size, enum xe_migrate_copy_dir dir)
+{
+ struct xe_device *xe = xe_bo_device(vram_bo);
+ struct xe_tile *tile = vram_bo->tile;
+ struct xe_gt *gt = tile->primary_gt;
+ struct xe_migrate *m = tile->migrate;
+ struct dma_fence *fence = NULL;
+ struct ttm_resource *vram = vram_bo->ttm.resource;
+ struct ttm_resource *sysmem = sysmem_bo->ttm.resource;
+ struct xe_res_cursor vram_it, sysmem_it;
+ u64 vram_L0_ofs, sysmem_L0_ofs;
+ u32 vram_L0_pt, sysmem_L0_pt;
+ u64 vram_L0, sysmem_L0;
+ bool to_sysmem = (dir == XE_MIGRATE_COPY_TO_SRAM);
+ bool use_comp_pat = to_sysmem &&
+ GRAPHICS_VER(xe) >= 20 && xe_device_has_flat_ccs(xe);
+ int pass = 0;
+ int err;
+
+ xe_assert(xe, IS_ALIGNED(vram_offset | sysmem_offset | size, PAGE_SIZE));
+ xe_assert(xe, xe_bo_is_vram(vram_bo));
+ xe_assert(xe, !xe_bo_is_vram(sysmem_bo));
+ xe_assert(xe, !range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size));
+ xe_assert(xe, !range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size));
+
+ xe_res_first(vram, vram_offset, size, &vram_it);
+ xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it);
+
+ while (size) {
+ u32 pte_flags = PTE_UPDATE_FLAG_IS_VRAM;
+ u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
+ struct xe_sched_job *job;
+ struct xe_bb *bb;
+ u32 update_idx;
+ bool usm = xe->info.has_usm;
+ u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
+
+ sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it);
+ vram_L0 = min(xe_migrate_res_sizes(m, &vram_it), sysmem_L0);
+
+ xe_dbg(xe, "Pass %u, size: %llu\n", pass++, vram_L0);
+
+ pte_flags |= use_comp_pat ? PTE_UPDATE_FLAG_IS_COMP_PTE : 0;
+ batch_size += pte_update_size(m, pte_flags, vram, &vram_it, &vram_L0,
+ &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts);
+
+ batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0, &sysmem_L0_ofs,
+ &sysmem_L0_pt, 0, avail_pts, avail_pts);
+ batch_size += EMIT_COPY_DW;
+
+ bb = xe_bb_new(gt, batch_size, usm);
+ if (IS_ERR(bb)) {
+ err = PTR_ERR(bb);
+ return ERR_PTR(err);
+ }
+
+ if (xe_migrate_allow_identity(vram_L0, &vram_it))
+ xe_res_next(&vram_it, vram_L0);
+ else
+ emit_pte(m, bb, vram_L0_pt, true, use_comp_pat, &vram_it, vram_L0, vram);
+
+ emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem);
+
+ bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
+ update_idx = bb->len;
+
+ if (to_sysmem)
+ emit_copy(gt, bb, vram_L0_ofs, sysmem_L0_ofs, vram_L0, XE_PAGE_SIZE);
+ else
+ emit_copy(gt, bb, sysmem_L0_ofs, vram_L0_ofs, vram_L0, XE_PAGE_SIZE);
+
+ job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm),
+ update_idx);
+ if (IS_ERR(job)) {
+ xe_bb_free(bb, NULL);
+ err = PTR_ERR(job);
+ return ERR_PTR(err);
+ }
+
+ xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
+
+ xe_assert(xe, dma_resv_test_signaled(vram_bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP));
+ xe_assert(xe, dma_resv_test_signaled(sysmem_bo->ttm.base.resv,
+ DMA_RESV_USAGE_BOOKKEEP));
+
+ scoped_guard(mutex, &m->job_mutex) {
+ xe_sched_job_arm(job);
+ dma_fence_put(fence);
+ fence = dma_fence_get(&job->drm.s_fence->finished);
+ xe_sched_job_push(job);
+
+ dma_fence_put(m->fence);
+ m->fence = dma_fence_get(fence);
+ }
+
+ xe_bb_free(bb, fence);
+ size -= vram_L0;
+ }
+
+ return fence;
+}
+
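A minimal usage sketch for the chunked VRAM copy above, assuming a hypothetical caller that already holds idle, pinned vram_bo and sysmem_bo objects (as the dma-resv asserts in the function require) and wants to save a page-aligned chunk to system memory:

	static int example_save_vram_chunk(struct xe_bo *vram_bo, struct xe_bo *sysmem_bo,
					   u64 offset, u64 size)
	{
		struct dma_fence *fence;

		/* offset and size must be page-aligned, per the asserts above. */
		fence = xe_migrate_vram_copy_chunk(vram_bo, offset, sysmem_bo, 0,
						   size, XE_MIGRATE_COPY_TO_SRAM);
		if (IS_ERR(fence))
			return PTR_ERR(fence);

		dma_fence_wait(fence, false);
		dma_fence_put(fence);
		return 0;
	}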
static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
u32 size, u32 pitch)
{
@@ -1064,7 +1457,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
struct xe_device *xe = gt_to_xe(gt);
bool clear_only_system_ccs = false;
struct dma_fence *fence = NULL;
- u64 size = bo->size;
+ u64 size = xe_bo_size(bo);
struct xe_res_cursor src_it;
struct ttm_resource *src = dst;
int err;
@@ -1076,9 +1469,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
clear_only_system_ccs = true;
if (!clear_vram)
- xe_res_first_sg(xe_bo_sg(bo), 0, bo->size, &src_it);
+ xe_res_first_sg(xe_bo_sg(bo), 0, xe_bo_size(bo), &src_it);
else
- xe_res_first(src, 0, bo->size, &src_it);
+ xe_res_first(src, 0, xe_bo_size(bo), &src_it);
while (size) {
u64 clear_L0_ofs;
@@ -1097,7 +1490,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
/* Calculate final sizes and batch size.. */
pte_flags = clear_vram ? PTE_UPDATE_FLAG_IS_VRAM : 0;
- batch_size = 2 +
+ batch_size = 1 +
pte_update_size(m, pte_flags, src, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
clear_bo_data ? emit_clear_cmd_len(gt) : 0, 0,
@@ -1119,11 +1512,13 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
size -= clear_L0;
/* Preemption is enabled again by the ring ops. */
- if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it))
+ if (clear_vram && xe_migrate_allow_identity(clear_L0, &src_it)) {
xe_res_next(&src_it, clear_L0);
- else
- emit_pte(m, bb, clear_L0_pt, clear_vram, clear_only_system_ccs,
- &src_it, clear_L0, dst);
+ } else {
+ emit_pte(m, bb, clear_L0_pt, clear_vram,
+ clear_only_system_ccs, &src_it, clear_L0, dst);
+ flush_flags |= MI_INVALIDATE_TLB;
+ }
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
@@ -1134,7 +1529,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
if (xe_migrate_needs_ccs_emit(xe)) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_mem_ofs, false, clear_L0);
- flush_flags = MI_FLUSH_DW_CCS;
+ flush_flags |= MI_FLUSH_DW_CCS;
}
job = xe_bb_create_migration_job(m->q, bb,
@@ -1407,7 +1802,7 @@ __xe_migrate_update_pgtables(struct xe_migrate *m,
if (idx == chunk)
goto next_cmd;
- xe_tile_assert(tile, pt_bo->size == SZ_4K);
+ xe_tile_assert(tile, xe_bo_size(pt_bo) == SZ_4K);
/* Map a PT at most once */
if (pt_bo->update_index < 0)
@@ -1469,6 +1864,8 @@ next_cmd:
goto err_sa;
}
+ xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
+
if (ops->pre_commit) {
pt_update->job = job;
err = ops->pre_commit(pt_update);
@@ -1553,15 +1950,17 @@ static u32 pte_update_cmd_size(u64 size)
u64 entries = DIV_U64_ROUND_UP(size, XE_PAGE_SIZE);
XE_WARN_ON(size > MAX_PREEMPTDISABLE_TRANSFER);
+
/*
* MI_STORE_DATA_IMM command is used to update page table. Each
- * instruction can update maximumly 0x1ff pte entries. To update
- * n (n <= 0x1ff) pte entries, we need:
- * 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
- * 2 dword for the page table's physical location
- * 2*n dword for value of pte to fill (each pte entry is 2 dwords)
+ * instruction can update at most MAX_PTE_PER_SDI pte entries. To
+ * update n (n <= MAX_PTE_PER_SDI) pte entries, we need:
+ *
+ * - 1 dword for the MI_STORE_DATA_IMM command header (opcode etc)
+ * - 2 dword for the page table's physical location
+ * - 2*n dword for value of pte to fill (each pte entry is 2 dwords)
*/
- num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, 0x1ff);
+ num_dword = (1 + 2) * DIV_U64_ROUND_UP(entries, MAX_PTE_PER_SDI);
num_dword += entries * 2;
return num_dword;
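As a worked example of the dword budget above, assuming MAX_PTE_PER_SDI keeps the 0x1ff value of the literal it replaces (an assumption, since the macro definition is outside this hunk): a 2 MiB update covers 512 PTEs, needing DIV_U64_ROUND_UP(512, 511) = 2 MI_STORE_DATA_IMM headers at 3 dwords each plus 2 * 512 dwords of PTE payload, i.e. 6 + 1024 = 1030 dwords.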
@@ -1569,15 +1968,22 @@ static u32 pte_update_cmd_size(u64 size)
static void build_pt_update_batch_sram(struct xe_migrate *m,
struct xe_bb *bb, u32 pt_offset,
- dma_addr_t *sram_addr, u32 size)
+ struct drm_pagemap_addr *sram_addr,
+ u32 size, int level)
{
u16 pat_index = tile_to_xe(m->tile)->pat.idx[XE_CACHE_WB];
+ u64 gpu_page_size = 0x1ull << xe_pt_shift(level);
u32 ptes;
int i = 0;
- ptes = DIV_ROUND_UP(size, XE_PAGE_SIZE);
+ xe_tile_assert(m->tile, PAGE_ALIGNED(size));
+
+ ptes = DIV_ROUND_UP(size, gpu_page_size);
while (ptes) {
- u32 chunk = min(0x1ffU, ptes);
+ u32 chunk = min(MAX_PTE_PER_SDI, ptes);
+
+ if (!level)
+ chunk = ALIGN_DOWN(chunk, PAGE_SIZE / XE_PAGE_SIZE);
bb->cs[bb->len++] = MI_STORE_DATA_IMM | MI_SDI_NUM_QW(chunk);
bb->cs[bb->len++] = pt_offset;
@@ -1587,53 +1993,101 @@ static void build_pt_update_batch_sram(struct xe_migrate *m,
ptes -= chunk;
while (chunk--) {
- u64 addr = sram_addr[i++] & PAGE_MASK;
+ u64 addr = sram_addr[i].addr;
+ u64 pte;
+ xe_tile_assert(m->tile, sram_addr[i].proto ==
+ DRM_INTERCONNECT_SYSTEM);
xe_tile_assert(m->tile, addr);
- addr = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
- addr, pat_index,
- 0, false, 0);
- bb->cs[bb->len++] = lower_32_bits(addr);
- bb->cs[bb->len++] = upper_32_bits(addr);
+ xe_tile_assert(m->tile, PAGE_ALIGNED(addr));
+
+again:
+ pte = m->q->vm->pt_ops->pte_encode_addr(m->tile->xe,
+ addr, pat_index,
+ level, false, 0);
+ bb->cs[bb->len++] = lower_32_bits(pte);
+ bb->cs[bb->len++] = upper_32_bits(pte);
+
+ if (gpu_page_size < PAGE_SIZE) {
+ addr += XE_PAGE_SIZE;
+ if (!PAGE_ALIGNED(addr)) {
+ chunk--;
+ goto again;
+ }
+ i++;
+ } else {
+ i += gpu_page_size / PAGE_SIZE;
+ }
}
}
}
-enum xe_migrate_copy_dir {
- XE_MIGRATE_COPY_TO_VRAM,
- XE_MIGRATE_COPY_TO_SRAM,
-};
+static bool xe_migrate_vram_use_pde(struct drm_pagemap_addr *sram_addr,
+ unsigned long size)
+{
+ u32 large_size = (0x1 << xe_pt_shift(1));
+ unsigned long i, incr = large_size / PAGE_SIZE;
+
+ for (i = 0; i < DIV_ROUND_UP(size, PAGE_SIZE); i += incr)
+ if (PAGE_SIZE << sram_addr[i].order != large_size)
+ return false;
+
+ return true;
+}
#define XE_CACHELINE_BYTES 64ull
#define XE_CACHELINE_MASK (XE_CACHELINE_BYTES - 1)
+static u32 xe_migrate_copy_pitch(struct xe_device *xe, u32 len)
+{
+ u32 pitch;
+
+ if (IS_ALIGNED(len, PAGE_SIZE))
+ pitch = PAGE_SIZE;
+ else if (IS_ALIGNED(len, SZ_4K))
+ pitch = SZ_4K;
+ else if (IS_ALIGNED(len, SZ_256))
+ pitch = SZ_256;
+ else if (IS_ALIGNED(len, 4))
+ pitch = 4;
+ else
+ pitch = 1;
+
+ xe_assert(xe, pitch > 1 || xe->info.has_mem_copy_instr);
+ return pitch;
+}
+
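For example, with 4 KiB pages a 64 KiB transfer is PAGE_SIZE-aligned and gets a page-sized pitch, a 1536-byte transfer falls through to the 256-byte pitch, a 20-byte transfer gets a pitch of 4, and the byte pitch of 1 is only reachable on parts with the MEM_COPY instruction, as the closing assert documents.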
static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
unsigned long len,
unsigned long sram_offset,
- dma_addr_t *sram_addr, u64 vram_addr,
+ struct drm_pagemap_addr *sram_addr,
+ u64 vram_addr,
+ struct dma_fence *deps,
const enum xe_migrate_copy_dir dir)
{
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
bool use_usm_batch = xe->info.has_usm;
struct dma_fence *fence = NULL;
- u32 batch_size = 2;
+ u32 batch_size = 1;
u64 src_L0_ofs, dst_L0_ofs;
struct xe_sched_job *job;
struct xe_bb *bb;
u32 update_idx, pt_slot = 0;
unsigned long npages = DIV_ROUND_UP(len + sram_offset, PAGE_SIZE);
- unsigned int pitch = len >= PAGE_SIZE && !(len & ~PAGE_MASK) ?
- PAGE_SIZE : 4;
+ unsigned int pitch = xe_migrate_copy_pitch(xe, len);
int err;
+ unsigned long i, j;
+ bool use_pde = xe_migrate_vram_use_pde(sram_addr, len + sram_offset);
- if (drm_WARN_ON(&xe->drm, (len & XE_CACHELINE_MASK) ||
- (sram_offset | vram_addr) & XE_CACHELINE_MASK))
+ if (!xe->info.has_mem_copy_instr &&
+ drm_WARN_ON(&xe->drm,
+ (!IS_ALIGNED(len, pitch)) || (sram_offset | vram_addr) & XE_CACHELINE_MASK))
return ERR_PTR(-EOPNOTSUPP);
xe_assert(xe, npages * PAGE_SIZE <= MAX_PREEMPTDISABLE_TRANSFER);
- batch_size += pte_update_cmd_size(len);
+ batch_size += pte_update_cmd_size(npages << PAGE_SHIFT);
batch_size += EMIT_COPY_DW;
bb = xe_bb_new(gt, batch_size, use_usm_batch);
@@ -1642,16 +2096,44 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
return ERR_PTR(err);
}
- build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
- sram_addr, len + sram_offset);
+ /*
+ * If the order of a struct drm_pagemap_addr entry is greater than 0,
+ * the entry is populated by GPU pagemap but subsequent entries within
+ * the range of that order are not populated.
+ * build_pt_update_batch_sram() expects a fully populated array of
+ * struct drm_pagemap_addr. Ensure this is the case even with higher
+ * orders.
+ */
+ for (i = 0; !use_pde && i < npages;) {
+ unsigned int order = sram_addr[i].order;
+
+ for (j = 1; j < NR_PAGES(order) && i + j < npages; j++)
+ if (!sram_addr[i + j].addr)
+ sram_addr[i + j].addr = sram_addr[i].addr + j * PAGE_SIZE;
+
+ i += NR_PAGES(order);
+ }
+
+ if (use_pde)
+ build_pt_update_batch_sram(m, bb, m->large_page_copy_pdes,
+ sram_addr, npages << PAGE_SHIFT, 1);
+ else
+ build_pt_update_batch_sram(m, bb, pt_slot * XE_PAGE_SIZE,
+ sram_addr, npages << PAGE_SHIFT, 0);
if (dir == XE_MIGRATE_COPY_TO_VRAM) {
- src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
+ if (use_pde)
+ src_L0_ofs = m->large_page_copy_ofs + sram_offset;
+ else
+ src_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
dst_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
} else {
src_L0_ofs = xe_migrate_vram_ofs(xe, vram_addr, false);
- dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
+ if (use_pde)
+ dst_L0_ofs = m->large_page_copy_ofs + sram_offset;
+ else
+ dst_L0_ofs = xe_migrate_vm_addr(pt_slot, 0) + sram_offset;
}
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
@@ -1667,7 +2149,15 @@ static struct dma_fence *xe_migrate_vram(struct xe_migrate *m,
goto err;
}
- xe_sched_job_add_migrate_flush(job, 0);
+ xe_sched_job_add_migrate_flush(job, MI_INVALIDATE_TLB);
+
+ if (deps && !dma_fence_is_signaled(deps)) {
+ dma_fence_get(deps);
+ err = drm_sched_job_add_dependency(&job->drm, deps);
+ if (err)
+ dma_fence_wait(deps, false);
+ err = 0;
+ }
mutex_lock(&m->job_mutex);
xe_sched_job_arm(job);
@@ -1692,21 +2182,24 @@ err:
* xe_migrate_to_vram() - Migrate to VRAM
* @m: The migration context.
* @npages: Number of pages to migrate.
- * @src_addr: Array of dma addresses (source of migrate)
+ * @src_addr: Array of DMA information (source of migrate)
* @dst_addr: Device physical address of VRAM (destination of migrate)
+ * @deps: struct dma_fence representing the dependencies that need
+ * to be signaled before migration.
*
* Copy from an array dma addresses to a VRAM device physical address
*
- * Return: dma fence for migrate to signal completion on succees, ERR_PTR on
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
* failure
*/
struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
unsigned long npages,
- dma_addr_t *src_addr,
- u64 dst_addr)
+ struct drm_pagemap_addr *src_addr,
+ u64 dst_addr,
+ struct dma_fence *deps)
{
return xe_migrate_vram(m, npages * PAGE_SIZE, 0, src_addr, dst_addr,
- XE_MIGRATE_COPY_TO_VRAM);
+ deps, XE_MIGRATE_COPY_TO_VRAM);
}
/**
@@ -1714,71 +2207,78 @@ struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
* @m: The migration context.
* @npages: Number of pages to migrate.
* @src_addr: Device physical address of VRAM (source of migrate)
- * @dst_addr: Array of dma addresses (destination of migrate)
+ * @dst_addr: Array of DMA information (destination of migrate)
+ * @deps: struct dma_fence representing the dependencies that need
+ * to be signaled before migration.
*
* Copy from a VRAM device physical address to an array dma addresses
*
- * Return: dma fence for migrate to signal completion on succees, ERR_PTR on
+ * Return: dma fence for migrate to signal completion on success, ERR_PTR on
* failure
*/
struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
unsigned long npages,
u64 src_addr,
- dma_addr_t *dst_addr)
+ struct drm_pagemap_addr *dst_addr,
+ struct dma_fence *deps)
{
return xe_migrate_vram(m, npages * PAGE_SIZE, 0, dst_addr, src_addr,
- XE_MIGRATE_COPY_TO_SRAM);
+ deps, XE_MIGRATE_COPY_TO_SRAM);
}
-static void xe_migrate_dma_unmap(struct xe_device *xe, dma_addr_t *dma_addr,
+static void xe_migrate_dma_unmap(struct xe_device *xe,
+ struct drm_pagemap_addr *pagemap_addr,
int len, int write)
{
unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE);
for (i = 0; i < npages; ++i) {
- if (!dma_addr[i])
+ if (!pagemap_addr[i].addr)
break;
- dma_unmap_page(xe->drm.dev, dma_addr[i], PAGE_SIZE,
+ dma_unmap_page(xe->drm.dev, pagemap_addr[i].addr, PAGE_SIZE,
write ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
}
- kfree(dma_addr);
+ kfree(pagemap_addr);
}
-static dma_addr_t *xe_migrate_dma_map(struct xe_device *xe,
- void *buf, int len, int write)
+static struct drm_pagemap_addr *xe_migrate_dma_map(struct xe_device *xe,
+ void *buf, int len,
+ int write)
{
- dma_addr_t *dma_addr;
+ struct drm_pagemap_addr *pagemap_addr;
unsigned long i, npages = DIV_ROUND_UP(len, PAGE_SIZE);
- dma_addr = kcalloc(npages, sizeof(*dma_addr), GFP_KERNEL);
- if (!dma_addr)
+ pagemap_addr = kcalloc(npages, sizeof(*pagemap_addr), GFP_KERNEL);
+ if (!pagemap_addr)
return ERR_PTR(-ENOMEM);
for (i = 0; i < npages; ++i) {
dma_addr_t addr;
struct page *page;
+ enum dma_data_direction dir = write ? DMA_TO_DEVICE :
+ DMA_FROM_DEVICE;
if (is_vmalloc_addr(buf))
page = vmalloc_to_page(buf);
else
page = virt_to_page(buf);
- addr = dma_map_page(xe->drm.dev,
- page, 0, PAGE_SIZE,
- write ? DMA_TO_DEVICE :
- DMA_FROM_DEVICE);
+ addr = dma_map_page(xe->drm.dev, page, 0, PAGE_SIZE, dir);
if (dma_mapping_error(xe->drm.dev, addr))
goto err_fault;
- dma_addr[i] = addr;
+ pagemap_addr[i] =
+ drm_pagemap_addr_encode(addr,
+ DRM_INTERCONNECT_SYSTEM,
+ 0, dir);
buf += PAGE_SIZE;
}
- return dma_addr;
+ return pagemap_addr;
err_fault:
- xe_migrate_dma_unmap(xe, dma_addr, len, write);
+ xe_migrate_dma_unmap(xe, pagemap_addr, len, write);
return ERR_PTR(-EFAULT);
}
@@ -1807,7 +2307,7 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
struct xe_device *xe = tile_to_xe(tile);
struct xe_res_cursor cursor;
struct dma_fence *fence = NULL;
- dma_addr_t *dma_addr;
+ struct drm_pagemap_addr *pagemap_addr;
unsigned long page_offset = (unsigned long)buf & ~PAGE_MASK;
int bytes_left = len, current_page = 0;
void *orig_buf = buf;
@@ -1815,18 +2315,24 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
xe_bo_assert_held(bo);
/* Use bounce buffer for small access and unaligned access */
- if (len & XE_CACHELINE_MASK ||
- ((uintptr_t)buf | offset) & XE_CACHELINE_MASK) {
+ if (!xe->info.has_mem_copy_instr &&
+ (!IS_ALIGNED(len, 4) ||
+ !IS_ALIGNED(page_offset, XE_CACHELINE_BYTES) ||
+ !IS_ALIGNED(offset, XE_CACHELINE_BYTES))) {
int buf_offset = 0;
+ void *bounce;
+ int err;
+
+ BUILD_BUG_ON(!is_power_of_2(XE_CACHELINE_BYTES));
+ bounce = kmalloc(XE_CACHELINE_BYTES, GFP_KERNEL);
+ if (!bounce)
+ return -ENOMEM;
/*
* Less than ideal for large unaligned access but this should be
* fairly rare, can fixup if this becomes common.
*/
do {
- u8 bounce[XE_CACHELINE_BYTES];
- void *ptr = (void *)bounce;
- int err;
int copy_bytes = min_t(int, bytes_left,
XE_CACHELINE_BYTES -
(offset & XE_CACHELINE_MASK));
@@ -1835,22 +2341,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
err = xe_migrate_access_memory(m, bo,
offset &
~XE_CACHELINE_MASK,
- (void *)ptr,
- sizeof(bounce), 0);
+ bounce,
+ XE_CACHELINE_BYTES, 0);
if (err)
- return err;
+ break;
if (write) {
- memcpy(ptr + ptr_offset, buf + buf_offset, copy_bytes);
+ memcpy(bounce + ptr_offset, buf + buf_offset, copy_bytes);
err = xe_migrate_access_memory(m, bo,
offset & ~XE_CACHELINE_MASK,
- (void *)ptr,
- sizeof(bounce), 0);
+ bounce,
+ XE_CACHELINE_BYTES, write);
if (err)
- return err;
+ break;
} else {
- memcpy(buf + buf_offset, ptr + ptr_offset,
+ memcpy(buf + buf_offset, bounce + ptr_offset,
copy_bytes);
}
@@ -1859,20 +2365,22 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
offset += copy_bytes;
} while (bytes_left);
- return 0;
+ kfree(bounce);
+ return err;
}
- dma_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write);
- if (IS_ERR(dma_addr))
- return PTR_ERR(dma_addr);
+ pagemap_addr = xe_migrate_dma_map(xe, buf, len + page_offset, write);
+ if (IS_ERR(pagemap_addr))
+ return PTR_ERR(pagemap_addr);
- xe_res_first(bo->ttm.resource, offset, bo->size - offset, &cursor);
+ xe_res_first(bo->ttm.resource, offset, xe_bo_size(bo) - offset, &cursor);
do {
struct dma_fence *__fence;
u64 vram_addr = vram_region_gpu_offset(bo->ttm.resource) +
cursor.start;
int current_bytes;
+ u32 pitch;
if (cursor.size > MAX_PREEMPTDISABLE_TRANSFER)
current_bytes = min_t(int, bytes_left,
@@ -1880,21 +2388,30 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
else
current_bytes = min_t(int, bytes_left, cursor.size);
- if (fence)
- dma_fence_put(fence);
+ pitch = xe_migrate_copy_pitch(xe, current_bytes);
+ if (xe->info.has_mem_copy_instr)
+ current_bytes = min_t(int, current_bytes, U16_MAX * pitch);
+ else
+ current_bytes = min_t(int, current_bytes,
+ round_down(S16_MAX * pitch,
+ XE_CACHELINE_BYTES));
__fence = xe_migrate_vram(m, current_bytes,
(unsigned long)buf & ~PAGE_MASK,
- dma_addr + current_page,
- vram_addr, write ?
+ &pagemap_addr[current_page],
+ vram_addr, NULL, write ?
XE_MIGRATE_COPY_TO_VRAM :
XE_MIGRATE_COPY_TO_SRAM);
if (IS_ERR(__fence)) {
- if (fence)
+ if (fence) {
dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+ }
fence = __fence;
goto out_err;
}
+
+ dma_fence_put(fence);
fence = __fence;
buf += current_bytes;
@@ -1909,10 +2426,60 @@ int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
dma_fence_put(fence);
out_err:
- xe_migrate_dma_unmap(xe, dma_addr, len + page_offset, write);
+ xe_migrate_dma_unmap(xe, pagemap_addr, len + page_offset, write);
return IS_ERR(fence) ? PTR_ERR(fence) : 0;
}
+/**
+ * xe_migrate_job_lock() - Lock migrate job lock
+ * @m: The migration context.
+ * @q: Queue associated with the operation which requires a lock
+ *
+ * Lock the migrate job lock if the queue is a migration queue, otherwise
+ * assert that the VM's dma-resv is held (user queues have their own locking).
+ */
+void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q)
+{
+ bool is_migrate = q == m->q;
+
+ if (is_migrate)
+ mutex_lock(&m->job_mutex);
+ else
+ xe_vm_assert_held(q->vm); /* User queue VMs should be locked */
+}
+
+/**
+ * xe_migrate_job_unlock() - Unlock migrate job lock
+ * @m: The migration context.
+ * @q: Queue associated with the operation which requires a lock
+ *
+ * Unlock the migrate job lock if the queue is a migration queue, otherwise
+ * assert that the VM's dma-resv is held (user queues have their own locking).
+ */
+void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q)
+{
+ bool is_migrate = q == m->q;
+
+ if (is_migrate)
+ mutex_unlock(&m->job_mutex);
+ else
+ xe_vm_assert_held(q->vm); /* User queue VMs should be locked */
+}
+
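A minimal sketch of how a submission path might bracket job arming with the two helpers above; the helper name is hypothetical and any extra fencing a real path needs is omitted:

	static struct dma_fence *example_push_job(struct xe_migrate *m,
						  struct xe_exec_queue *q,
						  struct xe_sched_job *job)
	{
		struct dma_fence *fence;

		xe_migrate_job_lock(m, q);	/* mutex for m->q, resv assert otherwise */
		xe_sched_job_arm(job);
		fence = dma_fence_get(&job->drm.s_fence->finished);
		xe_sched_job_push(job);
		xe_migrate_job_unlock(m, q);

		return fence;
	}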
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+/**
+ * xe_migrate_job_lock_assert() - Assert that the queue's migrate job lock is held
+ * @q: Migrate queue
+ */
+void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
+{
+ struct xe_migrate *m = gt_to_tile(q->gt)->migrate;
+
+ xe_gt_assert(q->gt, q == m->q);
+ lockdep_assert_held(&m->job_mutex);
+}
+#endif
+
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
#include "tests/xe_migrate.c"
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
index fb9839c1bae0..b76441f062b4 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -9,11 +9,13 @@
#include <linux/types.h>
struct dma_fence;
+struct drm_pagemap_addr;
struct iosys_map;
struct ttm_resource;
struct xe_bo;
struct xe_gt;
+struct xe_tlb_inval_job;
struct xe_exec_queue;
struct xe_migrate;
struct xe_migrate_pt_update;
@@ -24,6 +26,13 @@ struct xe_vm;
struct xe_vm_pgtable_update;
struct xe_vma;
+enum xe_sriov_vf_ccs_rw_ctxs;
+
+enum xe_migrate_copy_dir {
+ XE_MIGRATE_COPY_TO_VRAM,
+ XE_MIGRATE_COPY_TO_SRAM,
+};
+
/**
* struct xe_migrate_pt_update_ops - Callbacks for the
* xe_migrate_update_pgtables() function.
@@ -89,21 +98,32 @@ struct xe_migrate_pt_update {
struct xe_vma_ops *vops;
/** @job: The job if a GPU page-table update. NULL otherwise */
struct xe_sched_job *job;
+ /**
+ * @ijob: The TLB invalidation job for primary GT. NULL otherwise
+ */
+ struct xe_tlb_inval_job *ijob;
+ /**
+ * @mjob: The TLB invalidation job for media GT. NULL otherwise
+ */
+ struct xe_tlb_inval_job *mjob;
/** @tile_id: Tile ID of the update */
u8 tile_id;
};
-struct xe_migrate *xe_migrate_init(struct xe_tile *tile);
+struct xe_migrate *xe_migrate_alloc(struct xe_tile *tile);
+int xe_migrate_init(struct xe_migrate *m);
struct dma_fence *xe_migrate_to_vram(struct xe_migrate *m,
unsigned long npages,
- dma_addr_t *src_addr,
- u64 dst_addr);
+ struct drm_pagemap_addr *src_addr,
+ u64 dst_addr,
+ struct dma_fence *deps);
struct dma_fence *xe_migrate_from_vram(struct xe_migrate *m,
unsigned long npages,
u64 src_addr,
- dma_addr_t *dst_addr);
+ struct drm_pagemap_addr *dst_addr,
+ struct dma_fence *deps);
struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct xe_bo *src_bo,
@@ -112,6 +132,15 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
struct ttm_resource *dst,
bool copy_only_ccs);
+int xe_migrate_ccs_rw_copy(struct xe_tile *tile, struct xe_exec_queue *q,
+ struct xe_bo *src_bo,
+ enum xe_sriov_vf_ccs_rw_ctxs read_write);
+
+struct xe_lrc *xe_migrate_lrc(struct xe_migrate *migrate);
+struct xe_exec_queue *xe_migrate_exec_queue(struct xe_migrate *migrate);
+struct dma_fence *xe_migrate_vram_copy_chunk(struct xe_bo *vram_bo, u64 vram_offset,
+ struct xe_bo *sysmem_bo, u64 sysmem_offset,
+ u64 size, enum xe_migrate_copy_dir dir);
int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
unsigned long offset, void *buf, int len,
int write);
@@ -133,5 +162,15 @@ xe_migrate_update_pgtables(struct xe_migrate *m,
void xe_migrate_wait(struct xe_migrate *m);
-struct xe_exec_queue *xe_tile_migrate_exec_queue(struct xe_tile *tile);
+#if IS_ENABLED(CONFIG_PROVE_LOCKING)
+void xe_migrate_job_lock_assert(struct xe_exec_queue *q);
+#else
+static inline void xe_migrate_job_lock_assert(struct xe_exec_queue *q)
+{
+}
+#endif
+
+void xe_migrate_job_lock(struct xe_migrate *m, struct xe_exec_queue *q);
+void xe_migrate_job_unlock(struct xe_migrate *m, struct xe_exec_queue *q);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_migrate_doc.h b/drivers/gpu/drm/xe/xe_migrate_doc.h
index 63c7d67b5b62..c082bc0b7068 100644
--- a/drivers/gpu/drm/xe/xe_migrate_doc.h
+++ b/drivers/gpu/drm/xe/xe_migrate_doc.h
@@ -9,7 +9,7 @@
/**
* DOC: Migrate Layer
*
- * The XE migrate layer is used generate jobs which can copy memory (eviction),
+ * The Xe migrate layer is used to generate jobs which can copy memory (eviction),
* clear memory, or program tables (binds). This layer exists in every GT, has
* a migrate engine, and uses a special VM for all generated jobs.
*
diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index 7357458bc0d2..350dca1f0925 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -22,6 +22,9 @@
#include "xe_macros.h"
#include "xe_sriov.h"
#include "xe_trace.h"
+#include "xe_wa.h"
+
+#include "generated/xe_device_wa_oob.h"
static void tiles_fini(void *arg)
{
@@ -64,35 +67,6 @@ static void mmio_multi_tile_setup(struct xe_device *xe, size_t tile_mmio_size)
if (xe->info.tile_count == 1)
return;
- /* Possibly override number of tile based on configuration register */
- if (!xe->info.skip_mtcfg) {
- struct xe_mmio *mmio = xe_root_tile_mmio(xe);
- u8 tile_count;
- u32 mtcfg;
-
- /*
- * Although the per-tile mmio regs are not yet initialized, this
- * is fine as it's going to the root tile's mmio, that's
- * guaranteed to be initialized earlier in xe_mmio_probe_early()
- */
- mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR);
- tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
-
- if (tile_count < xe->info.tile_count) {
- drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
- xe->info.tile_count, tile_count);
- xe->info.tile_count = tile_count;
-
- /*
- * FIXME: Needs some work for standalone media, but
- * should be impossible with multi-tile for now:
- * multi-tile platform with standalone media doesn't
- * exist
- */
- xe->info.gt_count = xe->info.tile_count;
- }
- }
-
for_each_remote_tile(tile, xe, id)
xe_mmio_init(&tile->mmio, tile, xe->mmio.regs + id * tile_mmio_size, SZ_4M);
}
@@ -163,7 +137,7 @@ static void mmio_flush_pending_writes(struct xe_mmio *mmio)
#define DUMMY_REG_OFFSET 0x130030
int i;
- if (mmio->tile->xe->info.platform != XE_LUNARLAKE)
+ if (!XE_DEVICE_WA(mmio->tile->xe, 15015404425))
return;
/* 4 dummy writes */
@@ -176,7 +150,6 @@ u8 xe_mmio_read8(struct xe_mmio *mmio, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u8 val;
- /* Wa_15015404425 */
mmio_flush_pending_writes(mmio);
val = readb(mmio->regs + addr);
@@ -190,7 +163,6 @@ u16 xe_mmio_read16(struct xe_mmio *mmio, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u16 val;
- /* Wa_15015404425 */
mmio_flush_pending_writes(mmio);
val = readw(mmio->regs + addr);
@@ -217,7 +189,6 @@ u32 xe_mmio_read32(struct xe_mmio *mmio, struct xe_reg reg)
u32 addr = xe_mmio_adjusted_addr(mmio, reg.addr);
u32 val;
- /* Wa_15015404425 */
mmio_flush_pending_writes(mmio);
if (!reg.vf && IS_SRIOV_VF(mmio->tile->xe))
@@ -408,3 +379,32 @@ int xe_mmio_wait32_not(struct xe_mmio *mmio, struct xe_reg reg, u32 mask, u32 va
{
return __xe_mmio_wait32(mmio, reg, mask, val, timeout_us, out_val, atomic, false);
}
+
+#ifdef CONFIG_PCI_IOV
+static size_t vf_regs_stride(struct xe_device *xe)
+{
+ return GRAPHICS_VERx100(xe) > 1200 ? 0x400 : 0x1000;
+}
+
+/**
+ * xe_mmio_init_vf_view() - Initialize an MMIO instance that mirrors the VF's register view
+ * @mmio: the target &xe_mmio to initialize as VF's view
+ * @base: the source &xe_mmio to initialize from
+ * @vfid: the VF identifier
+ */
+void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid)
+{
+ struct xe_tile *tile = base->tile;
+ struct xe_device *xe = tile->xe;
+ size_t offset = vf_regs_stride(xe) * vfid;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid);
+ xe_assert(xe, !base->sriov_vf_gt);
+ xe_assert(xe, base->regs_size > offset);
+
+ *mmio = *base;
+ mmio->regs += offset;
+ mmio->regs_size -= offset;
+}
+#endif
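A minimal sketch of how a PF-only debug path might read a register through a VF's view of the MMIO space; the helper name and register are placeholders:

	#ifdef CONFIG_PCI_IOV
	static u32 example_read_as_vf(struct xe_tile *tile, unsigned int vfid,
				      struct xe_reg reg)
	{
		struct xe_mmio vf_view;

		xe_mmio_init_vf_view(&vf_view, &tile->mmio, vfid);
		return xe_mmio_read32(&vf_view, reg);
	}
	#endif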
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index c151ba569003..15362789ab99 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -42,4 +42,8 @@ static inline struct xe_mmio *xe_root_tile_mmio(struct xe_device *xe)
return &xe->tiles[0].mmio;
}
+#ifdef CONFIG_PCI_IOV
+void xe_mmio_init_vf_view(struct xe_mmio *mmio, const struct xe_mmio *base, unsigned int vfid);
+#endif
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.c b/drivers/gpu/drm/xe/xe_mmio_gem.c
new file mode 100644
index 000000000000..9a97c4387e4f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio_gem.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_mmio_gem.h"
+
+#include <drm/drm_drv.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_managed.h>
+
+#include "xe_device_types.h"
+
+/**
+ * DOC: Exposing MMIO regions to userspace
+ *
+ * In certain cases, the driver may allow userspace to mmap a portion of the hardware registers.
+ *
+ * This can be done as follows:
+ * 1. Call xe_mmio_gem_create() to create a GEM object with an mmap-able fake offset.
+ * 2. Use xe_mmio_gem_mmap_offset() on the created GEM object to retrieve the fake offset.
+ * 3. Provide the fake offset to userspace.
+ * 4. Userspace can call mmap with the fake offset. The length provided to mmap
+ * must match the size of the GEM object.
+ * 5. When the region is no longer needed, call xe_mmio_gem_destroy() to release the GEM object.
+ *
+ * NOTE: The exposed MMIO region must be page-aligned in both its BAR offset and its size.
+ *
+ * WARNING: Exposing MMIO regions to userspace can have security and stability implications.
+ * Make sure not to expose any sensitive registers.
+ */
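As a rough illustration of the five steps above, a driver-side helper wired into some ioctl might look like the sketch below; the helper name, the phys_addr/size pair and how the fake offset is handed back to userspace are all hypothetical:

	static int example_expose_mmio(struct xe_device *xe, struct drm_file *file,
				       phys_addr_t phys_addr, size_t size,
				       u64 *mmap_offset)
	{
		struct xe_mmio_gem *gem;

		gem = xe_mmio_gem_create(xe, file, phys_addr, size);	/* step 1 */
		if (IS_ERR(gem))
			return PTR_ERR(gem);

		*mmap_offset = xe_mmio_gem_mmap_offset(gem);		/* steps 2 and 3 */
		/* Userspace mmap()s *mmap_offset with length == size (step 4). */
		/* xe_mmio_gem_destroy(gem) releases the region later (step 5). */
		return 0;
	}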
+
+static void xe_mmio_gem_free(struct drm_gem_object *);
+static int xe_mmio_gem_mmap(struct drm_gem_object *, struct vm_area_struct *);
+static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *);
+
+struct xe_mmio_gem {
+ struct drm_gem_object base;
+ phys_addr_t phys_addr;
+};
+
+static const struct vm_operations_struct vm_ops = {
+ .open = drm_gem_vm_open,
+ .close = drm_gem_vm_close,
+ .fault = xe_mmio_gem_vm_fault,
+};
+
+static const struct drm_gem_object_funcs xe_mmio_gem_funcs = {
+ .free = xe_mmio_gem_free,
+ .mmap = xe_mmio_gem_mmap,
+ .vm_ops = &vm_ops,
+};
+
+static inline struct xe_mmio_gem *to_xe_mmio_gem(struct drm_gem_object *obj)
+{
+ return container_of(obj, struct xe_mmio_gem, base);
+}
+
+/**
+ * xe_mmio_gem_create - Expose an MMIO region to userspace
+ * @xe: The xe device
+ * @file: DRM file descriptor
+ * @phys_addr: Start of the exposed MMIO region
+ * @size: The size of the exposed MMIO region
+ *
+ * This function creates a GEM object that exposes an MMIO region with an mmap-able
+ * fake offset.
+ *
+ * See: "Exposing MMIO regions to userspace"
+ */
+struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file,
+ phys_addr_t phys_addr, size_t size)
+{
+ struct xe_mmio_gem *obj;
+ struct drm_gem_object *base;
+ int err;
+
+ if ((phys_addr % PAGE_SIZE != 0) || (size % PAGE_SIZE != 0))
+ return ERR_PTR(-EINVAL);
+
+ obj = kzalloc(sizeof(*obj), GFP_KERNEL);
+ if (!obj)
+ return ERR_PTR(-ENOMEM);
+
+ base = &obj->base;
+ base->funcs = &xe_mmio_gem_funcs;
+ obj->phys_addr = phys_addr;
+
+ drm_gem_private_object_init(&xe->drm, base, size);
+
+ err = drm_gem_create_mmap_offset(base);
+ if (err)
+ goto free_gem;
+
+ err = drm_vma_node_allow(&base->vma_node, file);
+ if (err)
+ goto free_gem;
+
+ return obj;
+
+free_gem:
+ xe_mmio_gem_free(base);
+ return ERR_PTR(err);
+}
+
+/**
+ * xe_mmio_gem_mmap_offset - Return the mmap-able fake offset
+ * @gem: the GEM object created with xe_mmio_gem_create()
+ *
+ * This function returns the mmap-able fake offset allocated during
+ * xe_mmio_gem_create().
+ *
+ * See: "Exposing MMIO regions to userspace"
+ */
+u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem)
+{
+ return drm_vma_node_offset_addr(&gem->base.vma_node);
+}
+
+static void xe_mmio_gem_free(struct drm_gem_object *base)
+{
+ struct xe_mmio_gem *obj = to_xe_mmio_gem(base);
+
+ drm_gem_object_release(base);
+ kfree(obj);
+}
+
+/**
+ * xe_mmio_gem_destroy - Destroy the GEM object that exposes an MMIO region
+ * @gem: the GEM object to destroy
+ *
+ * This function releases resources associated with the GEM object created by
+ * xe_mmio_gem_create().
+ *
+ * See: "Exposing MMIO regions to userspace"
+ */
+void xe_mmio_gem_destroy(struct xe_mmio_gem *gem)
+{
+ xe_mmio_gem_free(&gem->base);
+}
+
+static int xe_mmio_gem_mmap(struct drm_gem_object *base, struct vm_area_struct *vma)
+{
+ if (vma->vm_end - vma->vm_start != base->size)
+ return -EINVAL;
+
+ if ((vma->vm_flags & VM_SHARED) == 0)
+ return -EINVAL;
+
+ /* Set vm_pgoff (used as a fake buffer offset by DRM) to 0 */
+ vma->vm_pgoff = 0;
+ vma->vm_page_prot = pgprot_noncached(vm_get_page_prot(vma->vm_flags));
+ vm_flags_set(vma, VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP |
+ VM_DONTCOPY | VM_NORESERVE);
+
+ /* Defer actual mapping to the fault handler. */
+ return 0;
+}
+
+static void xe_mmio_gem_release_dummy_page(struct drm_device *dev, void *res)
+{
+ __free_page((struct page *)res);
+}
+
+static vm_fault_t xe_mmio_gem_vm_fault_dummy_page(struct vm_area_struct *vma)
+{
+ struct drm_gem_object *base = vma->vm_private_data;
+ struct drm_device *dev = base->dev;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ struct page *page;
+ unsigned long pfn;
+ unsigned long i;
+
+ page = alloc_page(GFP_KERNEL | __GFP_ZERO);
+ if (!page)
+ return VM_FAULT_OOM;
+
+ if (drmm_add_action_or_reset(dev, xe_mmio_gem_release_dummy_page, page))
+ return VM_FAULT_OOM;
+
+ pfn = page_to_pfn(page);
+
+ /* Map the entire VMA to the same dummy page */
+ for (i = 0; i < base->size; i += PAGE_SIZE) {
+ unsigned long addr = vma->vm_start + i;
+
+ ret = vmf_insert_pfn(vma, addr, pfn);
+ if (ret & VM_FAULT_ERROR)
+ break;
+ }
+
+ return ret;
+}
+
+static vm_fault_t xe_mmio_gem_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct drm_gem_object *base = vma->vm_private_data;
+ struct xe_mmio_gem *obj = to_xe_mmio_gem(base);
+ struct drm_device *dev = base->dev;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ unsigned long i;
+ int idx;
+
+ if (!drm_dev_enter(dev, &idx)) {
+ /*
+ * Provide a dummy page to avoid SIGBUS for events such as hot-unplug.
+ * This gives the userspace the option to recover instead of crashing.
+ * It is assumed the userspace will receive the notification via some
+ * other channel (e.g. drm uevent).
+ */
+ return xe_mmio_gem_vm_fault_dummy_page(vma);
+ }
+
+ for (i = 0; i < base->size; i += PAGE_SIZE) {
+ unsigned long addr = vma->vm_start + i;
+ unsigned long phys_addr = obj->phys_addr + i;
+
+ ret = vmf_insert_pfn(vma, addr, PHYS_PFN(phys_addr));
+ if (ret & VM_FAULT_ERROR)
+ break;
+ }
+
+ drm_dev_exit(idx);
+ return ret;
+}
diff --git a/drivers/gpu/drm/xe/xe_mmio_gem.h b/drivers/gpu/drm/xe/xe_mmio_gem.h
new file mode 100644
index 000000000000..4b76d5586ebb
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_mmio_gem.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_MMIO_GEM_H_
+#define _XE_MMIO_GEM_H_
+
+#include <linux/types.h>
+
+struct drm_file;
+struct xe_device;
+struct xe_mmio_gem;
+
+struct xe_mmio_gem *xe_mmio_gem_create(struct xe_device *xe, struct drm_file *file,
+ phys_addr_t phys_addr, size_t size);
+u64 xe_mmio_gem_mmap_offset(struct xe_mmio_gem *gem);
+void xe_mmio_gem_destroy(struct xe_mmio_gem *gem);
+
+#endif /* _XE_MMIO_GEM_H_ */
diff --git a/drivers/gpu/drm/xe/xe_mocs.c b/drivers/gpu/drm/xe/xe_mocs.c
index 0c737413fcb6..6613d3b48a84 100644
--- a/drivers/gpu/drm/xe/xe_mocs.c
+++ b/drivers/gpu/drm/xe/xe_mocs.c
@@ -568,6 +568,23 @@ static const struct xe_mocs_ops xe2_mocs_ops = {
.dump = xe2_mocs_dump,
};
+/*
+ * Note that the "L3" and "L4" register fields actually control the L2 and L3
+ * caches respectively on this platform.
+ */
+static const struct xe_mocs_entry xe3p_xpc_mocs_table[] = {
+ /* Defer to PAT */
+ MOCS_ENTRY(0, XE2_L3_0_WB | L4_3_UC, 0),
+ /* UC */
+ MOCS_ENTRY(1, IG_PAT | XE2_L3_3_UC | L4_3_UC, 0),
+ /* L2 */
+ MOCS_ENTRY(2, IG_PAT | XE2_L3_0_WB | L4_3_UC, 0),
+ /* L3 */
+ MOCS_ENTRY(3, IG_PAT | XE2_L3_3_UC | L4_0_WB, 0),
+ /* L2 + L3 */
+ MOCS_ENTRY(4, IG_PAT | XE2_L3_0_WB | L4_0_WB, 0),
+};
+
static unsigned int get_mocs_settings(struct xe_device *xe,
struct xe_mocs_info *info)
{
@@ -576,6 +593,16 @@ static unsigned int get_mocs_settings(struct xe_device *xe,
memset(info, 0, sizeof(struct xe_mocs_info));
switch (xe->info.platform) {
+ case XE_CRESCENTISLAND:
+ info->ops = &xe2_mocs_ops;
+ info->table_size = ARRAY_SIZE(xe3p_xpc_mocs_table);
+ info->table = xe3p_xpc_mocs_table;
+ info->num_mocs_regs = XE2_NUM_MOCS_ENTRIES;
+ info->uc_index = 1;
+ info->wb_index = 4;
+ info->unused_entries_index = 4;
+ break;
+ case XE_NOVALAKE_S:
case XE_PANTHERLAKE:
case XE_LUNARLAKE:
case XE_BATTLEMAGE:
@@ -772,12 +799,20 @@ void xe_mocs_init(struct xe_gt *gt)
init_l3cc_table(gt, &table);
}
-void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_mocs_dump() - Dump MOCS table.
+ * @gt: the &xe_gt with MOCS table
+ * @p: the &drm_printer to dump info to
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
enum xe_force_wake_domains domain;
struct xe_mocs_info table;
unsigned int fw_ref, flags;
+ int err = 0;
flags = get_mocs_settings(xe, &table);
@@ -785,14 +820,17 @@ void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p)
xe_pm_runtime_get_noresume(xe);
fw_ref = xe_force_wake_get(gt_to_fw(gt), domain);
- if (!xe_force_wake_ref_has_domain(fw_ref, domain))
+ if (!xe_force_wake_ref_has_domain(fw_ref, domain)) {
+ err = -ETIMEDOUT;
goto err_fw;
+ }
table.ops->dump(&table, flags, gt, p);
err_fw:
xe_force_wake_put(gt_to_fw(gt), fw_ref);
xe_pm_runtime_put(xe);
+ return err;
}
#if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
diff --git a/drivers/gpu/drm/xe/xe_mocs.h b/drivers/gpu/drm/xe/xe_mocs.h
index dc972ffd4d07..f00bbb269829 100644
--- a/drivers/gpu/drm/xe/xe_mocs.h
+++ b/drivers/gpu/drm/xe/xe_mocs.h
@@ -11,12 +11,6 @@ struct xe_gt;
void xe_mocs_init_early(struct xe_gt *gt);
void xe_mocs_init(struct xe_gt *gt);
-
-/**
- * xe_mocs_dump - Dump mocs table
- * @gt: GT structure
- * @p: Printer to dump info to
- */
-void xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p);
+int xe_mocs_dump(struct xe_gt *gt, struct drm_printer *p);
#endif
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index e4742e27e2cd..d08338fc3bc1 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -18,29 +18,50 @@
#include "xe_observation.h"
#include "xe_sched_job.h"
+#if IS_ENABLED(CONFIG_DRM_XE_DEBUG)
+#define DEFAULT_GUC_LOG_LEVEL 3
+#else
+#define DEFAULT_GUC_LOG_LEVEL 1
+#endif
+
+#define DEFAULT_PROBE_DISPLAY true
+#define DEFAULT_VRAM_BAR_SIZE 0
+#define DEFAULT_FORCE_PROBE CONFIG_DRM_XE_FORCE_PROBE
+#define DEFAULT_MAX_VFS ~0
+#define DEFAULT_MAX_VFS_STR "unlimited"
+#define DEFAULT_WEDGED_MODE 1
+#define DEFAULT_SVM_NOTIFIER_SIZE 512
+
struct xe_modparam xe_modparam = {
- .probe_display = true,
- .guc_log_level = 3,
- .force_probe = CONFIG_DRM_XE_FORCE_PROBE,
- .wedged_mode = 1,
- .svm_notifier_size = 512,
+ .probe_display = DEFAULT_PROBE_DISPLAY,
+ .guc_log_level = DEFAULT_GUC_LOG_LEVEL,
+ .force_probe = DEFAULT_FORCE_PROBE,
+#ifdef CONFIG_PCI_IOV
+ .max_vfs = DEFAULT_MAX_VFS,
+#endif
+ .wedged_mode = DEFAULT_WEDGED_MODE,
+ .svm_notifier_size = DEFAULT_SVM_NOTIFIER_SIZE,
/* the rest are 0 by default */
};
module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
-MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
+MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size in MiB, must be power of 2 "
+ "[default=" __stringify(DEFAULT_SVM_NOTIFIER_SIZE) "]");
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
module_param_named(probe_display, xe_modparam.probe_display, bool, 0444);
-MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched (default: true)");
+MODULE_PARM_DESC(probe_display, "Probe display HW, otherwise it's left untouched "
+		 "[default=" __stringify(DEFAULT_PROBE_DISPLAY) "]");
module_param_named(vram_bar_size, xe_modparam.force_vram_bar_size, int, 0600);
-MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size (in MiB) - <0=disable-resize, 0=max-needed-size[default], >0=force-size");
+MODULE_PARM_DESC(vram_bar_size, "Set the vram bar size in MiB (<0=disable-resize, 0=max-needed-size, >0=force-size "
+ "[default=" __stringify(DEFAULT_VRAM_BAR_SIZE) "])");
module_param_named(guc_log_level, xe_modparam.guc_log_level, int, 0600);
-MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1..5=enable with verbosity min..max)");
+MODULE_PARM_DESC(guc_log_level, "GuC firmware logging level (0=disable, 1=normal, 2..5=verbose-levels "
+ "[default=" __stringify(DEFAULT_GUC_LOG_LEVEL) "])");
module_param_named_unsafe(guc_firmware_path, xe_modparam.guc_firmware_path, charp, 0400);
MODULE_PARM_DESC(guc_firmware_path,
@@ -56,18 +77,21 @@ MODULE_PARM_DESC(gsc_firmware_path,
module_param_named_unsafe(force_probe, xe_modparam.force_probe, charp, 0400);
MODULE_PARM_DESC(force_probe,
- "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details.");
+ "Force probe options for specified devices. See CONFIG_DRM_XE_FORCE_PROBE for details "
+		 "[default=" DEFAULT_FORCE_PROBE "]");
#ifdef CONFIG_PCI_IOV
module_param_named(max_vfs, xe_modparam.max_vfs, uint, 0400);
MODULE_PARM_DESC(max_vfs,
"Limit number of Virtual Functions (VFs) that could be managed. "
- "(0 = no VFs [default]; N = allow up to N VFs)");
+ "(0=no VFs; N=allow up to N VFs "
+ "[default=" DEFAULT_MAX_VFS_STR "])");
#endif
module_param_named_unsafe(wedged_mode, xe_modparam.wedged_mode, int, 0600);
MODULE_PARM_DESC(wedged_mode,
- "Module's default policy for the wedged mode - 0=never, 1=upon-critical-errors[default], 2=upon-any-hang");
+ "Module's default policy for the wedged mode (0=never, 1=upon-critical-errors, 2=upon-any-hang "
+ "[default=" __stringify(DEFAULT_WEDGED_MODE) "])");
static int xe_check_nomodeset(void)
{
@@ -111,24 +135,17 @@ static const struct init_funcs init_funcs[] = {
},
};
-static int __init xe_call_init_func(unsigned int i)
+static int __init xe_call_init_func(const struct init_funcs *func)
{
- if (WARN_ON(i >= ARRAY_SIZE(init_funcs)))
- return 0;
- if (!init_funcs[i].init)
- return 0;
-
- return init_funcs[i].init();
+ if (func->init)
+ return func->init();
+ return 0;
}
-static void xe_call_exit_func(unsigned int i)
+static void xe_call_exit_func(const struct init_funcs *func)
{
- if (WARN_ON(i >= ARRAY_SIZE(init_funcs)))
- return;
- if (!init_funcs[i].exit)
- return;
-
- init_funcs[i].exit();
+ if (func->exit)
+ func->exit();
}
static int __init xe_init(void)
@@ -136,10 +153,12 @@ static int __init xe_init(void)
int err, i;
for (i = 0; i < ARRAY_SIZE(init_funcs); i++) {
- err = xe_call_init_func(i);
+ err = xe_call_init_func(init_funcs + i);
if (err) {
+ pr_info("%s: module_init aborted at %ps %pe\n",
+ DRIVER_NAME, init_funcs[i].init, ERR_PTR(err));
while (i--)
- xe_call_exit_func(i);
+ xe_call_exit_func(init_funcs + i);
return err;
}
}
@@ -152,7 +171,7 @@ static void __exit xe_exit(void)
int i;
for (i = ARRAY_SIZE(init_funcs) - 1; i >= 0; i--)
- xe_call_exit_func(i);
+ xe_call_exit_func(init_funcs + i);
}
module_init(xe_init);
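
The reworked init path above walks the init_funcs table forward and, on failure, unwinds in reverse only the entries that already ran before returning the error. A minimal standalone sketch of that table-driven init/rollback pattern (userspace C with illustrative names, not the driver's structures):

#include <stdio.h>

struct init_func {
	int (*init)(void);
	void (*exit)(void);
};

static int init_a(void) { puts("init a"); return 0; }
static void exit_a(void) { puts("exit a"); }
static int init_b(void) { puts("init b: fails"); return -1; }

static const struct init_func funcs[] = {
	{ .init = init_a, .exit = exit_a },
	{ .init = init_b },	/* no exit hook: rollback must tolerate NULL */
};

int main(void)
{
	unsigned int i;
	int err;

	for (i = 0; i < sizeof(funcs) / sizeof(funcs[0]); i++) {
		err = funcs[i].init ? funcs[i].init() : 0;
		if (err) {
			/* Unwind in reverse order, skipping the entry that failed */
			while (i--)
				if (funcs[i].exit)
					funcs[i].exit();
			return 1;
		}
	}
	return 0;
}
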
diff --git a/drivers/gpu/drm/xe/xe_nvm.c b/drivers/gpu/drm/xe/xe_nvm.c
new file mode 100644
index 000000000000..33f4ac82fc80
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_nvm.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright(c) 2019-2025, Intel Corporation. All rights reserved.
+ */
+
+#include <linux/intel_dg_nvm_aux.h>
+#include <linux/pci.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_mmio.h"
+#include "xe_nvm.h"
+#include "regs/xe_gsc_regs.h"
+#include "xe_sriov.h"
+
+#define GEN12_GUNIT_NVM_BASE 0x00102040
+#define GEN12_DEBUG_NVM_BASE 0x00101018
+
+#define GEN12_CNTL_PROTECTED_NVM_REG 0x0010100C
+
+#define GEN12_GUNIT_NVM_SIZE 0x80
+#define GEN12_DEBUG_NVM_SIZE 0x4
+
+#define NVM_NON_POSTED_ERASE_CHICKEN_BIT BIT(13)
+
+#define HECI_FW_STATUS_2_NVM_ACCESS_MODE BIT(3)
+
+static const struct intel_dg_nvm_region regions[INTEL_DG_NVM_REGIONS] = {
+ [0] = { .name = "DESCRIPTOR", },
+ [2] = { .name = "GSC", },
+ [9] = { .name = "PADDING", },
+ [11] = { .name = "OptionROM", },
+ [12] = { .name = "DAM", },
+};
+
+static void xe_nvm_release_dev(struct device *dev)
+{
+ struct auxiliary_device *aux = container_of(dev, struct auxiliary_device, dev);
+ struct intel_dg_nvm_dev *nvm = container_of(aux, struct intel_dg_nvm_dev, aux_dev);
+
+ kfree(nvm);
+}
+
+static bool xe_nvm_non_posted_erase(struct xe_device *xe)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+
+ if (xe->info.platform != XE_BATTLEMAGE)
+ return false;
+ return !(xe_mmio_read32(mmio, XE_REG(GEN12_CNTL_PROTECTED_NVM_REG)) &
+ NVM_NON_POSTED_ERASE_CHICKEN_BIT);
+}
+
+static bool xe_nvm_writable_override(struct xe_device *xe)
+{
+ struct xe_mmio *mmio = xe_root_tile_mmio(xe);
+ bool writable_override;
+ resource_size_t base;
+
+ switch (xe->info.platform) {
+ case XE_BATTLEMAGE:
+ base = DG2_GSC_HECI2_BASE;
+ break;
+ case XE_PVC:
+ base = PVC_GSC_HECI2_BASE;
+ break;
+ case XE_DG2:
+ base = DG2_GSC_HECI2_BASE;
+ break;
+ case XE_DG1:
+ base = DG1_GSC_HECI2_BASE;
+ break;
+ default:
+ drm_err(&xe->drm, "Unknown platform\n");
+ return true;
+ }
+
+ writable_override =
+ !(xe_mmio_read32(mmio, HECI_FWSTS2(base)) &
+ HECI_FW_STATUS_2_NVM_ACCESS_MODE);
+ if (writable_override)
+ drm_info(&xe->drm, "NVM access overridden by jumper\n");
+ return writable_override;
+}
+
+int xe_nvm_init(struct xe_device *xe)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ struct auxiliary_device *aux_dev;
+ struct intel_dg_nvm_dev *nvm;
+ int ret;
+
+ if (!xe->info.has_gsc_nvm)
+ return 0;
+
+ /* No access to internal NVM from VFs */
+ if (IS_SRIOV_VF(xe))
+ return 0;
+
+ /* Nvm pointer should be NULL here */
+ if (WARN_ON(xe->nvm))
+ return -EFAULT;
+
+ xe->nvm = kzalloc(sizeof(*nvm), GFP_KERNEL);
+ if (!xe->nvm)
+ return -ENOMEM;
+
+ nvm = xe->nvm;
+
+ nvm->writable_override = xe_nvm_writable_override(xe);
+ nvm->non_posted_erase = xe_nvm_non_posted_erase(xe);
+ nvm->bar.parent = &pdev->resource[0];
+ nvm->bar.start = GEN12_GUNIT_NVM_BASE + pdev->resource[0].start;
+ nvm->bar.end = nvm->bar.start + GEN12_GUNIT_NVM_SIZE - 1;
+ nvm->bar.flags = IORESOURCE_MEM;
+ nvm->bar.desc = IORES_DESC_NONE;
+ nvm->regions = regions;
+
+ nvm->bar2.parent = &pdev->resource[0];
+ nvm->bar2.start = GEN12_DEBUG_NVM_BASE + pdev->resource[0].start;
+ nvm->bar2.end = nvm->bar2.start + GEN12_DEBUG_NVM_SIZE - 1;
+ nvm->bar2.flags = IORESOURCE_MEM;
+ nvm->bar2.desc = IORES_DESC_NONE;
+
+ aux_dev = &nvm->aux_dev;
+
+ aux_dev->name = "nvm";
+ aux_dev->id = (pci_domain_nr(pdev->bus) << 16) | pci_dev_id(pdev);
+ aux_dev->dev.parent = &pdev->dev;
+ aux_dev->dev.release = xe_nvm_release_dev;
+
+ ret = auxiliary_device_init(aux_dev);
+ if (ret) {
+ drm_err(&xe->drm, "xe-nvm aux init failed %d\n", ret);
+ goto err;
+ }
+
+ ret = auxiliary_device_add(aux_dev);
+ if (ret) {
+ drm_err(&xe->drm, "xe-nvm aux add failed %d\n", ret);
+ auxiliary_device_uninit(aux_dev);
+ goto err;
+ }
+ return 0;
+
+err:
+ kfree(nvm);
+ xe->nvm = NULL;
+ return ret;
+}
+
+void xe_nvm_fini(struct xe_device *xe)
+{
+ struct intel_dg_nvm_dev *nvm = xe->nvm;
+
+ if (!xe->info.has_gsc_nvm)
+ return;
+
+ /* No access to internal NVM from VFs */
+ if (IS_SRIOV_VF(xe))
+ return;
+
+ /* Nvm pointer should not be NULL here */
+ if (WARN_ON(!nvm))
+ return;
+
+ auxiliary_device_delete(&nvm->aux_dev);
+ auxiliary_device_uninit(&nvm->aux_dev);
+ xe->nvm = NULL;
+}
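
xe_nvm_init() above derives two small MMIO windows by adding fixed offsets to the start of PCI BAR 0 and computing an inclusive end address. A standalone sketch of that arithmetic; the BAR base below is a made-up example value, only the offsets and sizes come from the hunk above:

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

struct window {
	uint64_t start;	/* first byte, inclusive */
	uint64_t end;	/* last byte, inclusive */
};

/* Carve a sub-window of @size bytes at @offset inside a BAR starting at @bar_start. */
static struct window carve(uint64_t bar_start, uint64_t offset, uint64_t size)
{
	struct window w = {
		.start = bar_start + offset,
		.end = bar_start + offset + size - 1,
	};

	return w;
}

int main(void)
{
	const uint64_t bar0 = 0xc0000000ull;			/* hypothetical BAR 0 base */
	struct window gunit = carve(bar0, 0x00102040, 0x80);	/* GEN12_GUNIT_NVM_* */
	struct window dbg = carve(bar0, 0x00101018, 0x4);	/* GEN12_DEBUG_NVM_* */

	printf("gunit nvm: 0x%" PRIx64 " - 0x%" PRIx64 "\n", gunit.start, gunit.end);
	printf("debug nvm: 0x%" PRIx64 " - 0x%" PRIx64 "\n", dbg.start, dbg.end);
	return 0;
}
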
diff --git a/drivers/gpu/drm/xe/xe_nvm.h b/drivers/gpu/drm/xe/xe_nvm.h
new file mode 100644
index 000000000000..7f3d5f57bed0
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_nvm.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright(c) 2019-2025 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __XE_NVM_H__
+#define __XE_NVM_H__
+
+struct xe_device;
+
+int xe_nvm_init(struct xe_device *xe);
+
+void xe_nvm_fini(struct xe_device *xe);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c
index fb842fa0552e..f8bb28ab8124 100644
--- a/drivers/gpu/drm/xe/xe_oa.c
+++ b/drivers/gpu/drm/xe/xe_oa.c
@@ -10,6 +10,7 @@
#include <drm/drm_drv.h>
#include <drm/drm_managed.h>
+#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
#include <generated/xe_wa_oob.h>
@@ -43,6 +44,12 @@
#define DEFAULT_POLL_PERIOD_NS (NSEC_PER_SEC / DEFAULT_POLL_FREQUENCY_HZ)
#define XE_OA_UNIT_INVALID U32_MAX
+enum xe_oam_unit_type {
+ XE_OAM_UNIT_SAG,
+ XE_OAM_UNIT_SCMI_0,
+ XE_OAM_UNIT_SCMI_1,
+};
+
enum xe_oa_submit_deps {
XE_OA_SUBMIT_NO_DEPS,
XE_OA_SUBMIT_ADD_DEPS,
@@ -77,7 +84,7 @@ struct xe_oa_config {
struct xe_oa_open_param {
struct xe_file *xef;
- u32 oa_unit_id;
+ struct xe_oa_unit *oa_unit;
bool sample;
u32 metric_set;
enum xe_oa_format_name oa_format;
@@ -194,7 +201,7 @@ static void free_oa_config_bo(struct xe_oa_config_bo *oa_bo, struct dma_fence *l
static const struct xe_oa_regs *__oa_regs(struct xe_oa_stream *stream)
{
- return &stream->hwe->oa_unit->regs;
+ return &stream->oa_unit->regs;
}
static u32 xe_oa_hw_tail_read(struct xe_oa_stream *stream)
@@ -397,7 +404,7 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf,
static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
{
u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo);
- int size_exponent = __ffs(stream->oa_buffer.bo->size);
+ int size_exponent = __ffs(xe_bo_size(stream->oa_buffer.bo));
u32 oa_buf = gtt_offset | OAG_OABUFFER_MEMORY_SELECT;
struct xe_mmio *mmio = &stream->gt->mmio;
unsigned long flags;
@@ -429,7 +436,7 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream)
spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags);
/* Zero out the OA buffer since we rely on zero report id and timestamp fields */
- memset(stream->oa_buffer.vaddr, 0, stream->oa_buffer.bo->size);
+ memset(stream->oa_buffer.vaddr, 0, xe_bo_size(stream->oa_buffer.bo));
}
static u32 __format_to_oactrl(const struct xe_oa_format *format, int counter_sel_mask)
@@ -454,7 +461,7 @@ static u32 __oa_ccs_select(struct xe_oa_stream *stream)
static u32 __oactrl_used_bits(struct xe_oa_stream *stream)
{
- return stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ?
+ return stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG ?
OAG_OACONTROL_USED_BITS : OAM_OACONTROL_USED_BITS;
}
@@ -475,7 +482,7 @@ static void xe_oa_enable(struct xe_oa_stream *stream)
__oa_ccs_select(stream) | OAG_OACONTROL_OA_COUNTER_ENABLE;
if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
- stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
+ stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG)
val |= OAG_OACONTROL_OA_PES_DISAG_EN;
xe_mmio_rmw32(&stream->gt->mmio, regs->oa_ctrl, __oactrl_used_bits(stream), val);
@@ -816,7 +823,7 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
u32 sqcnt1;
/* Enable thread stall DOP gating and EU DOP gating. */
- if (XE_WA(stream->gt, 1508761755)) {
+ if (XE_GT_WA(stream->gt, 1508761755)) {
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
_MASKED_BIT_DISABLE(STALL_DOP_GATING_DISABLE));
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
@@ -831,18 +838,24 @@ static void xe_oa_disable_metric_set(struct xe_oa_stream *stream)
xe_oa_configure_oa_context(stream, false);
/* Make sure we disable noa to save power. */
- xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);
+ if (GT_VER(stream->gt) < 35)
+ xe_mmio_rmw32(mmio, RPM_CONFIG1, GT_NOA_ENABLE, 0);
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
/* Reset PMON Enable to save power. */
xe_mmio_rmw32(mmio, XELPMP_SQCNT1, sqcnt1, 0);
+
+ if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM ||
+ stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) &&
+ GRAPHICS_VER(stream->oa->xe) >= 30)
+ xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, OAM_LAT_MEASURE_ENABLE, 0);
}
static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
{
- struct xe_oa_unit *u = stream->hwe->oa_unit;
+ struct xe_oa_unit *u = stream->oa_unit;
struct xe_gt *gt = stream->hwe->gt;
if (WARN_ON(stream != u->exclusive_stream))
@@ -857,7 +870,7 @@ static void xe_oa_stream_destroy(struct xe_oa_stream *stream)
xe_oa_free_oa_buffer(stream);
- xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
xe_pm_runtime_put(stream->oa->xe);
/* Wa_1509372804:pvc: Unset the override of GUCRC mode to enable rc6 */
@@ -872,9 +885,9 @@ static int xe_oa_alloc_oa_buffer(struct xe_oa_stream *stream, size_t size)
{
struct xe_bo *bo;
- bo = xe_bo_create_pin_map(stream->oa->xe, stream->gt->tile, NULL,
- size, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(stream->oa->xe, stream->gt->tile,
+ size, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_GGTT, false);
if (IS_ERR(bo))
return PTR_ERR(bo);
@@ -1054,7 +1067,7 @@ static u32 oag_report_ctx_switches(const struct xe_oa_stream *stream)
static u32 oag_buf_size_select(const struct xe_oa_stream *stream)
{
return _MASKED_FIELD(OAG_OA_DEBUG_BUF_SIZE_SELECT,
- stream->oa_buffer.bo->size > SZ_16M ?
+ xe_bo_size(stream->oa_buffer.bo) > SZ_16M ?
OAG_OA_DEBUG_BUF_SIZE_SELECT : 0);
}
@@ -1068,7 +1081,7 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
* EU NOA signals behave incorrectly if EU clock gating is enabled.
* Disable thread stall DOP gating and EU DOP gating.
*/
- if (XE_WA(stream->gt, 1508761755)) {
+ if (XE_GT_WA(stream->gt, 1508761755)) {
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN,
_MASKED_BIT_ENABLE(STALL_DOP_GATING_DISABLE));
xe_gt_mcr_multicast_write(stream->gt, ROW_CHICKEN2,
@@ -1092,11 +1105,12 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
oag_buf_size_select(stream) |
oag_configure_mmio_trigger(stream, true));
- xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl, stream->periodic ?
- (OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+ xe_mmio_write32(mmio, __oa_regs(stream)->oa_ctx_ctrl,
+ OAG_OAGLBCTXCTRL_COUNTER_RESUME |
+ (stream->periodic ?
OAG_OAGLBCTXCTRL_TIMER_ENABLE |
REG_FIELD_PREP(OAG_OAGLBCTXCTRL_TIMER_PERIOD_MASK,
- stream->period_exponent)) : 0);
+ stream->period_exponent) : 0));
/*
* Initialize Super Queue Internal Cnt Register
@@ -1105,9 +1119,13 @@ static int xe_oa_enable_metric_set(struct xe_oa_stream *stream)
*/
sqcnt1 = SQCNT1_PMON_ENABLE |
(HAS_OA_BPC_REPORTING(stream->oa->xe) ? SQCNT1_OABPC : 0);
-
xe_mmio_rmw32(mmio, XELPMP_SQCNT1, 0, sqcnt1);
+ if ((stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM ||
+ stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAM_SAG) &&
+ GRAPHICS_VER(stream->oa->xe) >= 30)
+ xe_mmio_rmw32(mmio, OAM_COMPRESSION_T3_CONTROL, 0, OAM_LAT_MEASURE_ENABLE);
+
/* Configure OAR/OAC */
if (stream->exec_q) {
ret = xe_oa_configure_oa_context(stream, true);
@@ -1139,14 +1157,31 @@ static int decode_oa_format(struct xe_oa *oa, u64 fmt, enum xe_oa_format_name *n
return -EINVAL;
}
+static struct xe_oa_unit *xe_oa_lookup_oa_unit(struct xe_oa *oa, u32 oa_unit_id)
+{
+ struct xe_gt *gt;
+ int gt_id, i;
+
+ for_each_gt(gt, oa->xe, gt_id) {
+ for (i = 0; i < gt->oa.num_oa_units; i++) {
+ struct xe_oa_unit *u = &gt->oa.oa_unit[i];
+
+ if (u->oa_unit_id == oa_unit_id)
+ return u;
+ }
+ }
+
+ return NULL;
+}
+
static int xe_oa_set_prop_oa_unit_id(struct xe_oa *oa, u64 value,
struct xe_oa_open_param *param)
{
- if (value >= oa->oa_unit_ids) {
+ param->oa_unit = xe_oa_lookup_oa_unit(oa, value);
+ if (!param->oa_unit) {
drm_dbg(&oa->xe->drm, "OA unit ID out of range %lld\n", value);
return -EINVAL;
}
- param->oa_unit_id = value;
return 0;
}
@@ -1220,6 +1255,9 @@ static int xe_oa_set_no_preempt(struct xe_oa *oa, u64 value,
static int xe_oa_set_prop_num_syncs(struct xe_oa *oa, u64 value,
struct xe_oa_open_param *param)
{
+ if (XE_IOCTL_DBG(oa->xe, value > DRM_XE_MAX_SYNCS))
+ return -EINVAL;
+
param->num_syncs = value;
return 0;
}
@@ -1309,7 +1347,7 @@ static int xe_oa_user_ext_set_property(struct xe_oa *oa, enum xe_oa_user_extn_fr
ARRAY_SIZE(xe_oa_set_property_funcs_config));
if (XE_IOCTL_DBG(oa->xe, ext.property >= ARRAY_SIZE(xe_oa_set_property_funcs_open)) ||
- XE_IOCTL_DBG(oa->xe, ext.pad))
+ XE_IOCTL_DBG(oa->xe, !ext.property) || XE_IOCTL_DBG(oa->xe, ext.pad))
return -EINVAL;
idx = array_index_nospec(ext.property, ARRAY_SIZE(xe_oa_set_property_funcs_open));
@@ -1357,7 +1395,9 @@ static int xe_oa_user_extensions(struct xe_oa *oa, enum xe_oa_user_extn_from fro
return 0;
}
-static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
+static int xe_oa_parse_syncs(struct xe_oa *oa,
+ struct xe_oa_stream *stream,
+ struct xe_oa_open_param *param)
{
int ret, num_syncs, num_ufence = 0;
@@ -1377,7 +1417,9 @@ static int xe_oa_parse_syncs(struct xe_oa *oa, struct xe_oa_open_param *param)
for (num_syncs = 0; num_syncs < param->num_syncs; num_syncs++) {
ret = xe_sync_entry_parse(oa->xe, param->xef, &param->syncs[num_syncs],
- &param->syncs_user[num_syncs], 0);
+ &param->syncs_user[num_syncs],
+ stream->ufence_syncobj,
+ ++stream->ufence_timeline_value, 0);
if (ret)
goto err_syncs;
@@ -1507,7 +1549,7 @@ static long xe_oa_config_locked(struct xe_oa_stream *stream, u64 arg)
return -ENODEV;
param.xef = stream->xef;
- err = xe_oa_parse_syncs(stream->oa, &param);
+ err = xe_oa_parse_syncs(stream->oa, stream, &param);
if (err)
goto err_config_put;
@@ -1550,7 +1592,7 @@ static long xe_oa_status_locked(struct xe_oa_stream *stream, unsigned long arg)
static long xe_oa_info_locked(struct xe_oa_stream *stream, unsigned long arg)
{
- struct drm_xe_oa_stream_info info = { .oa_buf_size = stream->oa_buffer.bo->size, };
+ struct drm_xe_oa_stream_info info = { .oa_buf_size = xe_bo_size(stream->oa_buffer.bo), };
void __user *uaddr = (void __user *)arg;
if (copy_to_user(uaddr, &info, sizeof(info)))
@@ -1603,6 +1645,7 @@ static void xe_oa_destroy_locked(struct xe_oa_stream *stream)
if (stream->exec_q)
xe_exec_queue_put(stream->exec_q);
+ drm_syncobj_put(stream->ufence_syncobj);
kfree(stream);
}
@@ -1636,7 +1679,7 @@ static int xe_oa_mmap(struct file *file, struct vm_area_struct *vma)
}
/* Can mmap the entire OA buffer or nothing (no partial OA buffer mmaps) */
- if (vma->vm_end - vma->vm_start != stream->oa_buffer.bo->size) {
+ if (vma->vm_end - vma->vm_start != xe_bo_size(stream->oa_buffer.bo)) {
drm_dbg(&stream->oa->xe->drm, "Wrong mmap size, must be OA buffer size\n");
return -EINVAL;
}
@@ -1677,13 +1720,12 @@ static const struct file_operations xe_oa_fops = {
static int xe_oa_stream_init(struct xe_oa_stream *stream,
struct xe_oa_open_param *param)
{
- struct xe_oa_unit *u = param->hwe->oa_unit;
struct xe_gt *gt = param->hwe->gt;
- unsigned int fw_ref;
int ret;
stream->exec_q = param->exec_q;
stream->poll_period_ns = DEFAULT_POLL_PERIOD_NS;
+ stream->oa_unit = param->oa_unit;
stream->hwe = param->hwe;
stream->gt = stream->hwe->gt;
stream->oa_buffer.format = &stream->oa->oa_formats[param->oa_format];
@@ -1704,7 +1746,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
* buffer whose size, circ_size, is a multiple of the report size
*/
if (GRAPHICS_VER(stream->oa->xe) >= 20 &&
- stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
+ stream->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample)
stream->oa_buffer.circ_size =
param->oa_buffer_size -
param->oa_buffer_size % stream->oa_buffer.format->size;
@@ -1722,7 +1764,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
* GuC reset of engines causes OA to lose configuration
* state. Prevent this by overriding GUCRC mode.
*/
- if (XE_WA(stream->gt, 1509372804)) {
+ if (XE_GT_WA(stream->gt, 1509372804)) {
ret = xe_guc_pc_override_gucrc_mode(&gt->uc.guc.pc,
SLPC_GUCRC_MODE_GUCRC_NO_RC6);
if (ret)
@@ -1733,8 +1775,8 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
/* Take runtime pm ref and forcewake to disable RC6 */
xe_pm_runtime_get(stream->oa->xe);
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
- if (!xe_force_wake_ref_has_domain(fw_ref, XE_FORCEWAKE_ALL)) {
+ stream->fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
+ if (!xe_force_wake_ref_has_domain(stream->fw_ref, XE_FORCEWAKE_ALL)) {
ret = -ETIMEDOUT;
goto err_fw_put;
}
@@ -1762,7 +1804,7 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream,
drm_dbg(&stream->oa->xe->drm, "opening stream oa config uuid=%s\n",
stream->oa_config->uuid);
- WRITE_ONCE(u->exclusive_stream, stream);
+ WRITE_ONCE(stream->oa_unit->exclusive_stream, stream);
hrtimer_setup(&stream->poll_check_timer, xe_oa_poll_check_timer_cb, CLOCK_MONOTONIC,
HRTIMER_MODE_REL);
@@ -1779,7 +1821,7 @@ err_put_k_exec_q:
err_free_oa_buf:
xe_oa_free_oa_buffer(stream);
err_fw_put:
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ xe_force_wake_put(gt_to_fw(gt), stream->fw_ref);
xe_pm_runtime_put(stream->oa->xe);
if (stream->override_gucrc)
xe_gt_WARN_ON(gt, xe_guc_pc_unset_gucrc_mode(&gt->uc.guc.pc));
@@ -1794,27 +1836,42 @@ static int xe_oa_stream_open_ioctl_locked(struct xe_oa *oa,
struct xe_oa_open_param *param)
{
struct xe_oa_stream *stream;
+ struct drm_syncobj *ufence_syncobj;
int stream_fd;
int ret;
/* We currently only allow exclusive access */
- if (param->hwe->oa_unit->exclusive_stream) {
+ if (param->oa_unit->exclusive_stream) {
drm_dbg(&oa->xe->drm, "OA unit already in use\n");
ret = -EBUSY;
goto exit;
}
+ ret = drm_syncobj_create(&ufence_syncobj, DRM_SYNCOBJ_CREATE_SIGNALED,
+ NULL);
+ if (ret)
+ goto exit;
+
stream = kzalloc(sizeof(*stream), GFP_KERNEL);
if (!stream) {
ret = -ENOMEM;
- goto exit;
+ goto err_syncobj;
}
-
+ stream->ufence_syncobj = ufence_syncobj;
stream->oa = oa;
- ret = xe_oa_stream_init(stream, param);
+
+ ret = xe_oa_parse_syncs(oa, stream, param);
if (ret)
goto err_free;
+ ret = xe_oa_stream_init(stream, param);
+ if (ret) {
+ while (param->num_syncs--)
+ xe_sync_entry_cleanup(&param->syncs[param->num_syncs]);
+ kfree(param->syncs);
+ goto err_free;
+ }
+
if (!param->disabled) {
ret = xe_oa_enable_locked(stream);
if (ret)
@@ -1838,6 +1895,8 @@ err_destroy:
xe_oa_stream_destroy(stream);
err_free:
kfree(stream);
+err_syncobj:
+ drm_syncobj_put(ufence_syncobj);
exit:
return ret;
}
@@ -1854,7 +1913,7 @@ u32 xe_oa_timestamp_frequency(struct xe_gt *gt)
{
u32 reg, shift;
- if (XE_WA(gt, 18013179988) || XE_WA(gt, 14015568240)) {
+ if (XE_GT_WA(gt, 18013179988) || XE_GT_WA(gt, 14015568240)) {
xe_pm_runtime_get(gt_to_xe(gt));
reg = xe_mmio_read32(&gt->mmio, RPM_CONFIG0);
xe_pm_runtime_put(gt_to_xe(gt));
@@ -1874,13 +1933,14 @@ static u64 oa_exponent_to_ns(struct xe_gt *gt, int exponent)
return div_u64(nom + den - 1, den);
}
-static bool engine_supports_oa_format(const struct xe_hw_engine *hwe, int type)
+static bool oa_unit_supports_oa_format(struct xe_oa_open_param *param, int type)
{
- switch (hwe->oa_unit->type) {
+ switch (param->oa_unit->type) {
case DRM_XE_OA_UNIT_TYPE_OAG:
return type == DRM_XE_OA_FMT_TYPE_OAG || type == DRM_XE_OA_FMT_TYPE_OAR ||
type == DRM_XE_OA_FMT_TYPE_OAC || type == DRM_XE_OA_FMT_TYPE_PEC;
case DRM_XE_OA_UNIT_TYPE_OAM:
+ case DRM_XE_OA_UNIT_TYPE_OAM_SAG:
return type == DRM_XE_OA_FMT_TYPE_OAM || type == DRM_XE_OA_FMT_TYPE_OAM_MPEC;
default:
return false;
@@ -1899,37 +1959,48 @@ u16 xe_oa_unit_id(struct xe_hw_engine *hwe)
hwe->oa_unit->oa_unit_id : U16_MAX;
}
+/* A hwe must be assigned to stream/oa_unit for batch submissions */
static int xe_oa_assign_hwe(struct xe_oa *oa, struct xe_oa_open_param *param)
{
- struct xe_gt *gt;
- int i, ret = 0;
+ struct xe_hw_engine *hwe;
+ enum xe_hw_engine_id id;
+ int ret = 0;
+
+ /* If not provided, OA unit defaults to OA unit 0 as per uapi */
+ if (!param->oa_unit)
+ param->oa_unit = &xe_root_mmio_gt(oa->xe)->oa.oa_unit[0];
+ /* When we have an exec_q, get hwe from the exec_q */
if (param->exec_q) {
- /* When we have an exec_q, get hwe from the exec_q */
param->hwe = xe_gt_hw_engine(param->exec_q->gt, param->exec_q->class,
param->engine_instance, true);
- } else {
- struct xe_hw_engine *hwe;
- enum xe_hw_engine_id id;
-
- /* Else just get the first hwe attached to the oa unit */
- for_each_gt(gt, oa->xe, i) {
- for_each_hw_engine(hwe, gt, id) {
- if (xe_oa_unit_id(hwe) == param->oa_unit_id) {
- param->hwe = hwe;
- goto out;
- }
- }
- }
+ if (!param->hwe || param->hwe->oa_unit != param->oa_unit)
+ goto err;
+ goto out;
}
-out:
- if (!param->hwe || xe_oa_unit_id(param->hwe) != param->oa_unit_id) {
- drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
- param->exec_q ? param->exec_q->class : -1,
- param->engine_instance, param->oa_unit_id);
- ret = -EINVAL;
+
+ /* Else just get the first hwe attached to the oa unit */
+ for_each_hw_engine(hwe, param->oa_unit->gt, id) {
+ if (hwe->oa_unit == param->oa_unit) {
+ param->hwe = hwe;
+ goto out;
+ }
}
+ /* If we still didn't find a hwe, just get one with a valid oa_unit from the same gt */
+ for_each_hw_engine(hwe, param->oa_unit->gt, id) {
+ if (!hwe->oa_unit)
+ continue;
+
+ param->hwe = hwe;
+ goto out;
+ }
+err:
+ drm_dbg(&oa->xe->drm, "Unable to find hwe (%d, %d) for OA unit ID %d\n",
+ param->exec_q ? param->exec_q->class : -1,
+ param->engine_instance, param->oa_unit->oa_unit_id);
+ ret = -EINVAL;
+out:
return ret;
}
@@ -2007,7 +2078,7 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
f = &oa->oa_formats[param.oa_format];
if (!param.oa_format || !f->size ||
- !engine_supports_oa_format(param.hwe, f->type)) {
+ !oa_unit_supports_oa_format(&param, f->type)) {
drm_dbg(&oa->xe->drm, "Invalid OA format %d type %d size %d for class %d\n",
param.oa_format, f->type, f->size, param.hwe->class);
ret = -EINVAL;
@@ -2039,22 +2110,14 @@ int xe_oa_stream_open_ioctl(struct drm_device *dev, u64 data, struct drm_file *f
goto err_exec_q;
}
- ret = xe_oa_parse_syncs(oa, &param);
- if (ret)
- goto err_exec_q;
-
mutex_lock(&param.hwe->gt->oa.gt_lock);
ret = xe_oa_stream_open_ioctl_locked(oa, &param);
mutex_unlock(&param.hwe->gt->oa.gt_lock);
if (ret < 0)
- goto err_sync_cleanup;
+ goto err_exec_q;
return ret;
-err_sync_cleanup:
- while (param.num_syncs--)
- xe_sync_entry_cleanup(&param.syncs[param.num_syncs]);
- kfree(param.syncs);
err_exec_q:
if (param.exec_q)
xe_exec_queue_put(param.exec_q);
@@ -2155,6 +2218,7 @@ static const struct xe_mmio_range gen12_oa_mux_regs[] = {
static const struct xe_mmio_range xe2_oa_mux_regs[] = {
{ .start = 0x5194, .end = 0x5194 }, /* SYS_MEM_LAT_MEASURE_MERTF_GRP_3D */
{ .start = 0x8704, .end = 0x8704 }, /* LMEM_LAT_MEASURE_MCFG_GRP */
+ { .start = 0xB01C, .end = 0xB01C }, /* LNCF_MISC_CONFIG_REGISTER0 */
{ .start = 0xB1BC, .end = 0xB1BC }, /* L3_BANK_LAT_MEASURE_LBCF_GFX */
{ .start = 0xD0E0, .end = 0xD0F4 }, /* VISACTL */
{ .start = 0xE18C, .end = 0xE18C }, /* SAMPLER_MODE */
@@ -2343,11 +2407,13 @@ int xe_oa_add_config_ioctl(struct drm_device *dev, u64 data, struct drm_file *fi
goto sysfs_err;
}
- mutex_unlock(&oa->metrics_lock);
+ id = oa_config->id;
- drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, oa_config->id);
+ drm_dbg(&oa->xe->drm, "Added config %s id=%i\n", oa_config->uuid, id);
- return oa_config->id;
+ mutex_unlock(&oa->metrics_lock);
+
+ return id;
sysfs_err:
mutex_unlock(&oa->metrics_lock);
@@ -2448,20 +2514,38 @@ int xe_oa_register(struct xe_device *xe)
static u32 num_oa_units_per_gt(struct xe_gt *gt)
{
- return 1;
+ if (xe_gt_is_main_type(gt) || GRAPHICS_VER(gt_to_xe(gt)) < 20)
+ return 1;
+ else if (!IS_DGFX(gt_to_xe(gt)))
+ return XE_OAM_UNIT_SCMI_0 + 1; /* SAG + SCMI_0 */
+ else
+ return XE_OAM_UNIT_SCMI_1 + 1; /* SAG + SCMI_0 + SCMI_1 */
}
static u32 __hwe_oam_unit(struct xe_hw_engine *hwe)
{
- if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) >= 1270) {
- /*
- * There's 1 SAMEDIA gt and 1 OAM per SAMEDIA gt. All media slices
- * within the gt use the same OAM. All MTL/LNL SKUs list 1 SA MEDIA
- */
- xe_gt_WARN_ON(hwe->gt, hwe->gt->info.type != XE_GT_TYPE_MEDIA);
+ if (GRAPHICS_VERx100(gt_to_xe(hwe->gt)) < 1270)
+ return XE_OA_UNIT_INVALID;
+
+ xe_gt_WARN_ON(hwe->gt, xe_gt_is_main_type(hwe->gt));
+ if (GRAPHICS_VER(gt_to_xe(hwe->gt)) < 20)
return 0;
- }
+ /*
+ * XE_OAM_UNIT_SAG has only GSCCS attached to it, but only on some platforms. Also
+ * GSCCS cannot be used to submit batches to program the OAM unit. Therefore we don't
+ * assign an OA unit to GSCCS. This means that XE_OAM_UNIT_SAG is exposed as an OA
+	 * unit without attached engines. Fused-off engines can also result in oa_units with
+ * num_engines == 0. OA streams can be opened on all OA units.
+ */
+ else if (hwe->engine_id == XE_HW_ENGINE_GSCCS0)
+ return XE_OA_UNIT_INVALID;
+ else if (!IS_DGFX(gt_to_xe(hwe->gt)))
+ return XE_OAM_UNIT_SCMI_0;
+ else if (hwe->class == XE_ENGINE_CLASS_VIDEO_DECODE)
+ return (hwe->instance / 2 & 0x1) + 1;
+ else if (hwe->class == XE_ENGINE_CLASS_VIDEO_ENHANCE)
+ return (hwe->instance & 0x1) + 1;
return XE_OA_UNIT_INVALID;
}
@@ -2475,6 +2559,7 @@ static u32 __hwe_oa_unit(struct xe_hw_engine *hwe)
case XE_ENGINE_CLASS_VIDEO_DECODE:
case XE_ENGINE_CLASS_VIDEO_ENHANCE:
+ case XE_ENGINE_CLASS_OTHER:
return __hwe_oam_unit(hwe);
default:
@@ -2514,20 +2599,29 @@ static struct xe_oa_regs __oag_regs(void)
static void __xe_oa_init_oa_units(struct xe_gt *gt)
{
- const u32 mtl_oa_base[] = { 0x13000 };
+ /* Actual address is MEDIA_GT_GSI_OFFSET + oam_base_addr[i] */
+ const u32 oam_base_addr[] = {
+ [XE_OAM_UNIT_SAG] = 0x13000,
+ [XE_OAM_UNIT_SCMI_0] = 0x14000,
+ [XE_OAM_UNIT_SCMI_1] = 0x14800,
+ };
int i, num_units = gt->oa.num_oa_units;
for (i = 0; i < num_units; i++) {
struct xe_oa_unit *u = &gt->oa.oa_unit[i];
- if (gt->info.type != XE_GT_TYPE_MEDIA) {
+ if (xe_gt_is_main_type(gt)) {
u->regs = __oag_regs();
u->type = DRM_XE_OA_UNIT_TYPE_OAG;
- } else if (GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270) {
- u->regs = __oam_regs(mtl_oa_base[i]);
- u->type = DRM_XE_OA_UNIT_TYPE_OAM;
+ } else {
+ xe_gt_assert(gt, GRAPHICS_VERx100(gt_to_xe(gt)) >= 1270);
+ u->regs = __oam_regs(oam_base_addr[i]);
+ u->type = i == XE_OAM_UNIT_SAG && GRAPHICS_VER(gt_to_xe(gt)) >= 20 ?
+ DRM_XE_OA_UNIT_TYPE_OAM_SAG : DRM_XE_OA_UNIT_TYPE_OAM;
}
+ u->gt = gt;
+
xe_mmio_write32(&gt->mmio, u->regs.oa_ctrl, 0);
/* Ensure MMIO trigger remains disabled till there is a stream */
@@ -2560,10 +2654,6 @@ static int xe_oa_init_gt(struct xe_gt *gt)
}
}
- /*
- * Fused off engines can result in oa_unit's with num_engines == 0. These units
- * will appear in OA unit query, but no OA streams can be opened on them.
- */
gt->oa.num_oa_units = num_oa_units;
gt->oa.oa_unit = u;
@@ -2574,17 +2664,54 @@ static int xe_oa_init_gt(struct xe_gt *gt)
return 0;
}
+static void xe_oa_print_gt_oa_units(struct xe_gt *gt)
+{
+ enum xe_hw_engine_id hwe_id;
+ struct xe_hw_engine *hwe;
+ struct xe_oa_unit *u;
+ char buf[256];
+ int i, n;
+
+ for (i = 0; i < gt->oa.num_oa_units; i++) {
+ u = &gt->oa.oa_unit[i];
+ buf[0] = '\0';
+ n = 0;
+
+ for_each_hw_engine(hwe, gt, hwe_id)
+ if (xe_oa_unit_id(hwe) == u->oa_unit_id)
+ n += scnprintf(buf + n, sizeof(buf) - n, "%s ", hwe->name);
+
+ xe_gt_dbg(gt, "oa_unit %d, type %d, Engines: %s\n", u->oa_unit_id, u->type, buf);
+ }
+}
+
+static void xe_oa_print_oa_units(struct xe_oa *oa)
+{
+ struct xe_gt *gt;
+ int gt_id;
+
+ for_each_gt(gt, oa->xe, gt_id)
+ xe_oa_print_gt_oa_units(gt);
+}
+
static int xe_oa_init_oa_units(struct xe_oa *oa)
{
struct xe_gt *gt;
int i, ret;
+ /* Needed for OAM implementation here */
+ BUILD_BUG_ON(XE_OAM_UNIT_SAG != 0);
+ BUILD_BUG_ON(XE_OAM_UNIT_SCMI_0 != 1);
+ BUILD_BUG_ON(XE_OAM_UNIT_SCMI_1 != 2);
+
for_each_gt(gt, oa->xe, i) {
ret = xe_oa_init_gt(gt);
if (ret)
return ret;
}
+ xe_oa_print_oa_units(oa);
+
return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_oa_types.h b/drivers/gpu/drm/xe/xe_oa_types.h
index 52e33c37d5ee..cf080f412189 100644
--- a/drivers/gpu/drm/xe/xe_oa_types.h
+++ b/drivers/gpu/drm/xe/xe_oa_types.h
@@ -15,6 +15,8 @@
#include "regs/xe_reg_defs.h"
#include "xe_hw_engine_types.h"
+struct drm_syncobj;
+
#define DEFAULT_XE_OA_BUFFER_SIZE SZ_16M
enum xe_oa_report_header {
@@ -95,6 +97,9 @@ struct xe_oa_unit {
/** @oa_unit_id: identifier for the OA unit */
u16 oa_unit_id;
+ /** @gt: gt associated with the OA unit */
+ struct xe_gt *gt;
+
/** @type: Type of OA unit - OAM, OAG etc. */
enum drm_xe_oa_unit_type type;
@@ -182,6 +187,9 @@ struct xe_oa_stream {
/** @gt: gt associated with the oa stream */
struct xe_gt *gt;
+ /** @oa_unit: oa unit for this stream */
+ struct xe_oa_unit *oa_unit;
+
/** @hwe: hardware engine associated with this oa stream */
struct xe_hw_engine *hwe;
@@ -242,6 +250,12 @@ struct xe_oa_stream {
/** @xef: xe_file with which the stream was opened */
struct xe_file *xef;
+ /** @ufence_syncobj: User fence syncobj */
+ struct drm_syncobj *ufence_syncobj;
+
+ /** @ufence_timeline_value: User fence timeline value */
+ u64 ufence_timeline_value;
+
/** @last_fence: fence to use in stream destroy when needed */
struct dma_fence *last_fence;
@@ -250,5 +264,8 @@ struct xe_oa_stream {
/** @syncs: syncs to wait on and to signal */
struct xe_sync_entry *syncs;
+
+ /** @fw_ref: Forcewake reference */
+ unsigned int fw_ref;
};
#endif
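
The __hwe_oam_unit() rework above spreads media engines across OAM units on discrete Xe2+ parts: GSCCS gets no OA unit, video decode instances map to SCMI units in pairs, video enhance instances alternate per instance, and integrated parts keep everything on SCMI_0. A small standalone model of the discrete mapping, with enum values mirroring XE_OAM_UNIT_* from the hunk above:

#include <stdio.h>

enum oam_unit { OAM_SAG, OAM_SCMI_0, OAM_SCMI_1, OAM_NONE };
enum engine_class { CLASS_VCS, CLASS_VECS, CLASS_GSCCS };

/* Discrete Xe2+ mapping modeled on __hwe_oam_unit(): VCS pairs share an SCMI
 * unit, VECS alternates per instance, GSCCS is left without an OA unit. */
static enum oam_unit dgfx_oam_unit(enum engine_class cls, int instance)
{
	switch (cls) {
	case CLASS_VCS:
		return ((instance / 2) & 1) + OAM_SCMI_0;
	case CLASS_VECS:
		return (instance & 1) + OAM_SCMI_0;
	default:
		return OAM_NONE;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 8; i++)
		printf("vcs%d  -> SCMI_%d\n", i, dgfx_oam_unit(CLASS_VCS, i) - OAM_SCMI_0);
	for (i = 0; i < 4; i++)
		printf("vecs%d -> SCMI_%d\n", i, dgfx_oam_unit(CLASS_VECS, i) - OAM_SCMI_0);
	return 0;
}
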
diff --git a/drivers/gpu/drm/xe/xe_pagefault.c b/drivers/gpu/drm/xe/xe_pagefault.c
new file mode 100644
index 000000000000..afb06598b6e1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pagefault.c
@@ -0,0 +1,444 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/circ_buf.h>
+
+#include <drm/drm_exec.h>
+#include <drm/drm_managed.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_types.h"
+#include "xe_gt_stats.h"
+#include "xe_hw_engine.h"
+#include "xe_pagefault.h"
+#include "xe_pagefault_types.h"
+#include "xe_svm.h"
+#include "xe_trace_bo.h"
+#include "xe_vm.h"
+
+/**
+ * DOC: Xe page faults
+ *
+ * Xe page faults are handled in two layers. The producer layer interacts with
+ * hardware or firmware to receive and parse faults into struct xe_pagefault,
+ * then forwards them to the consumer. The consumer layer services the faults
+ * (e.g., memory migration, page table updates) and acknowledges the result back
+ * to the producer, which then forwards the results to the hardware or firmware.
+ * The consumer uses a page fault queue sized to absorb all potential faults and
+ * a multi-threaded worker to process them. Multiple producers are supported,
+ * with a single shared consumer.
+ *
+ * xe_pagefault.c implements the consumer layer.
+ */
+
+static int xe_pagefault_entry_size(void)
+{
+ /*
+ * Power of two alignment is not a hardware requirement, rather a
+	 * software restriction that makes the math for page fault queue
+	 * management simpler.
+ */
+ return roundup_pow_of_two(sizeof(struct xe_pagefault));
+}
+
+static int xe_pagefault_begin(struct drm_exec *exec, struct xe_vma *vma,
+ struct xe_vram_region *vram, bool need_vram_move)
+{
+ struct xe_bo *bo = xe_vma_bo(vma);
+ struct xe_vm *vm = xe_vma_vm(vma);
+ int err;
+
+ err = xe_vm_lock_vma(exec, vma);
+ if (err)
+ return err;
+
+ if (!bo)
+ return 0;
+
+ return need_vram_move ? xe_bo_migrate(bo, vram->placement, NULL, exec) :
+ xe_bo_validate(bo, vm, true, exec);
+}
+
+static int xe_pagefault_handle_vma(struct xe_gt *gt, struct xe_vma *vma,
+ bool atomic)
+{
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_tile *tile = gt_to_tile(gt);
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
+ struct dma_fence *fence;
+ int err, needs_vram;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ needs_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
+ if (needs_vram < 0 || (needs_vram && xe_vma_is_userptr(vma)))
+ return needs_vram < 0 ? needs_vram : -EACCES;
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_COUNT, 1);
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_VMA_PAGEFAULT_KB,
+ xe_vma_size(vma) / SZ_1K);
+
+ trace_xe_vma_pagefault(vma);
+
+ /* Check if VMA is valid, opportunistic check only */
+ if (xe_vm_has_valid_gpu_mapping(tile, vma->tile_present,
+ vma->tile_invalidated) && !atomic)
+ return 0;
+
+retry_userptr:
+ if (xe_vma_is_userptr(vma) &&
+ xe_vma_userptr_check_repin(to_userptr_vma(vma))) {
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
+
+ err = xe_vma_userptr_pin_pages(uvma);
+ if (err)
+ return err;
+ }
+
+ /* Lock VM and BOs dma-resv */
+ xe_validation_ctx_init(&ctx, &vm->xe->val, &exec, (struct xe_val_flags) {});
+ drm_exec_until_all_locked(&exec) {
+ err = xe_pagefault_begin(&exec, vma, tile->mem.vram,
+ needs_vram == 1);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ if (err)
+ goto unlock_dma_resv;
+
+ /* Bind VMA only to the GT that has faulted */
+ trace_xe_vma_pf_bind(vma);
+ xe_vm_set_validation_exec(vm, &exec);
+ fence = xe_vma_rebind(vm, vma, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
+ if (IS_ERR(fence)) {
+ err = PTR_ERR(fence);
+ xe_validation_retry_on_oom(&ctx, &err);
+ goto unlock_dma_resv;
+ }
+ }
+
+ dma_fence_wait(fence, false);
+ dma_fence_put(fence);
+
+unlock_dma_resv:
+ xe_validation_ctx_fini(&ctx);
+ if (err == -EAGAIN)
+ goto retry_userptr;
+
+ return err;
+}
+
+static bool
+xe_pagefault_access_is_atomic(enum xe_pagefault_access_type access_type)
+{
+ return access_type == XE_PAGEFAULT_ACCESS_TYPE_ATOMIC;
+}
+
+static struct xe_vm *xe_pagefault_asid_to_vm(struct xe_device *xe, u32 asid)
+{
+ struct xe_vm *vm;
+
+ down_read(&xe->usm.lock);
+ vm = xa_load(&xe->usm.asid_to_vm, asid);
+ if (vm && xe_vm_in_fault_mode(vm))
+ xe_vm_get(vm);
+ else
+ vm = ERR_PTR(-EINVAL);
+ up_read(&xe->usm.lock);
+
+ return vm;
+}
+
+static int xe_pagefault_service(struct xe_pagefault *pf)
+{
+ struct xe_gt *gt = pf->gt;
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_vm *vm;
+ struct xe_vma *vma = NULL;
+ int err;
+ bool atomic;
+
+ /* Producer flagged this fault to be nacked */
+ if (pf->consumer.fault_level == XE_PAGEFAULT_LEVEL_NACK)
+ return -EFAULT;
+
+ vm = xe_pagefault_asid_to_vm(xe, pf->consumer.asid);
+ if (IS_ERR(vm))
+ return PTR_ERR(vm);
+
+ /*
+ * TODO: Change to read lock? Using write lock for simplicity.
+ */
+ down_write(&vm->lock);
+
+ if (xe_vm_is_closed(vm)) {
+ err = -ENOENT;
+ goto unlock_vm;
+ }
+
+ vma = xe_vm_find_vma_by_addr(vm, pf->consumer.page_addr);
+ if (!vma) {
+ err = -EINVAL;
+ goto unlock_vm;
+ }
+
+ atomic = xe_pagefault_access_is_atomic(pf->consumer.access_type);
+
+ if (xe_vma_is_cpu_addr_mirror(vma))
+ err = xe_svm_handle_pagefault(vm, vma, gt,
+ pf->consumer.page_addr, atomic);
+ else
+ err = xe_pagefault_handle_vma(gt, vma, atomic);
+
+unlock_vm:
+ if (!err)
+ vm->usm.last_fault_vma = vma;
+ up_write(&vm->lock);
+ xe_vm_put(vm);
+
+ return err;
+}
+
+static bool xe_pagefault_queue_pop(struct xe_pagefault_queue *pf_queue,
+ struct xe_pagefault *pf)
+{
+ bool found_fault = false;
+
+ spin_lock_irq(&pf_queue->lock);
+ if (pf_queue->tail != pf_queue->head) {
+ memcpy(pf, pf_queue->data + pf_queue->tail, sizeof(*pf));
+ pf_queue->tail = (pf_queue->tail + xe_pagefault_entry_size()) %
+ pf_queue->size;
+ found_fault = true;
+ }
+ spin_unlock_irq(&pf_queue->lock);
+
+ return found_fault;
+}
+
+static void xe_pagefault_print(struct xe_pagefault *pf)
+{
+ xe_gt_dbg(pf->gt, "\n\tASID: %d\n"
+ "\tFaulted Address: 0x%08x%08x\n"
+ "\tFaultType: %d\n"
+ "\tAccessType: %d\n"
+ "\tFaultLevel: %d\n"
+ "\tEngineClass: %d %s\n"
+ "\tEngineInstance: %d\n",
+ pf->consumer.asid,
+ upper_32_bits(pf->consumer.page_addr),
+ lower_32_bits(pf->consumer.page_addr),
+ pf->consumer.fault_type,
+ pf->consumer.access_type,
+ pf->consumer.fault_level,
+ pf->consumer.engine_class,
+ xe_hw_engine_class_to_str(pf->consumer.engine_class),
+ pf->consumer.engine_instance);
+}
+
+static void xe_pagefault_queue_work(struct work_struct *w)
+{
+ struct xe_pagefault_queue *pf_queue =
+ container_of(w, typeof(*pf_queue), worker);
+ struct xe_pagefault pf;
+ unsigned long threshold;
+
+#define USM_QUEUE_MAX_RUNTIME_MS 20
+ threshold = jiffies + msecs_to_jiffies(USM_QUEUE_MAX_RUNTIME_MS);
+
+ while (xe_pagefault_queue_pop(pf_queue, &pf)) {
+ int err;
+
+ if (!pf.gt) /* Fault squashed during reset */
+ continue;
+
+ err = xe_pagefault_service(&pf);
+ if (err) {
+ xe_pagefault_print(&pf);
+ xe_gt_dbg(pf.gt, "Fault response: Unsuccessful %pe\n",
+ ERR_PTR(err));
+ }
+
+ pf.producer.ops->ack_fault(&pf, err);
+
+ if (time_after(jiffies, threshold)) {
+ queue_work(gt_to_xe(pf.gt)->usm.pf_wq, w);
+ break;
+ }
+ }
+#undef USM_QUEUE_MAX_RUNTIME_MS
+}
+
+static int xe_pagefault_queue_init(struct xe_device *xe,
+ struct xe_pagefault_queue *pf_queue)
+{
+ struct xe_gt *gt;
+ int total_num_eus = 0;
+ u8 id;
+
+ for_each_gt(gt, xe, id) {
+ xe_dss_mask_t all_dss;
+ int num_dss, num_eus;
+
+ bitmap_or(all_dss, gt->fuse_topo.g_dss_mask,
+ gt->fuse_topo.c_dss_mask, XE_MAX_DSS_FUSE_BITS);
+
+ num_dss = bitmap_weight(all_dss, XE_MAX_DSS_FUSE_BITS);
+ num_eus = bitmap_weight(gt->fuse_topo.eu_mask_per_dss,
+ XE_MAX_EU_FUSE_BITS) * num_dss;
+
+ total_num_eus += num_eus;
+ }
+
+ xe_assert(xe, total_num_eus);
+
+ /*
+	 * Users can issue separate page faults per EU and per CS.
+	 *
+	 * XXX: Multiplier required as compute UMDs are getting PF queue errors
+	 * without it. Follow up on why this multiplier is required.
+ */
+#define PF_MULTIPLIER 8
+ pf_queue->size = (total_num_eus + XE_NUM_HW_ENGINES) *
+ xe_pagefault_entry_size() * PF_MULTIPLIER;
+ pf_queue->size = roundup_pow_of_two(pf_queue->size);
+#undef PF_MULTIPLIER
+
+ drm_dbg(&xe->drm, "xe_pagefault_entry_size=%d, total_num_eus=%d, pf_queue->size=%u",
+ xe_pagefault_entry_size(), total_num_eus, pf_queue->size);
+
+ spin_lock_init(&pf_queue->lock);
+ INIT_WORK(&pf_queue->worker, xe_pagefault_queue_work);
+
+ pf_queue->data = drmm_kzalloc(&xe->drm, pf_queue->size, GFP_KERNEL);
+ if (!pf_queue->data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static void xe_pagefault_fini(void *arg)
+{
+ struct xe_device *xe = arg;
+
+ destroy_workqueue(xe->usm.pf_wq);
+}
+
+/**
+ * xe_pagefault_init() - Page fault init
+ * @xe: xe device instance
+ *
+ * Initialize Xe page fault state. Must be done after reading fuses.
+ *
+ * Return: 0 on Success, errno on failure
+ */
+int xe_pagefault_init(struct xe_device *xe)
+{
+ int err, i;
+
+ if (!xe->info.has_usm)
+ return 0;
+
+ xe->usm.pf_wq = alloc_workqueue("xe_page_fault_work_queue",
+ WQ_UNBOUND | WQ_HIGHPRI,
+ XE_PAGEFAULT_QUEUE_COUNT);
+ if (!xe->usm.pf_wq)
+ return -ENOMEM;
+
+ for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i) {
+ err = xe_pagefault_queue_init(xe, xe->usm.pf_queue + i);
+ if (err)
+ goto err_out;
+ }
+
+ return devm_add_action_or_reset(xe->drm.dev, xe_pagefault_fini, xe);
+
+err_out:
+ destroy_workqueue(xe->usm.pf_wq);
+ return err;
+}
+
+static void xe_pagefault_queue_reset(struct xe_device *xe, struct xe_gt *gt,
+ struct xe_pagefault_queue *pf_queue)
+{
+ u32 i;
+
+ /* Driver load failure guard / USM not enabled guard */
+ if (!pf_queue->data)
+ return;
+
+ /* Squash all pending faults on the GT */
+
+ spin_lock_irq(&pf_queue->lock);
+ for (i = pf_queue->tail; i != pf_queue->head;
+ i = (i + xe_pagefault_entry_size()) % pf_queue->size) {
+ struct xe_pagefault *pf = pf_queue->data + i;
+
+ if (pf->gt == gt)
+ pf->gt = NULL;
+ }
+ spin_unlock_irq(&pf_queue->lock);
+}
+
+/**
+ * xe_pagefault_reset() - Page fault reset for a GT
+ * @xe: xe device instance
+ * @gt: GT being reset
+ *
+ * Reset the Xe page fault state for a GT; that is, squash any pending faults on
+ * the GT.
+ */
+void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
+{
+ int i;
+
+ for (i = 0; i < XE_PAGEFAULT_QUEUE_COUNT; ++i)
+ xe_pagefault_queue_reset(xe, gt, xe->usm.pf_queue + i);
+}
+
+static bool xe_pagefault_queue_full(struct xe_pagefault_queue *pf_queue)
+{
+ lockdep_assert_held(&pf_queue->lock);
+
+ return CIRC_SPACE(pf_queue->head, pf_queue->tail, pf_queue->size) <=
+ xe_pagefault_entry_size();
+}
+
+/**
+ * xe_pagefault_handler() - Page fault handler
+ * @xe: xe device instance
+ * @pf: Page fault
+ *
+ * Sink the page fault to a queue (i.e., a memory buffer) and queue a worker to
+ * service it. Safe to be called from IRQ or process context. Reclaim safe.
+ *
+ * Return: 0 on success, errno on failure
+ */
+int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf)
+{
+ struct xe_pagefault_queue *pf_queue = xe->usm.pf_queue +
+ (pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT);
+ unsigned long flags;
+ bool full;
+
+ spin_lock_irqsave(&pf_queue->lock, flags);
+ full = xe_pagefault_queue_full(pf_queue);
+ if (!full) {
+ memcpy(pf_queue->data + pf_queue->head, pf, sizeof(*pf));
+ pf_queue->head = (pf_queue->head + xe_pagefault_entry_size()) %
+ pf_queue->size;
+ queue_work(xe->usm.pf_wq, &pf_queue->worker);
+ } else {
+ drm_warn(&xe->drm,
+ "PageFault Queue (%d) full, shouldn't be possible\n",
+ pf->consumer.asid % XE_PAGEFAULT_QUEUE_COUNT);
+ }
+ spin_unlock_irqrestore(&pf_queue->lock, flags);
+
+ return full ? -ENOSPC : 0;
+}
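
The page fault queue above is a byte-addressed ring: entries are a power-of-two size, head and tail advance by the entry size modulo the queue size, and the handler reports the queue full once CIRC_SPACE() drops to one entry's worth of bytes or less. A userspace model of that arithmetic; the 64-byte entry matches the documented struct size, while the 512-byte queue is just an illustrative power of two:

#include <assert.h>
#include <stdio.h>

#define ENTRY_SIZE 64u	/* power-of-two entry size, as documented above */
#define QUEUE_SIZE 512u	/* illustrative power-of-two queue size */

/* Free bytes between producer head and consumer tail, mirroring CIRC_SPACE(). */
static unsigned int circ_space(unsigned int head, unsigned int tail, unsigned int size)
{
	return (tail - (head + 1)) & (size - 1);
}

int main(void)
{
	unsigned int head = 0, tail = 0, pushed = 0;

	/* Producer side: push entries until the "full" test from the handler trips. */
	while (circ_space(head, tail, QUEUE_SIZE) > ENTRY_SIZE) {
		head = (head + ENTRY_SIZE) % QUEUE_SIZE;
		pushed++;
	}
	printf("queue holds %u entries before reporting full\n", pushed);

	/* Consumer side: popping one entry makes room for a new fault again. */
	tail = (tail + ENTRY_SIZE) % QUEUE_SIZE;
	assert(circ_space(head, tail, QUEUE_SIZE) > ENTRY_SIZE);
	return 0;
}
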
diff --git a/drivers/gpu/drm/xe/xe_pagefault.h b/drivers/gpu/drm/xe/xe_pagefault.h
new file mode 100644
index 000000000000..bd0cdf9ed37f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pagefault.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGEFAULT_H_
+#define _XE_PAGEFAULT_H_
+
+struct xe_device;
+struct xe_gt;
+struct xe_pagefault;
+
+int xe_pagefault_init(struct xe_device *xe);
+
+void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt);
+
+int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault *pf);
+
+#endif
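
The DOC comment in xe_pagefault.c above describes a producer/consumer split in which the consumer hands the result back to the producer through xe_pagefault_ops.ack_fault(). A tiny standalone model of that callback contract; the names and the fake error path are illustrative, not driver API:

#include <errno.h>
#include <stdio.h>

struct fault;

/* Producer-supplied ops, modeled on struct xe_pagefault_ops above. */
struct fault_ops {
	void (*ack_fault)(struct fault *f, int err);
};

struct fault {
	int id;
	const struct fault_ops *ops;	/* set by the producer */
};

/* A producer backend: turns the consumer's result into a "HW/FW" reply. */
static void backend_ack(struct fault *f, int err)
{
	printf("fault %d -> %s\n", f->id, err ? "nack" : "ack");
}

static const struct fault_ops backend_ops = { .ack_fault = backend_ack };

/* Consumer side: service the fault, then acknowledge it via the producer's ops. */
static void consume(struct fault *f)
{
	int err = (f->id == 2) ? -EFAULT : 0;	/* pretend fault 2 cannot be resolved */

	f->ops->ack_fault(f, err);
}

int main(void)
{
	struct fault faults[] = {
		{ .id = 1, .ops = &backend_ops },
		{ .id = 2, .ops = &backend_ops },
	};

	consume(&faults[0]);
	consume(&faults[1]);
	return 0;
}
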
diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h b/drivers/gpu/drm/xe/xe_pagefault_types.h
new file mode 100644
index 000000000000..d3b516407d60
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_pagefault_types.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PAGEFAULT_TYPES_H_
+#define _XE_PAGEFAULT_TYPES_H_
+
+#include <linux/workqueue.h>
+
+struct xe_gt;
+struct xe_pagefault;
+
+/** enum xe_pagefault_access_type - Xe page fault access type */
+enum xe_pagefault_access_type {
+ /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */
+ XE_PAGEFAULT_ACCESS_TYPE_READ = 0,
+ /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */
+ XE_PAGEFAULT_ACCESS_TYPE_WRITE = 1,
+ /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */
+ XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2,
+};
+
+/** enum xe_pagefault_type - Xe page fault type */
+enum xe_pagefault_type {
+ /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */
+ XE_PAGEFAULT_TYPE_NOT_PRESENT = 0,
+ /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access violation */
+ XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION = 1,
+ /** @XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION: Atomic access violation */
+ XE_PAGEFAULT_TYPE_ATOMIC_ACCESS_VIOLATION = 2,
+};
+
+/** struct xe_pagefault_ops - Xe pagefault ops (producer) */
+struct xe_pagefault_ops {
+ /**
+ * @ack_fault: Ack fault
+ * @pf: Page fault
+ * @err: Error state of fault
+ *
+ * Page fault producer receives acknowledgment from the consumer and
+ * sends the result to the HW/FW interface.
+ */
+ void (*ack_fault)(struct xe_pagefault *pf, int err);
+};
+
+/**
+ * struct xe_pagefault - Xe page fault
+ *
+ * Generic page fault structure for communication between producer and consumer.
+ * Carefully sized to be 64 bytes. Upon a device page fault, the producer
+ * populates this structure, and the consumer copies it into the page-fault
+ * queue for deferred handling.
+ */
+struct xe_pagefault {
+ /**
+ * @gt: GT of fault
+ */
+ struct xe_gt *gt;
+ /**
+ * @consumer: State for the software handling the fault. Populated by
+ * the producer and may be modified by the consumer to communicate
+ * information back to the producer upon fault acknowledgment.
+ */
+ struct {
+ /** @consumer.page_addr: address of page fault */
+ u64 page_addr;
+ /** @consumer.asid: address space ID */
+ u32 asid;
+ /**
+ * @consumer.access_type: access type, u8 rather than enum to
+ * keep size compact
+ */
+ u8 access_type;
+ /**
+ * @consumer.fault_type: fault type, u8 rather than enum to
+ * keep size compact
+ */
+ u8 fault_type;
+#define XE_PAGEFAULT_LEVEL_NACK 0xff /* Producer indicates nack fault */
+ /** @consumer.fault_level: fault level */
+ u8 fault_level;
+ /** @consumer.engine_class: engine class */
+ u8 engine_class;
+ /** @consumer.engine_instance: engine instance */
+ u8 engine_instance;
+		/** @consumer.reserved: reserved bits for future expansion */
+ u8 reserved[7];
+ } consumer;
+ /**
+ * @producer: State for the producer (i.e., HW/FW interface). Populated
+	 * by the producer and should not be modified, or even inspected, by the
+ * consumer, except for calling operations.
+ */
+ struct {
+ /** @producer.private: private pointer */
+ void *private;
+ /** @producer.ops: operations */
+ const struct xe_pagefault_ops *ops;
+#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW 4
+ /**
+ * @producer.msg: page fault message, used by producer in fault
+ * acknowledgment to formulate response to HW/FW interface.
+		 * Stored inline in struct xe_pagefault because the producer
+ * typically receives the fault in a context where memory cannot
+ * be allocated (e.g., atomic context or the reclaim path).
+ */
+ u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW];
+ } producer;
+};
+
+/**
+ * struct xe_pagefault_queue - Xe pagefault queue (consumer)
+ *
+ * Used to capture all device page faults for deferred processing. Size this
+ * queue to absorb the device's worst-case number of outstanding faults.
+ */
+struct xe_pagefault_queue {
+ /**
+ * @data: Data in queue containing struct xe_pagefault, protected by
+ * @lock
+ */
+ void *data;
+ /** @size: Size of queue in bytes */
+ u32 size;
+ /** @head: Head pointer in bytes, moved by producer, protected by @lock */
+ u32 head;
+ /** @tail: Tail pointer in bytes, moved by consumer, protected by @lock */
+ u32 tail;
+ /** @lock: protects page fault queue */
+ spinlock_t lock;
+ /** @worker: to process page faults */
+ struct work_struct worker;
+};
+
+#endif
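
struct xe_pagefault above is documented as carefully sized to 64 bytes so that entries pack evenly into the byte-addressed queue. A standalone layout check using a simplified copy of the struct; the field widths are taken from the header above, and the 64-byte total assumes 64-bit pointers:

#include <stdint.h>
#include <stdio.h>

/* Simplified stand-ins mirroring the field layout documented above. */
struct pf_consumer {
	uint64_t page_addr;
	uint32_t asid;
	uint8_t access_type;
	uint8_t fault_type;
	uint8_t fault_level;
	uint8_t engine_class;
	uint8_t engine_instance;
	uint8_t reserved[7];
};	/* 24 bytes */

struct pf_producer {
	void *private_data;
	const void *ops;
	uint32_t msg[4];
};	/* 32 bytes with 8-byte pointers */

struct pagefault {
	void *gt;
	struct pf_consumer consumer;
	struct pf_producer producer;
};

/* 8 (gt) + 24 (consumer) + 32 (producer) = 64 bytes on an LP64 build. */
_Static_assert(sizeof(struct pagefault) == 64, "page fault entry must stay 64 bytes");

int main(void)
{
	printf("sizeof(struct pagefault) = %zu bytes\n", sizeof(struct pagefault));
	return 0;
}
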
diff --git a/drivers/gpu/drm/xe/xe_pat.c b/drivers/gpu/drm/xe/xe_pat.c
index 30fdbdb9341e..68171cceea18 100644
--- a/drivers/gpu/drm/xe/xe_pat.c
+++ b/drivers/gpu/drm/xe/xe_pat.c
@@ -57,7 +57,7 @@ struct xe_pat_ops {
int n_entries);
void (*program_media)(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries);
- void (*dump)(struct xe_gt *gt, struct drm_printer *p);
+ int (*dump)(struct xe_gt *gt, struct drm_printer *p);
};
static const struct xe_pat_table_entry xelp_pat_table[] = {
@@ -103,7 +103,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
*
* Note: There is an implicit assumption in the driver that compression and
* coh_1way+ are mutually exclusive. If this is ever not true then userptr
- * and imported dma-buf from external device will have uncleared ccs state.
+ * and imported dma-buf from external device will have uncleared ccs state. See
+ * also xe_bo_needs_ccs_pages().
*/
#define XE2_PAT(no_promote, comp_en, l3clos, l3_policy, l4_policy, __coh_mode) \
{ \
@@ -114,7 +115,8 @@ static const struct xe_pat_table_entry xelpg_pat_table[] = {
REG_FIELD_PREP(XE2_L4_POLICY, l4_policy) | \
REG_FIELD_PREP(XE2_COH_MODE, __coh_mode), \
.coh_mode = (BUILD_BUG_ON_ZERO(__coh_mode && comp_en) || __coh_mode) ? \
- XE_COH_AT_LEAST_1WAY : XE_COH_NONE \
+ XE_COH_AT_LEAST_1WAY : XE_COH_NONE, \
+ .valid = 1 \
}
static const struct xe_pat_table_entry xe2_pat_table[] = {
@@ -153,6 +155,41 @@ static const struct xe_pat_table_entry xe2_pat_table[] = {
static const struct xe_pat_table_entry xe2_pat_ats = XE2_PAT( 0, 0, 0, 0, 3, 3 );
static const struct xe_pat_table_entry xe2_pat_pta = XE2_PAT( 0, 0, 0, 0, 3, 0 );
+/*
+ * Xe3p_XPC PAT table uses the same layout as Xe2/Xe3, except that there's no
+ * option for compression. Also note that the "L3" and "L4" register fields
+ * actually control L2 and L3 cache respectively on this platform.
+ */
+#define XE3P_XPC_PAT(no_promote, l3clos, l3_policy, l4_policy, __coh_mode) \
+ XE2_PAT(no_promote, 0, l3clos, l3_policy, l4_policy, __coh_mode)
+
+static const struct xe_pat_table_entry xe3p_xpc_pat_ats = XE3P_XPC_PAT( 0, 0, 0, 0, 3 );
+static const struct xe_pat_table_entry xe3p_xpc_pat_pta = XE3P_XPC_PAT( 0, 0, 0, 0, 0 );
+
+static const struct xe_pat_table_entry xe3p_xpc_pat_table[] = {
+ [ 0] = XE3P_XPC_PAT( 0, 0, 0, 0, 0 ),
+ [ 1] = XE3P_XPC_PAT( 0, 0, 0, 0, 2 ),
+ [ 2] = XE3P_XPC_PAT( 0, 0, 0, 0, 3 ),
+ [ 3] = XE3P_XPC_PAT( 0, 0, 3, 3, 0 ),
+ [ 4] = XE3P_XPC_PAT( 0, 0, 3, 3, 2 ),
+ [ 5] = XE3P_XPC_PAT( 0, 0, 3, 0, 0 ),
+ [ 6] = XE3P_XPC_PAT( 0, 0, 3, 0, 2 ),
+ [ 7] = XE3P_XPC_PAT( 0, 0, 3, 0, 3 ),
+ [ 8] = XE3P_XPC_PAT( 0, 0, 0, 3, 0 ),
+ [ 9] = XE3P_XPC_PAT( 0, 0, 0, 3, 2 ),
+ [10] = XE3P_XPC_PAT( 0, 0, 0, 3, 3 ),
+ /* 11..22 are reserved; leave set to all 0's */
+ [23] = XE3P_XPC_PAT( 0, 1, 0, 0, 0 ),
+ [24] = XE3P_XPC_PAT( 0, 1, 0, 0, 2 ),
+ [25] = XE3P_XPC_PAT( 0, 1, 0, 0, 3 ),
+ [26] = XE3P_XPC_PAT( 0, 2, 0, 0, 0 ),
+ [27] = XE3P_XPC_PAT( 0, 2, 0, 0, 2 ),
+ [28] = XE3P_XPC_PAT( 0, 2, 0, 0, 3 ),
+ [29] = XE3P_XPC_PAT( 0, 3, 0, 0, 0 ),
+ [30] = XE3P_XPC_PAT( 0, 3, 0, 0, 2 ),
+ [31] = XE3P_XPC_PAT( 0, 3, 0, 0, 3 ),
+};
+
u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
{
WARN_ON(pat_index >= xe->pat.n_entries);
@@ -162,24 +199,38 @@ u16 xe_pat_index_get_coh_mode(struct xe_device *xe, u16 pat_index)
static void program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
+ struct xe_device *xe = gt_to_xe(gt);
+
for (int i = 0; i < n_entries; i++) {
struct xe_reg reg = XE_REG(_PAT_INDEX(i));
xe_mmio_write32(&gt->mmio, reg, table[i].value);
}
+
+ if (xe->pat.pat_ats)
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe->pat.pat_ats->value);
+ if (xe->pat.pat_pta)
+ xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe->pat.pat_pta->value);
}
static void program_pat_mcr(struct xe_gt *gt, const struct xe_pat_table_entry table[],
int n_entries)
{
+ struct xe_device *xe = gt_to_xe(gt);
+
for (int i = 0; i < n_entries; i++) {
struct xe_reg_mcr reg_mcr = XE_REG_MCR(_PAT_INDEX(i));
xe_gt_mcr_multicast_write(gt, reg_mcr, table[i].value);
}
+
+ if (xe->pat.pat_ats)
+ xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe->pat.pat_ats->value);
+ if (xe->pat.pat_pta)
+ xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe->pat.pat_pta->value);
}
-static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
+static int xelp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
unsigned int fw_ref;
@@ -187,7 +238,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
- return;
+ return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
@@ -200,6 +251,7 @@ static void xelp_dump(struct xe_gt *gt, struct drm_printer *p)
}
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
static const struct xe_pat_ops xelp_pat_ops = {
@@ -207,7 +259,7 @@ static const struct xe_pat_ops xelp_pat_ops = {
.dump = xelp_dump,
};
-static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
+static int xehp_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
unsigned int fw_ref;
@@ -215,7 +267,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
- return;
+ return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
@@ -230,6 +282,7 @@ static void xehp_dump(struct xe_gt *gt, struct drm_printer *p)
}
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
static const struct xe_pat_ops xehp_pat_ops = {
@@ -237,7 +290,7 @@ static const struct xe_pat_ops xehp_pat_ops = {
.dump = xehp_dump,
};
-static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
+static int xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
unsigned int fw_ref;
@@ -245,7 +298,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
- return;
+ return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
@@ -258,6 +311,7 @@ static void xehpc_dump(struct xe_gt *gt, struct drm_printer *p)
}
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
static const struct xe_pat_ops xehpc_pat_ops = {
@@ -265,7 +319,7 @@ static const struct xe_pat_ops xehpc_pat_ops = {
.dump = xehpc_dump,
};
-static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
+static int xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
unsigned int fw_ref;
@@ -273,7 +327,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
- return;
+ return -ETIMEDOUT;
drm_printf(p, "PAT table:\n");
@@ -291,6 +345,7 @@ static void xelpg_dump(struct xe_gt *gt, struct drm_printer *p)
}
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
/*
@@ -303,27 +358,7 @@ static const struct xe_pat_ops xelpg_pat_ops = {
.dump = xelpg_dump,
};
-static void xe2lpg_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
- int n_entries)
-{
- program_pat_mcr(gt, table, n_entries);
- xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_ATS), xe2_pat_ats.value);
-
- if (IS_DGFX(gt_to_xe(gt)))
- xe_gt_mcr_multicast_write(gt, XE_REG_MCR(_PAT_PTA), xe2_pat_pta.value);
-}
-
-static void xe2lpm_program_pat(struct xe_gt *gt, const struct xe_pat_table_entry table[],
- int n_entries)
-{
- program_pat(gt, table, n_entries);
- xe_mmio_write32(&gt->mmio, XE_REG(_PAT_ATS), xe2_pat_ats.value);
-
- if (IS_DGFX(gt_to_xe(gt)))
- xe_mmio_write32(&gt->mmio, XE_REG(_PAT_PTA), xe2_pat_pta.value);
-}
-
-static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
+static int xe2_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
unsigned int fw_ref;
@@ -332,9 +367,9 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
if (!fw_ref)
- return;
+ return -ETIMEDOUT;
- drm_printf(p, "PAT table:\n");
+ drm_printf(p, "PAT table: (* = reserved entry)\n");
for (i = 0; i < xe->pat.n_entries; i++) {
if (xe_gt_is_media_type(gt))
@@ -342,14 +377,14 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
else
pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
- drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)\n", i,
+ drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u, %u ] (%#8x)%s\n", i,
!!(pat & XE2_NO_PROMOTE),
!!(pat & XE2_COMP_EN),
REG_FIELD_GET(XE2_L3_CLOS, pat),
REG_FIELD_GET(XE2_L3_POLICY, pat),
REG_FIELD_GET(XE2_L4_POLICY, pat),
REG_FIELD_GET(XE2_COH_MODE, pat),
- pat);
+ pat, xe->pat.table[i].valid ? "" : " *");
}
/*
@@ -372,19 +407,82 @@ static void xe2_dump(struct xe_gt *gt, struct drm_printer *p)
pat);
xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
}
static const struct xe_pat_ops xe2_pat_ops = {
- .program_graphics = xe2lpg_program_pat,
- .program_media = xe2lpm_program_pat,
+ .program_graphics = program_pat_mcr,
+ .program_media = program_pat,
.dump = xe2_dump,
};
+static int xe3p_xpc_dump(struct xe_gt *gt, struct drm_printer *p)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
+ u32 pat;
+ int i;
+
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
+
+ drm_printf(p, "PAT table: (* = reserved entry)\n");
+
+ for (i = 0; i < xe->pat.n_entries; i++) {
+ pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_INDEX(i)));
+
+ drm_printf(p, "PAT[%2d] = [ %u, %u, %u, %u, %u ] (%#8x)%s\n", i,
+ !!(pat & XE2_NO_PROMOTE),
+ REG_FIELD_GET(XE2_L3_CLOS, pat),
+ REG_FIELD_GET(XE2_L3_POLICY, pat),
+ REG_FIELD_GET(XE2_L4_POLICY, pat),
+ REG_FIELD_GET(XE2_COH_MODE, pat),
+ pat, xe->pat.table[i].valid ? "" : " *");
+ }
+
+ /*
+ * Also print PTA_MODE, which describes how the hardware accesses
+ * PPGTT entries.
+ */
+ pat = xe_gt_mcr_unicast_read_any(gt, XE_REG_MCR(_PAT_PTA));
+
+ drm_printf(p, "Page Table Access:\n");
+ drm_printf(p, "PTA_MODE= [ %u, %u, %u, %u, %u ] (%#8x)\n",
+ !!(pat & XE2_NO_PROMOTE),
+ REG_FIELD_GET(XE2_L3_CLOS, pat),
+ REG_FIELD_GET(XE2_L3_POLICY, pat),
+ REG_FIELD_GET(XE2_L4_POLICY, pat),
+ REG_FIELD_GET(XE2_COH_MODE, pat),
+ pat);
+
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ return 0;
+}
+
+static const struct xe_pat_ops xe3p_xpc_pat_ops = {
+ .program_graphics = program_pat_mcr,
+ .program_media = program_pat,
+ .dump = xe3p_xpc_dump,
+};
+
void xe_pat_init_early(struct xe_device *xe)
{
- if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) {
+ if (GRAPHICS_VERx100(xe) == 3511) {
+ xe->pat.ops = &xe3p_xpc_pat_ops;
+ xe->pat.table = xe3p_xpc_pat_table;
+ xe->pat.pat_ats = &xe3p_xpc_pat_ats;
+ xe->pat.pat_pta = &xe3p_xpc_pat_pta;
+ xe->pat.n_entries = ARRAY_SIZE(xe3p_xpc_pat_table);
+ xe->pat.idx[XE_CACHE_NONE] = 3;
+ xe->pat.idx[XE_CACHE_WT] = 3; /* N/A (no display); use UC */
+ xe->pat.idx[XE_CACHE_WB] = 2;
+ } else if (GRAPHICS_VER(xe) == 30 || GRAPHICS_VER(xe) == 20) {
xe->pat.ops = &xe2_pat_ops;
xe->pat.table = xe2_pat_table;
+ xe->pat.pat_ats = &xe2_pat_ats;
+ if (IS_DGFX(xe))
+ xe->pat.pat_pta = &xe2_pat_pta;
/* Wa_16023588340. XXX: Should use XE_WA */
if (GRAPHICS_VERx100(xe) == 2001)
@@ -464,12 +562,19 @@ void xe_pat_init(struct xe_gt *gt)
xe->pat.ops->program_graphics(gt, xe->pat.table, xe->pat.n_entries);
}
-void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_pat_dump() - Dump GT PAT table into a drm printer.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p)
{
struct xe_device *xe = gt_to_xe(gt);
if (!xe->pat.ops)
- return;
+ return -EOPNOTSUPP;
- xe->pat.ops->dump(gt, p);
+ return xe->pat.ops->dump(gt, p);
}
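
For illustration only (not part of this patch), the new int return lets callers report failure instead of silently printing nothing; a minimal sketch, assuming a hypothetical wrapper around a debugfs-style path:

	/* Hypothetical caller of the reworked xe_pat_dump(). */
	static int pat_show(struct xe_gt *gt, struct drm_printer *p)
	{
		int err = xe_pat_dump(gt, p);

		if (err == -EOPNOTSUPP)
			drm_printf(p, "PAT programming not supported\n");

		return err;	/* -ETIMEDOUT if forcewake could not be taken */
	}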
diff --git a/drivers/gpu/drm/xe/xe_pat.h b/drivers/gpu/drm/xe/xe_pat.h
index fa0dfbe525cd..05dae03a5f54 100644
--- a/drivers/gpu/drm/xe/xe_pat.h
+++ b/drivers/gpu/drm/xe/xe_pat.h
@@ -29,6 +29,11 @@ struct xe_pat_table_entry {
#define XE_COH_NONE 1
#define XE_COH_AT_LEAST_1WAY 2
u16 coh_mode;
+
+ /**
+ * @valid: Set to 1 if the entry is valid, 0 if it's reserved.
+ */
+ u16 valid;
};
/**
@@ -43,12 +48,7 @@ void xe_pat_init_early(struct xe_device *xe);
*/
void xe_pat_init(struct xe_gt *gt);
-/**
- * xe_pat_dump - Dump PAT table
- * @gt: GT structure
- * @p: Printer to dump info to
- */
-void xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
+int xe_pat_dump(struct xe_gt *gt, struct drm_printer *p);
/**
* xe_pat_index_get_coh_mode - Extract the coherency mode for the given
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 024175cfe61e..9c9ea10d994c 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -17,6 +17,8 @@
#include "display/xe_display.h"
#include "regs/xe_gt_regs.h"
+#include "regs/xe_regs.h"
+#include "xe_configfs.h"
#include "xe_device.h"
#include "xe_drv.h"
#include "xe_gt.h"
@@ -28,6 +30,7 @@
#include "xe_pci_sriov.h"
#include "xe_pci_types.h"
#include "xe_pm.h"
+#include "xe_printk.h"
#include "xe_sriov.h"
#include "xe_step.h"
#include "xe_survivability_mode.h"
@@ -38,42 +41,6 @@ enum toggle_d3cold {
D3COLD_ENABLE,
};
-struct xe_subplatform_desc {
- enum xe_subplatform subplatform;
- const char *name;
- const u16 *pciidlist;
-};
-
-struct xe_device_desc {
- /* Should only ever be set for platforms without GMD_ID */
- const struct xe_ip *pre_gmdid_graphics_ip;
- /* Should only ever be set for platforms without GMD_ID */
- const struct xe_ip *pre_gmdid_media_ip;
-
- const char *platform_name;
- const struct xe_subplatform_desc *subplatforms;
-
- enum xe_platform platform;
-
- u8 dma_mask_size;
- u8 max_remote_tiles:2;
-
- u8 require_force_probe:1;
- u8 is_dgfx:1;
-
- u8 has_display:1;
- u8 has_fan_control:1;
- u8 has_heci_gscfi:1;
- u8 has_heci_cscfi:1;
- u8 has_llc:1;
- u8 has_pxp:1;
- u8 has_sriov:1;
- u8 needs_scratch:1;
- u8 skip_guc_pc:1;
- u8 skip_mtcfg:1;
- u8 skip_pcode:1;
-};
-
__diag_push();
__diag_ignore_all("-Woverride-init", "Allow field overrides in table");
@@ -85,15 +52,10 @@ __diag_ignore_all("-Woverride-init", "Allow field overrides in table");
static const struct xe_graphics_desc graphics_xelp = {
.hw_engine_mask = BIT(XE_HW_ENGINE_RCS0) | BIT(XE_HW_ENGINE_BCS0),
-
- .va_bits = 48,
- .vm_max_level = 3,
};
#define XE_HP_FEATURES \
- .has_range_tlb_invalidation = true, \
- .va_bits = 48, \
- .vm_max_level = 3
+ .has_range_tlb_inval = true
static const struct xe_graphics_desc graphics_xehpg = {
.hw_engine_mask =
@@ -102,9 +64,6 @@ static const struct xe_graphics_desc graphics_xehpg = {
BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
XE_HP_FEATURES,
- .vram_flags = XE_VRAM_FLAGS_NEED64K,
-
- .has_flat_ccs = 1,
};
static const struct xe_graphics_desc graphics_xehpc = {
@@ -118,9 +77,6 @@ static const struct xe_graphics_desc graphics_xehpc = {
BIT(XE_HW_ENGINE_CCS2) | BIT(XE_HW_ENGINE_CCS3),
XE_HP_FEATURES,
- .va_bits = 57,
- .vm_max_level = 4,
- .vram_flags = XE_VRAM_FLAGS_NEED64K,
.has_asid = 1,
.has_atomic_enable_pte_bit = 1,
@@ -138,13 +94,9 @@ static const struct xe_graphics_desc graphics_xelpg = {
#define XE2_GFX_FEATURES \
.has_asid = 1, \
.has_atomic_enable_pte_bit = 1, \
- .has_flat_ccs = 1, \
- .has_indirect_ring_state = 1, \
- .has_range_tlb_invalidation = 1, \
+ .has_range_tlb_inval = 1, \
.has_usm = 1, \
.has_64bit_timestamp = 1, \
- .va_bits = 48, \
- .vm_max_level = 4, \
.hw_engine_mask = \
BIT(XE_HW_ENGINE_RCS0) | \
BIT(XE_HW_ENGINE_BCS8) | BIT(XE_HW_ENGINE_BCS0) | \
@@ -154,6 +106,13 @@ static const struct xe_graphics_desc graphics_xe2 = {
XE2_GFX_FEATURES,
};
+static const struct xe_graphics_desc graphics_xe3p_xpc = {
+ XE2_GFX_FEATURES,
+ .hw_engine_mask =
+ GENMASK(XE_HW_ENGINE_BCS8, XE_HW_ENGINE_BCS1) |
+ GENMASK(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0),
+};
+
static const struct xe_media_desc media_xem = {
.hw_engine_mask =
GENMASK(XE_HW_ENGINE_VCS7, XE_HW_ENGINE_VCS0) |
@@ -179,9 +138,14 @@ static const struct xe_ip graphics_ips[] = {
{ 1271, "Xe_LPG", &graphics_xelpg },
{ 1274, "Xe_LPG+", &graphics_xelpg },
{ 2001, "Xe2_HPG", &graphics_xe2 },
+ { 2002, "Xe2_HPG", &graphics_xe2 },
{ 2004, "Xe2_LPG", &graphics_xe2 },
{ 3000, "Xe3_LPG", &graphics_xe2 },
{ 3001, "Xe3_LPG", &graphics_xe2 },
+ { 3003, "Xe3_LPG", &graphics_xe2 },
+ { 3004, "Xe3_LPG", &graphics_xe2 },
+ { 3005, "Xe3_LPG", &graphics_xe2 },
+ { 3511, "Xe3p_XPC", &graphics_xe3p_xpc },
};
/* Pre-GMDID Media IPs */
@@ -194,6 +158,9 @@ static const struct xe_ip media_ips[] = {
{ 1301, "Xe2_HPM", &media_xelpmp },
{ 2000, "Xe2_LPM", &media_xelpmp },
{ 3000, "Xe3_LPM", &media_xelpmp },
+ { 3002, "Xe3_LPM", &media_xelpmp },
+ { 3500, "Xe3p_LPM", &media_xelpmp },
+ { 3503, "Xe3p_HPM", &media_xelpmp },
};
static const struct xe_device_desc tgl_desc = {
@@ -203,7 +170,11 @@ static const struct xe_device_desc tgl_desc = {
.dma_mask_size = 39,
.has_display = true,
.has_llc = true,
+ .has_sriov = true,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
+ .va_bits = 48,
+ .vm_max_level = 3,
};
static const struct xe_device_desc rkl_desc = {
@@ -213,7 +184,10 @@ static const struct xe_device_desc rkl_desc = {
.dma_mask_size = 39,
.has_display = true,
.has_llc = true,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
+ .va_bits = 48,
+ .vm_max_level = 3,
};
static const u16 adls_rpls_ids[] = { INTEL_RPLS_IDS(NOP), 0 };
@@ -225,11 +199,15 @@ static const struct xe_device_desc adl_s_desc = {
.dma_mask_size = 39,
.has_display = true,
.has_llc = true,
+ .has_sriov = true,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
.subplatforms = (const struct xe_subplatform_desc[]) {
{ XE_SUBPLATFORM_ALDERLAKE_S_RPLS, "RPLS", adls_rpls_ids },
{},
},
+ .va_bits = 48,
+ .vm_max_level = 3,
};
static const u16 adlp_rplu_ids[] = { INTEL_RPLU_IDS(NOP), 0 };
@@ -241,11 +219,15 @@ static const struct xe_device_desc adl_p_desc = {
.dma_mask_size = 39,
.has_display = true,
.has_llc = true,
+ .has_sriov = true,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
.subplatforms = (const struct xe_subplatform_desc[]) {
{ XE_SUBPLATFORM_ALDERLAKE_P_RPLU, "RPLU", adlp_rplu_ids },
{},
},
+ .va_bits = 48,
+ .vm_max_level = 3,
};
static const struct xe_device_desc adl_n_desc = {
@@ -255,7 +237,11 @@ static const struct xe_device_desc adl_n_desc = {
.dma_mask_size = 39,
.has_display = true,
.has_llc = true,
+ .has_sriov = true,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
+ .va_bits = 48,
+ .vm_max_level = 3,
};
#define DGFX_FEATURES \
@@ -268,8 +254,12 @@ static const struct xe_device_desc dg1_desc = {
PLATFORM(DG1),
.dma_mask_size = 39,
.has_display = true,
+ .has_gsc_nvm = 1,
.has_heci_gscfi = 1,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
+ .va_bits = 48,
+ .vm_max_level = 3,
};
static const u16 dg2_g10_ids[] = { INTEL_DG2_G10_IDS(NOP), INTEL_ATS_M150_IDS(NOP), 0 };
@@ -279,33 +269,42 @@ static const u16 dg2_g12_ids[] = { INTEL_DG2_G12_IDS(NOP), 0 };
#define DG2_FEATURES \
DGFX_FEATURES, \
PLATFORM(DG2), \
+ .has_flat_ccs = 1, \
+ .has_gsc_nvm = 1, \
.has_heci_gscfi = 1, \
.subplatforms = (const struct xe_subplatform_desc[]) { \
{ XE_SUBPLATFORM_DG2_G10, "G10", dg2_g10_ids }, \
{ XE_SUBPLATFORM_DG2_G11, "G11", dg2_g11_ids }, \
{ XE_SUBPLATFORM_DG2_G12, "G12", dg2_g12_ids }, \
{ } \
- }
+ }, \
+ .va_bits = 48, \
+ .vm_max_level = 3, \
+ .vram_flags = XE_VRAM_FLAGS_NEED64K
static const struct xe_device_desc ats_m_desc = {
.pre_gmdid_graphics_ip = &graphics_ip_xehpg,
.pre_gmdid_media_ip = &media_ip_xehpm,
.dma_mask_size = 46,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
DG2_FEATURES,
.has_display = false,
+ .has_sriov = true,
};
static const struct xe_device_desc dg2_desc = {
.pre_gmdid_graphics_ip = &graphics_ip_xehpg,
.pre_gmdid_media_ip = &media_ip_xehpm,
.dma_mask_size = 46,
+ .max_gt_per_tile = 1,
.require_force_probe = true,
DG2_FEATURES,
.has_display = true,
.has_fan_control = true,
+ .has_mbx_power_limits = false,
};
static const __maybe_unused struct xe_device_desc pvc_desc = {
@@ -314,9 +313,15 @@ static const __maybe_unused struct xe_device_desc pvc_desc = {
PLATFORM(PVC),
.dma_mask_size = 52,
.has_display = false,
+ .has_gsc_nvm = 1,
.has_heci_gscfi = 1,
+ .max_gt_per_tile = 1,
.max_remote_tiles = 1,
.require_force_probe = true,
+ .va_bits = 57,
+ .vm_max_level = 4,
+ .vram_flags = XE_VRAM_FLAGS_NEED64K,
+ .has_mbx_power_limits = false,
};
static const struct xe_device_desc mtl_desc = {
@@ -326,33 +331,87 @@ static const struct xe_device_desc mtl_desc = {
.dma_mask_size = 46,
.has_display = true,
.has_pxp = true,
+ .max_gt_per_tile = 2,
+ .va_bits = 48,
+ .vm_max_level = 3,
};
static const struct xe_device_desc lnl_desc = {
PLATFORM(LUNARLAKE),
.dma_mask_size = 46,
.has_display = true,
+ .has_flat_ccs = 1,
.has_pxp = true,
+ .has_mem_copy_instr = true,
+ .max_gt_per_tile = 2,
.needs_scratch = true,
+ .va_bits = 48,
+ .vm_max_level = 4,
};
+static const u16 bmg_g21_ids[] = { INTEL_BMG_G21_IDS(NOP), 0 };
+
static const struct xe_device_desc bmg_desc = {
DGFX_FEATURES,
PLATFORM(BATTLEMAGE),
.dma_mask_size = 46,
.has_display = true,
.has_fan_control = true,
+ .has_flat_ccs = 1,
+ .has_mbx_power_limits = true,
+ .has_gsc_nvm = 1,
.has_heci_cscfi = 1,
+ .has_late_bind = true,
+ .has_sriov = true,
+ .has_mem_copy_instr = true,
+ .max_gt_per_tile = 2,
.needs_scratch = true,
+ .subplatforms = (const struct xe_subplatform_desc[]) {
+ { XE_SUBPLATFORM_BATTLEMAGE_G21, "G21", bmg_g21_ids },
+ { }
+ },
+ .va_bits = 48,
+ .vm_max_level = 4,
};
static const struct xe_device_desc ptl_desc = {
PLATFORM(PANTHERLAKE),
.dma_mask_size = 46,
.has_display = true,
+ .has_flat_ccs = 1,
.has_sriov = true,
- .require_force_probe = true,
+ .has_mem_copy_instr = true,
+ .max_gt_per_tile = 2,
.needs_scratch = true,
+ .needs_shared_vf_gt_wq = true,
+ .va_bits = 48,
+ .vm_max_level = 4,
+};
+
+static const struct xe_device_desc nvls_desc = {
+ PLATFORM(NOVALAKE_S),
+ .dma_mask_size = 46,
+ .has_display = true,
+ .has_flat_ccs = 1,
+ .has_mem_copy_instr = true,
+ .max_gt_per_tile = 2,
+ .require_force_probe = true,
+ .va_bits = 48,
+ .vm_max_level = 4,
+};
+
+static const struct xe_device_desc cri_desc = {
+ DGFX_FEATURES,
+ PLATFORM(CRESCENTISLAND),
+ .dma_mask_size = 52,
+ .has_display = false,
+ .has_flat_ccs = false,
+ .has_mbx_power_limits = true,
+ .has_sriov = true,
+ .max_gt_per_tile = 2,
+ .require_force_probe = true,
+ .va_bits = 57,
+ .vm_max_level = 4,
};
#undef PLATFORM
@@ -381,6 +440,9 @@ static const struct pci_device_id pciidlist[] = {
INTEL_LNL_IDS(INTEL_VGA_DEVICE, &lnl_desc),
INTEL_BMG_IDS(INTEL_VGA_DEVICE, &bmg_desc),
INTEL_PTL_IDS(INTEL_VGA_DEVICE, &ptl_desc),
+ INTEL_WCL_IDS(INTEL_VGA_DEVICE, &ptl_desc),
+ INTEL_NVLS_IDS(INTEL_VGA_DEVICE, &nvls_desc),
+ INTEL_CRI_IDS(INTEL_PCI_DEVICE, &cri_desc),
{ }
};
MODULE_DEVICE_TABLE(pci, pciidlist);
@@ -453,7 +515,7 @@ enum xe_gmdid_type {
GMDID_MEDIA
};
-static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
+static int read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver, u32 *revid)
{
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct xe_reg gmdid_reg = GMD_ID;
@@ -462,22 +524,24 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
KUNIT_STATIC_STUB_REDIRECT(read_gmdid, xe, type, ver, revid);
if (IS_SRIOV_VF(xe)) {
- struct xe_gt *gt = xe_root_mmio_gt(xe);
-
/*
* To get the value of the GMDID register, VFs must obtain it
* from the GuC using MMIO communication.
*
- * Note that at this point the xe_gt is not fully uninitialized
- * and only basic access to MMIO registers is possible. To use
- * our existing GuC communication functions we must perform at
- * least basic xe_gt and xe_guc initialization.
- *
- * Since to obtain the value of GMDID_MEDIA we need to use the
- * media GuC, temporarily tweak the gt type.
+ * Note that at this point the GTs are not initialized and only
+ * tile-level access to MMIO registers is possible. To use our
+ * existing GuC communication functions we must create a dummy
+ * GT structure and perform at least basic xe_gt and xe_guc
+ * initialization.
*/
- xe_gt_assert(gt, gt->info.type == XE_GT_TYPE_UNINITIALIZED);
+ struct xe_gt *gt __free(kfree) = NULL;
+ int err;
+ gt = kzalloc(sizeof(*gt), GFP_KERNEL);
+ if (!gt)
+ return -ENOMEM;
+
+ gt->tile = &xe->tiles[0];
if (type == GMDID_MEDIA) {
gt->info.id = 1;
gt->info.type = XE_GT_TYPE_MEDIA;
@@ -489,15 +553,11 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
xe_gt_mmio_init(gt);
xe_guc_comm_init_early(&gt->uc.guc);
- /* Don't bother with GMDID if failed to negotiate the GuC ABI */
- val = xe_gt_sriov_vf_bootstrap(gt) ? 0 : xe_gt_sriov_vf_gmdid(gt);
+ err = xe_gt_sriov_vf_bootstrap(gt);
+ if (err)
+ return err;
- /*
- * Only undo xe_gt.info here, the remaining changes made above
- * will be overwritten as part of the regular initialization.
- */
- gt->info.id = 0;
- gt->info.type = XE_GT_TYPE_UNINITIALIZED;
+ val = xe_gt_sriov_vf_gmdid(gt);
} else {
/*
* GMD_ID is a GT register, but at this point in the driver
@@ -515,55 +575,71 @@ static void read_gmdid(struct xe_device *xe, enum xe_gmdid_type type, u32 *ver,
*ver = REG_FIELD_GET(GMD_ID_ARCH_MASK, val) * 100 + REG_FIELD_GET(GMD_ID_RELEASE_MASK, val);
*revid = REG_FIELD_GET(GMD_ID_REVID, val);
+
+ return 0;
+}
+
+static const struct xe_ip *find_graphics_ip(unsigned int verx100)
+{
+ KUNIT_STATIC_STUB_REDIRECT(find_graphics_ip, verx100);
+
+ for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++)
+ if (graphics_ips[i].verx100 == verx100)
+ return &graphics_ips[i];
+ return NULL;
+}
+
+static const struct xe_ip *find_media_ip(unsigned int verx100)
+{
+ KUNIT_STATIC_STUB_REDIRECT(find_media_ip, verx100);
+
+ for (int i = 0; i < ARRAY_SIZE(media_ips); i++)
+ if (media_ips[i].verx100 == verx100)
+ return &media_ips[i];
+ return NULL;
}
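
As a worked example (values are illustrative only): a GMD_ID read whose architecture field decodes to 20 and whose release field decodes to 4 yields verx100 = 20 * 100 + 4 = 2004, which the lookup helper above maps to the "Xe2_LPG" entry of graphics_ips[]:

	/* Illustrative lookup; 2004 is the verx100 value computed in read_gmdid(). */
	const struct xe_ip *ip = find_graphics_ip(2004);

	if (!ip)
		drm_err(&xe->drm, "Hardware reports unknown graphics version\n");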
/*
* Read IP version from hardware and select graphics/media IP descriptors
* based on the result.
*/
-static void handle_gmdid(struct xe_device *xe,
- const struct xe_ip **graphics_ip,
- const struct xe_ip **media_ip,
- u32 *graphics_revid,
- u32 *media_revid)
+static int handle_gmdid(struct xe_device *xe,
+ const struct xe_ip **graphics_ip,
+ const struct xe_ip **media_ip,
+ u32 *graphics_revid,
+ u32 *media_revid)
{
u32 ver;
+ int ret;
*graphics_ip = NULL;
*media_ip = NULL;
- read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
-
- for (int i = 0; i < ARRAY_SIZE(graphics_ips); i++) {
- if (ver == graphics_ips[i].verx100) {
- *graphics_ip = &graphics_ips[i];
-
- break;
- }
- }
+ ret = read_gmdid(xe, GMDID_GRAPHICS, &ver, graphics_revid);
+ if (ret)
+ return ret;
+ *graphics_ip = find_graphics_ip(ver);
if (!*graphics_ip) {
drm_err(&xe->drm, "Hardware reports unknown graphics version %u.%02u\n",
ver / 100, ver % 100);
}
- read_gmdid(xe, GMDID_MEDIA, &ver, media_revid);
+ ret = read_gmdid(xe, GMDID_MEDIA, &ver, media_revid);
+ if (ret)
+ return ret;
+
/* Media may legitimately be fused off / not present */
if (ver == 0)
- return;
-
- for (int i = 0; i < ARRAY_SIZE(media_ips); i++) {
- if (ver == media_ips[i].verx100) {
- *media_ip = &media_ips[i];
-
- break;
- }
- }
+ return 0;
+ *media_ip = find_media_ip(ver);
if (!*media_ip) {
drm_err(&xe->drm, "Hardware reports unknown media version %u.%02u\n",
ver / 100, ver % 100);
}
+
+ return 0;
}
/*
@@ -582,21 +658,37 @@ static int xe_info_init_early(struct xe_device *xe,
subplatform_desc->subplatform : XE_SUBPLATFORM_NONE;
xe->info.dma_mask_size = desc->dma_mask_size;
+ xe->info.va_bits = desc->va_bits;
+ xe->info.vm_max_level = desc->vm_max_level;
+ xe->info.vram_flags = desc->vram_flags;
+
xe->info.is_dgfx = desc->is_dgfx;
xe->info.has_fan_control = desc->has_fan_control;
+ /* runtime fusing may force flat_ccs to disabled later */
+ xe->info.has_flat_ccs = desc->has_flat_ccs;
+ xe->info.has_mbx_power_limits = desc->has_mbx_power_limits;
+ xe->info.has_gsc_nvm = desc->has_gsc_nvm;
xe->info.has_heci_gscfi = desc->has_heci_gscfi;
xe->info.has_heci_cscfi = desc->has_heci_cscfi;
+ xe->info.has_late_bind = desc->has_late_bind;
xe->info.has_llc = desc->has_llc;
xe->info.has_pxp = desc->has_pxp;
- xe->info.has_sriov = desc->has_sriov;
+ xe->info.has_sriov = xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev)) &&
+ desc->has_sriov;
+ xe->info.has_mem_copy_instr = desc->has_mem_copy_instr;
xe->info.skip_guc_pc = desc->skip_guc_pc;
xe->info.skip_mtcfg = desc->skip_mtcfg;
xe->info.skip_pcode = desc->skip_pcode;
xe->info.needs_scratch = desc->needs_scratch;
+ xe->info.needs_shared_vf_gt_wq = desc->needs_shared_vf_gt_wq;
xe->info.probe_display = IS_ENABLED(CONFIG_DRM_XE_DISPLAY) &&
xe_modparam.probe_display &&
desc->has_display;
+
+ xe_assert(xe, desc->max_gt_per_tile > 0);
+ xe_assert(xe, desc->max_gt_per_tile <= XE_MAX_GT_PER_TILE);
+ xe->info.max_gt_per_tile = desc->max_gt_per_tile;
xe->info.tile_count = 1 + desc->max_remote_tiles;
err = xe_tile_init_early(xe_device_get_root_tile(xe), xe, 0);
@@ -607,6 +699,101 @@ static int xe_info_init_early(struct xe_device *xe,
}
/*
+ * Possibly override the number of tiles based on the configuration register.
+ */
+static void xe_info_probe_tile_count(struct xe_device *xe)
+{
+ struct xe_mmio *mmio;
+ u8 tile_count;
+ u32 mtcfg;
+
+ KUNIT_STATIC_STUB_REDIRECT(xe_info_probe_tile_count, xe);
+
+ /*
+ * Probe for tile count only for platforms that support multiple
+ * tiles.
+ */
+ if (xe->info.tile_count == 1)
+ return;
+
+ if (xe->info.skip_mtcfg)
+ return;
+
+ mmio = xe_root_tile_mmio(xe);
+
+ /*
+ * Although the per-tile mmio regs are not yet initialized, this
+	 * is fine as it's going to the root tile's mmio, which is
+ * guaranteed to be initialized earlier in xe_mmio_probe_early()
+ */
+ mtcfg = xe_mmio_read32(mmio, XEHP_MTCFG_ADDR);
+ tile_count = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
+
+ if (tile_count < xe->info.tile_count) {
+ drm_info(&xe->drm, "tile_count: %d, reduced_tile_count %d\n",
+ xe->info.tile_count, tile_count);
+ xe->info.tile_count = tile_count;
+ }
+}
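
As a worked example (illustrative numbers, based on the code above): on a SKU described with max_remote_tiles = 1, xe->info.tile_count starts at 2; if the MTCFG register's TILE_COUNT field reads back as 0, the probed count is 0 + 1 = 1 and tile_count is reduced accordingly:

	/* Illustrative decode of the tile-count override. */
	u32 mtcfg = xe_mmio_read32(xe_root_tile_mmio(xe), XEHP_MTCFG_ADDR);
	u8 hw_tiles = REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;	/* e.g. 0 + 1 = 1 */
	/* 1 < 2, so xe->info.tile_count is lowered to 1. */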
+
+static struct xe_gt *alloc_primary_gt(struct xe_tile *tile,
+ const struct xe_graphics_desc *graphics_desc,
+ const struct xe_media_desc *media_desc)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_gt *gt;
+
+ if (!xe_configfs_primary_gt_allowed(to_pci_dev(xe->drm.dev))) {
+ xe_info(xe, "Primary GT disabled via configfs\n");
+ return NULL;
+ }
+
+ gt = xe_gt_alloc(tile);
+ if (IS_ERR(gt))
+ return gt;
+
+ gt->info.type = XE_GT_TYPE_MAIN;
+ gt->info.id = tile->id * xe->info.max_gt_per_tile;
+ gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
+ gt->info.engine_mask = graphics_desc->hw_engine_mask;
+
+ /*
+ * Before media version 13, the media IP was part of the primary GT
+ * so we need to add the media engines to the primary GT's engine list.
+ */
+ if (MEDIA_VER(xe) < 13 && media_desc)
+ gt->info.engine_mask |= media_desc->hw_engine_mask;
+
+ return gt;
+}
+
+static struct xe_gt *alloc_media_gt(struct xe_tile *tile,
+ const struct xe_media_desc *media_desc)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_gt *gt;
+
+ if (!xe_configfs_media_gt_allowed(to_pci_dev(xe->drm.dev))) {
+ xe_info(xe, "Media GT disabled via configfs\n");
+ return NULL;
+ }
+
+ if (MEDIA_VER(xe) < 13 || !media_desc)
+ return NULL;
+
+ gt = xe_gt_alloc(tile);
+ if (IS_ERR(gt))
+ return gt;
+
+ gt->info.type = XE_GT_TYPE_MEDIA;
+ gt->info.id = tile->id * xe->info.max_gt_per_tile + 1;
+ gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
+ gt->info.engine_mask = media_desc->hw_engine_mask;
+
+ return gt;
+}
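
For clarity, the GT numbering produced by the two helpers above works out as follows (sketch, assuming max_gt_per_tile == 2 as on standalone-media platforms):

	/*
	 * tile 0: primary GT id = 0 * 2 = 0, media GT id = 0 * 2 + 1 = 1
	 * tile 1: primary GT id = 1 * 2 = 2, media GT id = 1 * 2 + 1 = 3
	 */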
+
+/*
* Initialize device info content that does require knowledge about
* graphics / media IP version.
* Make sure that GT / tile structures allocated by the driver match the data
@@ -622,6 +809,7 @@ static int xe_info_init(struct xe_device *xe,
const struct xe_media_desc *media_desc;
struct xe_tile *tile;
struct xe_gt *gt;
+ int ret;
u8 id;
/*
@@ -637,8 +825,11 @@ static int xe_info_init(struct xe_device *xe,
xe->info.step = xe_step_pre_gmdid_get(xe);
} else {
xe_assert(xe, !desc->pre_gmdid_media_ip);
- handle_gmdid(xe, &graphics_ip, &media_ip,
- &graphics_gmdid_revid, &media_gmdid_revid);
+ ret = handle_gmdid(xe, &graphics_ip, &media_ip,
+ &graphics_gmdid_revid, &media_gmdid_revid);
+ if (ret)
+ return ret;
+
xe->info.step = xe_step_gmdid_get(xe,
graphics_gmdid_revid,
media_gmdid_revid);
@@ -665,21 +856,17 @@ static int xe_info_init(struct xe_device *xe,
media_desc = NULL;
}
- xe->info.vram_flags = graphics_desc->vram_flags;
- xe->info.va_bits = graphics_desc->va_bits;
- xe->info.vm_max_level = graphics_desc->vm_max_level;
xe->info.has_asid = graphics_desc->has_asid;
xe->info.has_atomic_enable_pte_bit = graphics_desc->has_atomic_enable_pte_bit;
if (xe->info.platform != XE_PVC)
xe->info.has_device_atomics_on_smem = 1;
- /* Runtime detection may change this later */
- xe->info.has_flat_ccs = graphics_desc->has_flat_ccs;
-
- xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
+ xe->info.has_range_tlb_inval = graphics_desc->has_range_tlb_inval;
xe->info.has_usm = graphics_desc->has_usm;
xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
+ xe_info_probe_tile_count(xe);
+
for_each_remote_tile(tile, xe, id) {
int err;
@@ -688,48 +875,42 @@ static int xe_info_init(struct xe_device *xe,
return err;
}
- /*
- * All platforms have at least one primary GT. Any platform with media
- * version 13 or higher has an additional dedicated media GT. And
- * depending on the graphics IP there may be additional "remote tiles."
- * All of these together determine the overall GT count.
- */
+ /* Allocate any GT and VRAM structures necessary for the platform. */
for_each_tile(tile, xe, id) {
- gt = tile->primary_gt;
- gt->info.id = xe->info.gt_count++;
- gt->info.type = XE_GT_TYPE_MAIN;
- gt->info.has_indirect_ring_state = graphics_desc->has_indirect_ring_state;
- gt->info.engine_mask = graphics_desc->hw_engine_mask;
+ int err;
- if (MEDIA_VER(xe) < 13 && media_desc)
- gt->info.engine_mask |= media_desc->hw_engine_mask;
+ err = xe_tile_alloc_vram(tile);
+ if (err)
+ return err;
- if (MEDIA_VER(xe) < 13 || !media_desc)
- continue;
+ tile->primary_gt = alloc_primary_gt(tile, graphics_desc, media_desc);
+ if (IS_ERR(tile->primary_gt))
+ return PTR_ERR(tile->primary_gt);
/*
- * Allocate and setup media GT for platforms with standalone
- * media.
+ * It's not currently possible to probe a device with the
+		 * primary GT disabled. With some work, this may become possible
+		 * in the future for igpu platforms (although probably not for
+		 * dgpus, since access to the primary GT's BCS engines is
+ * required for VRAM management).
*/
- tile->media_gt = xe_gt_alloc(tile);
+ if (!tile->primary_gt) {
+			drm_err(&xe->drm, "Cannot probe device without a primary GT\n");
+ return -ENODEV;
+ }
+
+ tile->media_gt = alloc_media_gt(tile, media_desc);
if (IS_ERR(tile->media_gt))
return PTR_ERR(tile->media_gt);
-
- gt = tile->media_gt;
- gt->info.type = XE_GT_TYPE_MEDIA;
- gt->info.has_indirect_ring_state = media_desc->has_indirect_ring_state;
- gt->info.engine_mask = media_desc->hw_engine_mask;
-
- /*
- * FIXME: At the moment multi-tile and standalone media are
- * mutually exclusive on current platforms. We'll need to
- * come up with a better way to number GTs if we ever wind
- * up with platforms that support both together.
- */
- drm_WARN_ON(&xe->drm, id != 0);
- gt->info.id = xe->info.gt_count++;
}
+ /*
+ * Now that we have tiles and GTs defined, let's loop over valid GTs
+ * in order to define gt_count.
+ */
+ for_each_gt(gt, xe, id)
+ xe->info.gt_count++;
+
return 0;
}
@@ -740,7 +921,7 @@ static void xe_pci_remove(struct pci_dev *pdev)
if (IS_SRIOV_PF(xe))
xe_pci_sriov_configure(pdev, 0);
- if (xe_survivability_mode_is_enabled(xe))
+ if (xe_survivability_mode_is_boot_enabled(xe))
return;
xe_device_remove(xe);
@@ -773,6 +954,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
struct xe_device *xe;
int err;
+ xe_configfs_check_device(pdev);
+
if (desc->require_force_probe && !id_forced(pdev->device)) {
dev_info(&pdev->dev,
"Your graphics device %04x is not officially supported\n"
@@ -813,6 +996,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
+ xe_vram_resize_bar(xe);
+
err = xe_device_probe_early(xe);
/*
* In Boot Survivability mode, no drm card is exposed and driver
@@ -820,7 +1005,7 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
* flashed through mei. Return success, if survivability mode
* is enabled due to pcode failure or configfs being set
*/
- if (xe_survivability_mode_is_enabled(xe))
+ if (xe_survivability_mode_is_boot_enabled(xe))
return 0;
if (err)
@@ -914,7 +1099,7 @@ static int xe_pci_suspend(struct device *dev)
struct xe_device *xe = pdev_to_xe_device(pdev);
int err;
- if (xe_survivability_mode_is_enabled(xe))
+ if (xe_survivability_mode_is_boot_enabled(xe))
return -EBUSY;
err = xe_pm_suspend(xe);
@@ -1038,6 +1223,23 @@ static struct pci_driver xe_pci_driver = {
#endif
};
+/**
+ * xe_pci_to_pf_device() - Get PF &xe_device.
+ * @pdev: the VF &pci_dev device
+ *
+ * Return: pointer to PF &xe_device, NULL otherwise.
+ */
+struct xe_device *xe_pci_to_pf_device(struct pci_dev *pdev)
+{
+ struct drm_device *drm;
+
+ drm = pci_iov_get_pf_drvdata(pdev, &xe_pci_driver);
+ if (IS_ERR(drm))
+ return NULL;
+
+ return to_xe_device(drm);
+}
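
For illustration only, a VF-side caller could use the new helper as below; the function name is hypothetical and not part of this patch:

	/* Hypothetical check whether the PF is locally bound to the xe driver. */
	static bool vf_has_local_pf(struct pci_dev *vf_pdev)
	{
		return xe_pci_to_pf_device(vf_pdev) != NULL;
	}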
+
int xe_register_pci_driver(void)
{
return pci_register_driver(&xe_pci_driver);
diff --git a/drivers/gpu/drm/xe/xe_pci.h b/drivers/gpu/drm/xe/xe_pci.h
index 611c1209b14c..11bcc5fe2c5b 100644
--- a/drivers/gpu/drm/xe/xe_pci.h
+++ b/drivers/gpu/drm/xe/xe_pci.h
@@ -6,7 +6,10 @@
#ifndef _XE_PCI_H_
#define _XE_PCI_H_
+struct pci_dev;
+
int xe_register_pci_driver(void);
void xe_unregister_pci_driver(void);
+struct xe_device *xe_pci_to_pf_device(struct pci_dev *pdev);
#endif
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.c b/drivers/gpu/drm/xe/xe_pci_sriov.c
index 8813efdcafbb..9ff69c4843b0 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.c
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.c
@@ -3,6 +3,10 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <linux/bitops.h>
+#include <linux/pci.h>
+
+#include "regs/xe_bars.h"
#include "xe_assert.h"
#include "xe_device.h"
#include "xe_gt_sriov_pf_config.h"
@@ -12,68 +16,19 @@
#include "xe_pci_sriov.h"
#include "xe_pm.h"
#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_control.h"
#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_provision.h"
+#include "xe_sriov_pf_sysfs.h"
#include "xe_sriov_printk.h"
-static int pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs)
-{
- unsigned int n;
-
- for (n = 1; n <= num_vfs; n++)
- if (!xe_gt_sriov_pf_config_is_empty(gt, n))
- return false;
-
- return true;
-}
-
-static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs)
-{
- struct xe_gt *gt;
- unsigned int id;
- int result = 0, err;
-
- for_each_gt(gt, xe, id) {
- if (!pf_needs_provisioning(gt, num_vfs))
- continue;
- err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs);
- result = result ?: err;
- }
-
- return result;
-}
-
-static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs)
-{
- struct xe_gt *gt;
- unsigned int id;
- unsigned int n;
-
- for_each_gt(gt, xe, id)
- for (n = 1; n <= num_vfs; n++)
- xe_gt_sriov_pf_config_release(gt, n, true);
-}
-
static void pf_reset_vfs(struct xe_device *xe, unsigned int num_vfs)
{
- struct xe_gt *gt;
- unsigned int id;
unsigned int n;
- for_each_gt(gt, xe, id)
- for (n = 1; n <= num_vfs; n++)
- xe_gt_sriov_pf_control_trigger_flr(gt, n);
-}
-
-static struct pci_dev *xe_pci_pf_get_vf_dev(struct xe_device *xe, unsigned int vf_id)
-{
- struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
-
- xe_assert(xe, IS_SRIOV_PF(xe));
-
- /* caller must use pci_dev_put() */
- return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
- pdev->bus->number,
- pci_iov_virtfn_devfn(pdev, vf_id));
+ for (n = 1; n <= num_vfs; n++)
+ xe_sriov_pf_control_reset_vf(xe, n);
}
static void pf_link_vfs(struct xe_device *xe, int num_vfs)
@@ -94,7 +49,7 @@ static void pf_link_vfs(struct xe_device *xe, int num_vfs)
* enforce correct resume order.
*/
for (n = 1; n <= num_vfs; n++) {
- pdev_vf = xe_pci_pf_get_vf_dev(xe, n - 1);
+ pdev_vf = xe_pci_sriov_get_vf_pdev(pdev_pf, n);
/* unlikely, something weird is happening, abort */
if (!pdev_vf) {
@@ -127,6 +82,32 @@ static void pf_engine_activity_stats(struct xe_device *xe, unsigned int num_vfs,
}
}
+static int resize_vf_vram_bar(struct xe_device *xe, int num_vfs)
+{
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ u32 sizes;
+
+ sizes = pci_iov_vf_bar_get_sizes(pdev, VF_LMEM_BAR, num_vfs);
+ if (!sizes)
+ return 0;
+
+ return pci_iov_vf_bar_set_size(pdev, VF_LMEM_BAR, __fls(sizes));
+}
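
The sizes value returned by pci_iov_vf_bar_get_sizes() is a bitmask of BAR sizes supported for the requested VF count, so __fls() picks the largest one; a sketch of the selection (bit positions are illustrative):

	/*
	 * Illustrative only: sizes == 0x7 means three sizes are supported;
	 * __fls(0x7) == 2 selects the largest of them for VF_LMEM_BAR.
	 */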
+
+static int pf_prepare_vfs_enabling(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ /* make sure we are not locked-down by other components */
+ return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL);
+}
+
+static void pf_finish_vfs_enabling(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ /* allow other components to lockdown VFs enabling */
+ xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, false, NULL);
+}
+
static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -138,6 +119,14 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
xe_assert(xe, num_vfs <= total_vfs);
xe_sriov_dbg(xe, "enabling %u VF%s\n", num_vfs, str_plural(num_vfs));
+ err = xe_sriov_pf_wait_ready(xe);
+ if (err)
+ goto out;
+
+ err = pf_prepare_vfs_enabling(xe);
+ if (err)
+ goto out;
+
/*
* We must hold additional reference to the runtime PM to keep PF in D0
* during VFs lifetime, as our VFs do not implement the PM capability.
@@ -149,10 +138,16 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
*/
xe_pm_runtime_get_noresume(xe);
- err = pf_provision_vfs(xe, num_vfs);
+ err = xe_sriov_pf_provision_vfs(xe, num_vfs);
if (err < 0)
goto failed;
+ if (IS_DGFX(xe)) {
+ err = resize_vf_vram_bar(xe, num_vfs);
+ if (err)
+ xe_sriov_info(xe, "Failed to set VF LMEM BAR size: %d\n", err);
+ }
+
err = pci_enable_sriov(pdev, num_vfs);
if (err < 0)
goto failed;
@@ -162,14 +157,17 @@ static int pf_enable_vfs(struct xe_device *xe, int num_vfs)
xe_sriov_info(xe, "Enabled %u of %u VF%s\n",
num_vfs, total_vfs, str_plural(total_vfs));
+ xe_sriov_pf_sysfs_link_vfs(xe, num_vfs);
+
pf_engine_activity_stats(xe, num_vfs, true);
return num_vfs;
failed:
- pf_unprovision_vfs(xe, num_vfs);
+ xe_sriov_pf_unprovision_vfs(xe, num_vfs);
xe_pm_runtime_put(xe);
-
+ pf_finish_vfs_enabling(xe);
+out:
xe_sriov_notice(xe, "Failed to enable %u VF%s (%pe)\n",
num_vfs, str_plural(num_vfs), ERR_PTR(err));
return err;
@@ -189,15 +187,19 @@ static int pf_disable_vfs(struct xe_device *xe)
pf_engine_activity_stats(xe, num_vfs, false);
+ xe_sriov_pf_sysfs_unlink_vfs(xe, num_vfs);
+
pci_disable_sriov(pdev);
pf_reset_vfs(xe, num_vfs);
- pf_unprovision_vfs(xe, num_vfs);
+ xe_sriov_pf_unprovision_vfs(xe, num_vfs);
/* not needed anymore - see pf_enable_vfs() */
xe_pm_runtime_put(xe);
+ pf_finish_vfs_enabling(xe);
+
xe_sriov_info(xe, "Disabled %u VF%s\n", num_vfs, str_plural(num_vfs));
return 0;
}
@@ -240,3 +242,25 @@ int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
return ret;
}
+
+/**
+ * xe_pci_sriov_get_vf_pdev() - Lookup the VF's PCI device using the VF identifier.
+ * @pdev: the PF's &pci_dev
+ * @vfid: VF identifier (1-based)
+ *
+ * The caller must decrement the reference count by calling pci_dev_put().
+ *
+ * Return: the VF's &pci_dev or NULL if the VF device was not found.
+ */
+struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid)
+{
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+
+ xe_assert(xe, dev_is_pf(&pdev->dev));
+ xe_assert(xe, vfid);
+ xe_assert(xe, vfid <= pci_sriov_get_totalvfs(pdev));
+
+ return pci_get_domain_bus_and_slot(pci_domain_nr(pdev->bus),
+ pdev->bus->number,
+ pci_iov_virtfn_devfn(pdev, vfid - 1));
+}
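
A minimal usage sketch for the helper above (hypothetical caller, shown only to illustrate the reference-counting contract; pdev_pf is assumed to be the PF's pci_dev):

	/* Look up VF1's PCI device and drop the reference when done. */
	struct pci_dev *vf_pdev = xe_pci_sriov_get_vf_pdev(pdev_pf, 1);

	if (vf_pdev) {
		/* ... use vf_pdev ... */
		pci_dev_put(vf_pdev);
	}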
diff --git a/drivers/gpu/drm/xe/xe_pci_sriov.h b/drivers/gpu/drm/xe/xe_pci_sriov.h
index c76dd0d90495..b9105d71dbb1 100644
--- a/drivers/gpu/drm/xe/xe_pci_sriov.h
+++ b/drivers/gpu/drm/xe/xe_pci_sriov.h
@@ -10,6 +10,7 @@ struct pci_dev;
#ifdef CONFIG_PCI_IOV
int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs);
+struct pci_dev *xe_pci_sriov_get_vf_pdev(struct pci_dev *pdev, unsigned int vfid);
#else
static inline int xe_pci_sriov_configure(struct pci_dev *pdev, int num_vfs)
{
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index ca6b10d35573..9892c063a9c5 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -8,18 +8,61 @@
#include <linux/types.h>
-struct xe_graphics_desc {
+#include "xe_platform_types.h"
+
+struct xe_subplatform_desc {
+ enum xe_subplatform subplatform;
+ const char *name;
+ const u16 *pciidlist;
+};
+
+struct xe_device_desc {
+ /* Should only ever be set for platforms without GMD_ID */
+ const struct xe_ip *pre_gmdid_graphics_ip;
+ /* Should only ever be set for platforms without GMD_ID */
+ const struct xe_ip *pre_gmdid_media_ip;
+
+ const char *platform_name;
+ const struct xe_subplatform_desc *subplatforms;
+
+ enum xe_platform platform;
+
+ u8 dma_mask_size;
+ u8 max_remote_tiles:2;
+ u8 max_gt_per_tile:2;
u8 va_bits;
u8 vm_max_level;
u8 vram_flags;
+ u8 require_force_probe:1;
+ u8 is_dgfx:1;
+
+ u8 has_display:1;
+ u8 has_fan_control:1;
+ u8 has_flat_ccs:1;
+ u8 has_gsc_nvm:1;
+ u8 has_heci_gscfi:1;
+ u8 has_heci_cscfi:1;
+ u8 has_late_bind:1;
+ u8 has_llc:1;
+ u8 has_mbx_power_limits:1;
+ u8 has_mem_copy_instr:1;
+ u8 has_pxp:1;
+ u8 has_sriov:1;
+ u8 needs_scratch:1;
+ u8 skip_guc_pc:1;
+ u8 skip_mtcfg:1;
+ u8 skip_pcode:1;
+ u8 needs_shared_vf_gt_wq:1;
+};
+
+struct xe_graphics_desc {
u64 hw_engine_mask; /* hardware engines provided by graphics IP */
u8 has_asid:1;
u8 has_atomic_enable_pte_bit:1;
- u8 has_flat_ccs:1;
u8 has_indirect_ring_state:1;
- u8 has_range_tlb_invalidation:1;
+ u8 has_range_tlb_inval:1;
u8 has_usm:1;
u8 has_64bit_timestamp:1;
};
diff --git a/drivers/gpu/drm/xe/xe_pcode.c b/drivers/gpu/drm/xe/xe_pcode.c
index cf955b3ed52c..0d33c14ea0cf 100644
--- a/drivers/gpu/drm/xe/xe_pcode.c
+++ b/drivers/gpu/drm/xe/xe_pcode.c
@@ -32,27 +32,39 @@
static int pcode_mailbox_status(struct xe_tile *tile)
{
+ const char *err_str;
+ int err_decode;
u32 err;
- static const struct pcode_err_decode err_decode[] = {
- [PCODE_ILLEGAL_CMD] = {-ENXIO, "Illegal Command"},
- [PCODE_TIMEOUT] = {-ETIMEDOUT, "Timed out"},
- [PCODE_ILLEGAL_DATA] = {-EINVAL, "Illegal Data"},
- [PCODE_ILLEGAL_SUBCOMMAND] = {-ENXIO, "Illegal Subcommand"},
- [PCODE_LOCKED] = {-EBUSY, "PCODE Locked"},
- [PCODE_GT_RATIO_OUT_OF_RANGE] = {-EOVERFLOW,
- "GT ratio out of range"},
- [PCODE_REJECTED] = {-EACCES, "PCODE Rejected"},
- [PCODE_ERROR_MASK] = {-EPROTO, "Unknown"},
- };
+
+#define CASE_ERR(_err, _err_decode, _err_str) \
+ case _err: \
+ err_decode = _err_decode; \
+ err_str = _err_str; \
+ break
err = xe_mmio_read32(&tile->mmio, PCODE_MAILBOX) & PCODE_ERROR_MASK;
+ switch (err) {
+ CASE_ERR(PCODE_ILLEGAL_CMD, -ENXIO, "Illegal Command");
+ CASE_ERR(PCODE_TIMEOUT, -ETIMEDOUT, "Timed out");
+ CASE_ERR(PCODE_ILLEGAL_DATA, -EINVAL, "Illegal Data");
+ CASE_ERR(PCODE_ILLEGAL_SUBCOMMAND, -ENXIO, "Illegal Subcommand");
+ CASE_ERR(PCODE_LOCKED, -EBUSY, "PCODE Locked");
+ CASE_ERR(PCODE_GT_RATIO_OUT_OF_RANGE, -EOVERFLOW, "GT ratio out of range");
+ CASE_ERR(PCODE_REJECTED, -EACCES, "PCODE Rejected");
+ default:
+ err_decode = -EPROTO;
+ err_str = "Unknown";
+ }
+
if (err) {
- drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s", err,
- err_decode[err].str ?: "Unknown");
- return err_decode[err].errno ?: -EPROTO;
+ drm_err(&tile_to_xe(tile)->drm, "PCODE Mailbox failed: %d %s",
+ err_decode, err_str);
+
+ return err_decode;
}
return 0;
+#undef CASE_ERR
}
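
For reference, the CASE_ERR() helper expands to an ordinary switch case; one entry expands roughly to the following (sketch of the preprocessor output):

	case PCODE_TIMEOUT:
		err_decode = -ETIMEDOUT;
		err_str = "Timed out";
		break;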
static int __pcode_mailbox_rw(struct xe_tile *tile, u32 mbox, u32 *data0, u32 *data1,
@@ -109,6 +121,17 @@ int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 data, int timeout
return err;
}
+int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0, u32 data1, int timeout)
+{
+ int err;
+
+ mutex_lock(&tile->pcode.lock);
+ err = pcode_mailbox_rw(tile, mbox, &data0, &data1, timeout, false, false);
+ mutex_unlock(&tile->pcode.lock);
+
+ return err;
+}
+
int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1)
{
int err;
@@ -325,3 +348,33 @@ int xe_pcode_probe_early(struct xe_device *xe)
return xe_pcode_ready(xe, false);
}
ALLOW_ERROR_INJECTION(xe_pcode_probe_early, ERRNO); /* See xe_pci_probe */
+
+/* Helpers with drm device. These should only be called by the display side */
+#if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
+
+int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1)
+{
+ struct xe_device *xe = to_xe_device(drm);
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+ return xe_pcode_read(tile, mbox, val, val1);
+}
+
+int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms)
+{
+ struct xe_device *xe = to_xe_device(drm);
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+ return xe_pcode_write_timeout(tile, mbox, val, timeout_ms);
+}
+
+int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request,
+ u32 reply_mask, u32 reply, int timeout_base_ms)
+{
+ struct xe_device *xe = to_xe_device(drm);
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+
+ return xe_pcode_request(tile, mbox, request, reply_mask, reply, timeout_base_ms);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_pcode.h b/drivers/gpu/drm/xe/xe_pcode.h
index ba33991d72a7..a5584c1c75f9 100644
--- a/drivers/gpu/drm/xe/xe_pcode.h
+++ b/drivers/gpu/drm/xe/xe_pcode.h
@@ -7,8 +7,10 @@
#define _XE_PCODE_H_
#include <linux/types.h>
-struct xe_tile;
+
+struct drm_device;
struct xe_device;
+struct xe_tile;
void xe_pcode_init(struct xe_tile *tile);
int xe_pcode_probe_early(struct xe_device *xe);
@@ -18,6 +20,9 @@ int xe_pcode_init_min_freq_table(struct xe_tile *tile, u32 min_gt_freq,
int xe_pcode_read(struct xe_tile *tile, u32 mbox, u32 *val, u32 *val1);
int xe_pcode_write_timeout(struct xe_tile *tile, u32 mbox, u32 val,
int timeout_ms);
+int xe_pcode_write64_timeout(struct xe_tile *tile, u32 mbox, u32 data0,
+ u32 data1, int timeout);
+
#define xe_pcode_write(tile, mbox, val) \
xe_pcode_write_timeout(tile, mbox, val, 1)
@@ -29,4 +34,12 @@ int xe_pcode_request(struct xe_tile *tile, u32 mbox, u32 request,
| FIELD_PREP(PCODE_MB_PARAM1, param1)\
| FIELD_PREP(PCODE_MB_PARAM2, param2))
+/* Helpers with drm device */
+int intel_pcode_read(struct drm_device *drm, u32 mbox, u32 *val, u32 *val1);
+int intel_pcode_write_timeout(struct drm_device *drm, u32 mbox, u32 val, int timeout_ms);
+#define intel_pcode_write(drm, mbox, val) \
+ intel_pcode_write_timeout((drm), (mbox), (val), 1)
+int intel_pcode_request(struct drm_device *drm, u32 mbox, u32 request,
+ u32 reply_mask, u32 reply, int timeout_base_ms);
+
#endif
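
A minimal sketch of how display code could use the new drm_device-based wrappers (the caller is hypothetical, and it assumes, as elsewhere in the driver, that a NULL second value pointer is allowed when only one word is needed):

	/* Hypothetical display-side helper reading a single pcode value. */
	static int read_pcode_value(struct drm_device *drm, u32 mbox, u32 *val)
	{
		return intel_pcode_read(drm, mbox, val, NULL);
	}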
diff --git a/drivers/gpu/drm/xe/xe_pcode_api.h b/drivers/gpu/drm/xe/xe_pcode_api.h
index 127d4d26c4cf..70dcd6625680 100644
--- a/drivers/gpu/drm/xe/xe_pcode_api.h
+++ b/drivers/gpu/drm/xe/xe_pcode_api.h
@@ -43,6 +43,28 @@
#define POWER_SETUP_I1_SHIFT 6 /* 10.6 fixed point format */
#define POWER_SETUP_I1_DATA_MASK REG_GENMASK(15, 0)
+#define READ_PSYSGPU_POWER_LIMIT 0x6
+#define WRITE_PSYSGPU_POWER_LIMIT 0x7
+#define READ_PACKAGE_POWER_LIMIT 0x8
+#define WRITE_PACKAGE_POWER_LIMIT 0x9
+#define READ_PL_FROM_FW 0x1
+#define READ_PL_FROM_PCODE 0x0
+
+#define PCODE_LATE_BINDING 0x5C
+#define GET_CAPABILITY_STATUS 0x0
+#define V1_FAN_SUPPORTED REG_BIT(0)
+#define VR_PARAMS_SUPPORTED REG_BIT(3)
+#define V1_FAN_PROVISIONED REG_BIT(16)
+#define VR_PARAMS_PROVISIONED REG_BIT(19)
+#define GET_VERSION_LOW 0x1
+#define GET_VERSION_HIGH 0x2
+#define MAJOR_VERSION_MASK REG_GENMASK(31, 16)
+#define MINOR_VERSION_MASK REG_GENMASK(15, 0)
+#define HOTFIX_VERSION_MASK REG_GENMASK(31, 16)
+#define BUILD_VERSION_MASK REG_GENMASK(15, 0)
+#define FAN_TABLE 1
+#define VR_CONFIG 2
+
#define PCODE_FREQUENCY_CONFIG 0x6e
/* Frequency Config Sub Commands (param1) */
#define PCODE_MBOX_FC_SC_READ_FUSED_P0 0x0
@@ -70,9 +92,3 @@
#define BMG_PCIE_CAP XE_REG(0x138340)
#define LINK_DOWNGRADE REG_GENMASK(1, 0)
#define DOWNGRADE_CAPABLE 2
-
-struct pcode_err_decode {
- int errno;
- const char *str;
-};
-
diff --git a/drivers/gpu/drm/xe/xe_platform_types.h b/drivers/gpu/drm/xe/xe_platform_types.h
index d08574c4cdb8..f516dbddfd88 100644
--- a/drivers/gpu/drm/xe/xe_platform_types.h
+++ b/drivers/gpu/drm/xe/xe_platform_types.h
@@ -24,6 +24,8 @@ enum xe_platform {
XE_LUNARLAKE,
XE_BATTLEMAGE,
XE_PANTHERLAKE,
+ XE_NOVALAKE_S,
+ XE_CRESCENTISLAND,
};
enum xe_subplatform {
@@ -34,6 +36,7 @@ enum xe_subplatform {
XE_SUBPLATFORM_DG2_G10,
XE_SUBPLATFORM_DG2_G11,
XE_SUBPLATFORM_DG2_G12,
+ XE_SUBPLATFORM_BATTLEMAGE_G21,
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
index ff749edc005b..766922530265 100644
--- a/drivers/gpu/drm/xe/xe_pm.c
+++ b/drivers/gpu/drm/xe/xe_pm.c
@@ -18,11 +18,15 @@
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
-#include "xe_guc.h"
+#include "xe_gt_idle.h"
+#include "xe_i2c.h"
#include "xe_irq.h"
+#include "xe_late_bind_fw.h"
#include "xe_pcode.h"
#include "xe_pxp.h"
+#include "xe_sriov_vf_ccs.h"
#include "xe_trace.h"
+#include "xe_vm.h"
#include "xe_wa.h"
/**
@@ -79,8 +83,58 @@ static struct lockdep_map xe_pm_runtime_d3cold_map = {
static struct lockdep_map xe_pm_runtime_nod3cold_map = {
.name = "xe_rpm_nod3cold_map"
};
+
+static struct lockdep_map xe_pm_block_lockdep_map = {
+ .name = "xe_pm_block_map",
+};
#endif
+static void xe_pm_block_begin_signalling(void)
+{
+ lock_acquire_shared_recursive(&xe_pm_block_lockdep_map, 0, 1, NULL, _RET_IP_);
+}
+
+static void xe_pm_block_end_signalling(void)
+{
+ lock_release(&xe_pm_block_lockdep_map, _RET_IP_);
+}
+
+/**
+ * xe_pm_might_block_on_suspend() - Annotate that the code might block on suspend
+ *
+ * Annotation to use where the code might block or cease to make
+ * progress pending resume completion.
+ */
+void xe_pm_might_block_on_suspend(void)
+{
+ lock_map_acquire(&xe_pm_block_lockdep_map);
+ lock_map_release(&xe_pm_block_lockdep_map);
+}
+
+/**
+ * xe_pm_block_on_suspend() - Block pending suspend.
+ * @xe: The xe device about to be suspended.
+ *
+ * Block if the pm notifier has started evicting bos, to avoid
+ * racing and validating those bos back. The function is
+ * annotated to ensure no locks are held that are also grabbed
+ * in the pm notifier or the device suspend / resume.
+ * This is intended to be used by freezable tasks only
+ * (not freezable workqueues), with the intention that the function
+ * returns %-ERESTARTSYS when tasks are frozen during suspend,
+ * and allows the task to freeze. The caller must be able to
+ * handle the %-ERESTARTSYS.
+ *
+ * Return: %0 on success, %-ERESTARTSYS on signal pending or
+ * if freezing requested.
+ */
+int xe_pm_block_on_suspend(struct xe_device *xe)
+{
+ xe_pm_might_block_on_suspend();
+
+ return wait_for_completion_interruptible(&xe->pm_block);
+}
+
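
A minimal sketch of a caller (hypothetical function, shown to illustrate the -ERESTARTSYS contract described above):

	/* Freezable-task caller: bail out cleanly if a suspend is in flight. */
	static int do_work_unless_suspending(struct xe_device *xe)
	{
		int err = xe_pm_block_on_suspend(xe);

		if (err)
			return err;	/* -ERESTARTSYS: signalled or freezing */

		/* ... safe to proceed, the pm notifier is not evicting ... */
		return 0;
	}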
/**
* xe_rpm_reclaim_safe() - Whether runtime resume can be done from reclaim context
* @xe: The xe device.
@@ -120,12 +174,15 @@ int xe_pm_suspend(struct xe_device *xe)
int err;
drm_dbg(&xe->drm, "Suspending device\n");
+ xe_pm_block_begin_signalling();
trace_xe_pm_suspend(xe, __builtin_return_address(0));
err = xe_pxp_pm_suspend(xe->pxp);
if (err)
goto err;
+ xe_late_bind_wait_for_worker_completion(&xe->late_bind);
+
for_each_gt(gt, xe, id)
xe_gt_suspend_prepare(gt);
@@ -134,7 +191,7 @@ int xe_pm_suspend(struct xe_device *xe)
/* FIXME: Super racey... */
err = xe_bo_evict_all(xe);
if (err)
- goto err_pxp;
+ goto err_display;
for_each_gt(gt, xe, id) {
err = xe_gt_suspend(gt);
@@ -146,15 +203,19 @@ int xe_pm_suspend(struct xe_device *xe)
xe_display_pm_suspend_late(xe);
+ xe_i2c_pm_suspend(xe);
+
drm_dbg(&xe->drm, "Device suspended\n");
+ xe_pm_block_end_signalling();
+
return 0;
err_display:
xe_display_pm_resume(xe);
-err_pxp:
xe_pxp_pm_resume(xe->pxp);
err:
drm_dbg(&xe->drm, "Device suspend failed %d\n", err);
+ xe_pm_block_end_signalling();
return err;
}
@@ -171,9 +232,13 @@ int xe_pm_resume(struct xe_device *xe)
u8 id;
int err;
+ xe_pm_block_begin_signalling();
drm_dbg(&xe->drm, "Resuming device\n");
trace_xe_pm_resume(xe, __builtin_return_address(0));
+ for_each_gt(gt, xe, id)
+ xe_gt_idle_disable_c6(gt);
+
for_each_tile(tile, xe, id)
xe_wa_apply_tile_workarounds(tile);
@@ -191,6 +256,8 @@ int xe_pm_resume(struct xe_device *xe)
if (err)
goto err;
+ xe_i2c_pm_resume(xe, true);
+
xe_irq_resume(xe);
for_each_gt(gt, xe, id)
@@ -204,10 +271,17 @@ int xe_pm_resume(struct xe_device *xe)
xe_pxp_pm_resume(xe->pxp);
+ if (IS_VF_CCS_READY(xe))
+ xe_sriov_vf_ccs_register_context(xe);
+
+ xe_late_bind_fw_load(&xe->late_bind);
+
drm_dbg(&xe->drm, "Device resumed\n");
+ xe_pm_block_end_signalling();
return 0;
err:
drm_dbg(&xe->drm, "Device resume failed %d\n", err);
+ xe_pm_block_end_signalling();
return err;
}
@@ -239,6 +313,10 @@ static void xe_pm_runtime_init(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
+	/* Our current VFs do not support RPM, so disable it */
+ if (IS_SRIOV_VF(xe))
+ return;
+
/*
* Disable the system suspend direct complete optimization.
* We need to ensure that the regular device suspend/resume functions
@@ -286,6 +364,19 @@ static u32 vram_threshold_value(struct xe_device *xe)
return DEFAULT_VRAM_THRESHOLD;
}
+static void xe_pm_wake_rebind_workers(struct xe_device *xe)
+{
+ struct xe_vm *vm, *next;
+
+ mutex_lock(&xe->rebind_resume_lock);
+ list_for_each_entry_safe(vm, next, &xe->rebind_resume_list,
+ preempt.pm_activate_link) {
+ list_del_init(&vm->preempt.pm_activate_link);
+ xe_vm_resume_rebind_worker(vm);
+ }
+ mutex_unlock(&xe->rebind_resume_lock);
+}
+
static int xe_pm_notifier_callback(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -295,30 +386,39 @@ static int xe_pm_notifier_callback(struct notifier_block *nb,
switch (action) {
case PM_HIBERNATION_PREPARE:
case PM_SUSPEND_PREPARE:
+ {
+ struct xe_validation_ctx ctx;
+
+ reinit_completion(&xe->pm_block);
+ xe_pm_block_begin_signalling();
xe_pm_runtime_get(xe);
+ (void)xe_validation_ctx_init(&ctx, &xe->val, NULL,
+ (struct xe_val_flags) {.exclusive = true});
err = xe_bo_evict_all_user(xe);
- if (err) {
+ xe_validation_ctx_fini(&ctx);
+ if (err)
drm_dbg(&xe->drm, "Notifier evict user failed (%d)\n", err);
- xe_pm_runtime_put(xe);
- break;
- }
err = xe_bo_notifier_prepare_all_pinned(xe);
- if (err) {
+ if (err)
drm_dbg(&xe->drm, "Notifier prepare pin failed (%d)\n", err);
- xe_pm_runtime_put(xe);
- }
+ /*
+ * Keep the runtime pm reference until post hibernation / post suspend to
+ * avoid a runtime suspend interfering with evicted objects or backup
+ * allocations.
+ */
+ xe_pm_block_end_signalling();
break;
+ }
case PM_POST_HIBERNATION:
case PM_POST_SUSPEND:
+ complete_all(&xe->pm_block);
+ xe_pm_wake_rebind_workers(xe);
xe_bo_notifier_unprepare_all_pinned(xe);
xe_pm_runtime_put(xe);
break;
}
- if (err)
- return NOTIFY_BAD;
-
return NOTIFY_DONE;
}
@@ -340,6 +440,14 @@ int xe_pm_init(struct xe_device *xe)
if (err)
return err;
+ err = drmm_mutex_init(&xe->drm, &xe->rebind_resume_lock);
+ if (err)
+ goto err_unregister;
+
+ init_completion(&xe->pm_block);
+ complete_all(&xe->pm_block);
+ INIT_LIST_HEAD(&xe->rebind_resume_list);
+
/* For now suspend/resume is only allowed with GuC */
if (!xe_device_uc_enabled(xe))
return 0;
@@ -363,6 +471,10 @@ static void xe_pm_runtime_fini(struct xe_device *xe)
{
struct device *dev = xe->drm.dev;
+	/* Our current VFs do not support RPM, so disable it */
+ if (IS_SRIOV_VF(xe))
+ return;
+
pm_runtime_get_sync(dev);
pm_runtime_forbid(dev);
}
@@ -488,6 +600,8 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
xe_display_pm_runtime_suspend_late(xe);
+ xe_i2c_pm_suspend(xe);
+
xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
return 0;
@@ -519,6 +633,9 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_rpm_lockmap_acquire(xe);
+ for_each_gt(gt, xe, id)
+ xe_gt_idle_disable_c6(gt);
+
if (xe->d3cold.allowed) {
err = xe_pcode_ready(xe, true);
if (err)
@@ -535,6 +652,8 @@ int xe_pm_runtime_resume(struct xe_device *xe)
goto out;
}
+ xe_i2c_pm_resume(xe, xe->d3cold.allowed);
+
xe_irq_resume(xe);
for_each_gt(gt, xe, id)
@@ -550,6 +669,12 @@ int xe_pm_runtime_resume(struct xe_device *xe)
xe_pxp_pm_resume(xe->pxp);
+ if (IS_VF_CCS_READY(xe))
+ xe_sriov_vf_ccs_register_context(xe);
+
+ if (xe->d3cold.allowed)
+ xe_late_bind_fw_load(&xe->late_bind);
+
out:
xe_rpm_lockmap_release(xe);
xe_pm_write_callback_task(xe, NULL);
@@ -601,6 +726,13 @@ static void xe_pm_runtime_lockdep_prime(void)
/**
* xe_pm_runtime_get - Get a runtime_pm reference and resume synchronously
* @xe: xe device instance
+ *
+ * When possible, scope-based runtime PM (through guard(xe_pm_runtime)) is
+ * be preferred over direct usage of this function. Manual get/put handling
+ * should only be used when the function contains goto-based logic which
+ * can break scope-based handling, or when the lifetime of the runtime PM
+ * reference does not match a specific scope (e.g., runtime PM obtained in one
+ * function and released in a different one).
*/
void xe_pm_runtime_get(struct xe_device *xe)
{
@@ -633,6 +765,13 @@ void xe_pm_runtime_put(struct xe_device *xe)
* xe_pm_runtime_get_ioctl - Get a runtime_pm reference before ioctl
* @xe: xe device instance
*
+ * When possible, scope-based runtime PM (through
+ * ACQUIRE(xe_pm_runtime_ioctl, ...)) should be preferred over direct usage of this
+ * function. Manual get/put handling should only be used when the function
+ * contains goto-based logic which can break scope-based handling, or when the
+ * lifetime of the runtime PM reference does not match a specific scope (e.g.,
+ * runtime PM obtained in one function and released in a different one).
+ *
* Returns: Any number greater than or equal to 0 for success, negative error
* code otherwise.
*/
@@ -702,6 +841,13 @@ static bool xe_pm_suspending_or_resuming(struct xe_device *xe)
* It will warn if not protected.
* The reference should be put back after this function regardless, since it
* will always bump the usage counter, regardless.
+ *
+ * When possible, scope-based runtime PM (through guard(xe_pm_runtime_noresume))
+ * should be preferred over direct usage of this function. Manual get/put handling
+ * should only be used when the function contains goto-based logic which can
+ * break scope-based handling, or when the lifetime of the runtime PM reference
+ * does not match a specific scope (e.g., runtime PM obtained in one function
+ * and released in a different one).
*/
void xe_pm_runtime_get_noresume(struct xe_device *xe)
{
@@ -753,11 +899,13 @@ void xe_pm_assert_unbounded_bridge(struct xe_device *xe)
}
/**
- * xe_pm_set_vram_threshold - Set a vram threshold for allowing/blocking D3Cold
+ * xe_pm_set_vram_threshold - Set a VRAM threshold for allowing/blocking D3Cold
* @xe: xe device instance
- * @threshold: VRAM size in bites for the D3cold threshold
+ * @threshold: VRAM size in MiB for the D3cold threshold
*
- * Returns 0 for success, negative error code otherwise.
+ * Return:
+ * * 0 - success
+ * * -EINVAL - invalid argument
*/
int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold)
{
diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
index 59678b310e55..6b27039e7b2d 100644
--- a/drivers/gpu/drm/xe/xe_pm.h
+++ b/drivers/gpu/drm/xe/xe_pm.h
@@ -6,6 +6,7 @@
#ifndef _XE_PM_H_
#define _XE_PM_H_
+#include <linux/cleanup.h>
#include <linux/pm_runtime.h>
#define DEFAULT_VRAM_THRESHOLD 300 /* in MB */
@@ -33,6 +34,24 @@ int xe_pm_set_vram_threshold(struct xe_device *xe, u32 threshold);
void xe_pm_d3cold_allowed_toggle(struct xe_device *xe);
bool xe_rpm_reclaim_safe(const struct xe_device *xe);
struct task_struct *xe_pm_read_callback_task(struct xe_device *xe);
+int xe_pm_block_on_suspend(struct xe_device *xe);
+void xe_pm_might_block_on_suspend(void);
int xe_pm_module_init(void);
+static inline void __xe_pm_runtime_noop(struct xe_device *xe) {}
+
+DEFINE_GUARD(xe_pm_runtime, struct xe_device *,
+ xe_pm_runtime_get(_T), xe_pm_runtime_put(_T))
+DEFINE_GUARD(xe_pm_runtime_noresume, struct xe_device *,
+ xe_pm_runtime_get_noresume(_T), xe_pm_runtime_put(_T))
+DEFINE_GUARD_COND(xe_pm_runtime, _ioctl, xe_pm_runtime_get_ioctl(_T), _RET >= 0)
+
+/*
+ * Used when a function needs to release runtime PM in all possible cases
+ * and error paths, but the wakeref was already acquired by a different
+ * function (i.e., get() has already happened so only a put() is needed).
+ */
+DEFINE_GUARD(xe_pm_runtime_release_only, struct xe_device *,
+ __xe_pm_runtime_noop(_T), xe_pm_runtime_put(_T));
+
#endif
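
As a reading aid (not part of the patch), here is a minimal sketch of how the guard classes defined above are meant to be used by callers; the example_* functions are hypothetical placeholders, and the conditional ioctl variant would use the ACQUIRE() form referenced in the xe_pm_runtime_get_ioctl() kernel-doc rather than plain guard().

/* Hypothetical caller using guard(xe_pm_runtime): put() runs at end of scope. */
static int example_read_counter(struct xe_device *xe, u64 *value)
{
	guard(xe_pm_runtime)(xe);		/* xe_pm_runtime_get() here */

	*value = example_read_hw_counter(xe);	/* placeholder for real HW access */
	return 0;				/* xe_pm_runtime_put() on return */
}

/* Hypothetical caller that only asserts an already-held wakeref. */
static void example_flush_state(struct xe_device *xe)
{
	guard(xe_pm_runtime_noresume)(xe);	/* warns if the device is not awake */

	example_write_hw_state(xe);		/* placeholder */
}
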
diff --git a/drivers/gpu/drm/xe/xe_pmu.c b/drivers/gpu/drm/xe/xe_pmu.c
index 69df0e3520a5..c63335eb69e5 100644
--- a/drivers/gpu/drm/xe/xe_pmu.c
+++ b/drivers/gpu/drm/xe/xe_pmu.c
@@ -157,10 +157,13 @@ static bool event_gt_forcewake(struct perf_event *event)
return true;
}
-static bool event_supported(struct xe_pmu *pmu, unsigned int gt,
+static bool event_supported(struct xe_pmu *pmu, unsigned int gt_id,
unsigned int id)
{
- if (gt >= XE_MAX_GT_PER_TILE)
+ struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
+ struct xe_gt *gt = xe_device_get_gt(xe, gt_id);
+
+ if (!gt)
return false;
return id < sizeof(pmu->supported_events) * BITS_PER_BYTE &&
@@ -494,7 +497,12 @@ static const struct attribute_group *pmu_events_attr_update[] = {
static void set_supported_events(struct xe_pmu *pmu)
{
struct xe_device *xe = container_of(pmu, typeof(*xe), pmu);
- struct xe_gt *gt = xe_device_get_gt(xe, 0);
+ struct xe_gt *gt;
+ int id;
+
+ /* If there are no GTs, don't support any GT-related events */
+ if (xe->info.gt_count == 0)
+ return;
if (!xe->info.skip_guc_pc) {
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_C6_RESIDENCY);
@@ -502,6 +510,10 @@ static void set_supported_events(struct xe_pmu *pmu)
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_GT_REQUESTED_FREQUENCY);
}
+ /* Find the first available GT to query engine event capabilities */
+ for_each_gt(gt, xe, id)
+ break;
+
if (xe_guc_engine_activity_supported(&gt->uc.guc)) {
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_ACTIVE_TICKS);
pmu->supported_events |= BIT_ULL(XE_PMU_EVENT_ENGINE_TOTAL_TICKS);
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence.c b/drivers/gpu/drm/xe/xe_preempt_fence.c
index 83fbeea5aa20..7f587ca3947d 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence.c
+++ b/drivers/gpu/drm/xe/xe_preempt_fence.c
@@ -8,6 +8,8 @@
#include <linux/slab.h>
#include "xe_exec_queue.h"
+#include "xe_gt_printk.h"
+#include "xe_guc_exec_queue_types.h"
#include "xe_vm.h"
static void preempt_fence_work_func(struct work_struct *w)
@@ -22,6 +24,15 @@ static void preempt_fence_work_func(struct work_struct *w)
} else if (!q->ops->reset_status(q)) {
int err = q->ops->suspend_wait(q);
+ if (err == -EAGAIN) {
+ xe_gt_dbg(q->gt, "PREEMPT FENCE RETRY guc_id=%d",
+ q->guc->id);
+ queue_work(q->vm->xe->preempt_fence_wq,
+ &pfence->preempt_work);
+ dma_fence_end_signalling(cookie);
+ return;
+ }
+
if (err)
dma_fence_set_error(&pfence->base, err);
} else {
diff --git a/drivers/gpu/drm/xe/xe_preempt_fence_types.h b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
index 312c3372a49f..ac125c697a41 100644
--- a/drivers/gpu/drm/xe/xe_preempt_fence_types.h
+++ b/drivers/gpu/drm/xe/xe_preempt_fence_types.h
@@ -12,7 +12,7 @@
struct xe_exec_queue;
/**
- * struct xe_preempt_fence - XE preempt fence
+ * struct xe_preempt_fence - Xe preempt fence
*
* hardware and triggers a callback once the xe_engine is complete.
*/
diff --git a/drivers/gpu/drm/xe/xe_printk.h b/drivers/gpu/drm/xe/xe_printk.h
new file mode 100644
index 000000000000..c5be2385aa95
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_printk.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PRINTK_H_
+#define _XE_PRINTK_H_
+
+#include <drm/drm_print.h>
+
+#include "xe_device_types.h"
+
+#define __XE_PRINTK_FMT(_xe, _fmt, _args...) _fmt, ##_args
+
+#define xe_printk(_xe, _level, _fmt, ...) \
+ drm_##_level(&(_xe)->drm, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_err(_xe, _fmt, ...) \
+ xe_printk((_xe), err, _fmt, ##__VA_ARGS__)
+
+#define xe_err_once(_xe, _fmt, ...) \
+ xe_printk((_xe), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_err_ratelimited(_xe, _fmt, ...) \
+ xe_printk((_xe), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_warn(_xe, _fmt, ...) \
+ xe_printk((_xe), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_notice(_xe, _fmt, ...) \
+ xe_printk((_xe), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_info(_xe, _fmt, ...) \
+ xe_printk((_xe), info, _fmt, ##__VA_ARGS__)
+
+#define xe_dbg(_xe, _fmt, ...) \
+ xe_printk((_xe), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_WARN_type(_xe, _type, _condition, _fmt, ...) \
+ drm_WARN##_type(&(_xe)->drm, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_WARN(_xe, _condition, _fmt, ...) \
+ xe_WARN_type((_xe),, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ONCE(_xe, _condition, _fmt, ...) \
+ xe_WARN_type((_xe), _ONCE, _condition, __XE_PRINTK_FMT((_xe), _fmt, ## __VA_ARGS__))
+
+#define xe_WARN_ON(_xe, _condition) \
+ xe_WARN((_xe), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_WARN_ON_ONCE(_xe, _condition) \
+ xe_WARN_ONCE((_xe), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
+static inline void __xe_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+
+ xe_err(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+
+ xe_info(xe, "%pV", vaf);
+}
+
+static inline void __xe_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_device *xe = p->arg;
+ struct drm_printer ddp;
+
+ /*
+ * The original xe_dbg() callsite annotations are useless here,
+ * redirect to the tweaked drm_dbg_printer() instead.
+ */
+ ddp = drm_dbg_printer(&xe->drm, DRM_UT_DRIVER, NULL);
+ ddp.origin = p->origin;
+
+ drm_printf(&ddp, __XE_PRINTK_FMT(xe, "%pV", vaf));
+}
+
+/**
+ * xe_err_printer - Construct a &drm_printer that outputs to xe_err()
+ * @xe: the &xe_device pointer to use in xe_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_err_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_err,
+ .arg = xe,
+ };
+ return p;
+}
+
+/**
+ * xe_info_printer - Construct a &drm_printer that outputs to xe_info()
+ * @xe: the &xe_device pointer to use in xe_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_info_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_info,
+ .arg = xe,
+ };
+ return p;
+}
+
+/**
+ * xe_dbg_printer - Construct a &drm_printer that outputs like xe_dbg()
+ * @xe: the &xe_device pointer to use in xe_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_dbg_printer(struct xe_device *xe)
+{
+ struct drm_printer p = {
+ .printfn = __xe_printfn_dbg,
+ .arg = xe,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
+#endif
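
For illustration only, the printer constructors above drop straight into drm_printf()-style dump code; the example_* helper below is hypothetical, while xe->info.devid is an existing field used elsewhere in this series.

/* Hypothetical user of the xe printers; sketch only. */
static void example_dump_devid(struct xe_device *xe, bool error_path)
{
	struct drm_printer p = error_path ? xe_err_printer(xe) :
					    xe_dbg_printer(xe);

	/* Output is routed through xe_err()/xe_dbg() with the usual drm prefix */
	drm_printf(&p, "devid: %#x\n", xe->info.devid);
}
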
diff --git a/drivers/gpu/drm/xe/xe_psmi.c b/drivers/gpu/drm/xe/xe_psmi.c
new file mode 100644
index 000000000000..6a54e38b81ba
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_psmi.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_configfs.h"
+#include "xe_psmi.h"
+
+/*
+ * PSMI capture support
+ *
+ * Requirement for PSMI capture is to have a physically contiguous buffer. The
+ * PSMI tool is responsible for all necessary configuration (MMIO register
+ * writes are done from user-space). However, the KMD needs to provide the PSMI
+ * tool with the physical address of the base of the PSMI buffer in the VRAM case.
+ *
+ * VRAM backed PSMI buffer:
+ * The buffer is allocated as a GEM object with the XE_BO_FLAG_PINNED flag,
+ * which creates a contiguous allocation. The physical address is returned from
+ * psmi_debugfs_capture_addr_show(). The PSMI tool can mmap the buffer via the
+ * PCIBAR through sysfs.
+ *
+ * SYSTEM memory backed PSMI buffer:
+ * This interface does not support allocating from the SYSTEM memory region.
+ * The PSMI tool needs to allocate memory itself using hugetlbfs. In order to
+ * get the physical address, user-space can query /proc/[pid]/pagemap. As an
+ * alternative, CMA debugfs could also be used to allocate reserved CMA memory.
+ */
+
+static bool psmi_enabled(struct xe_device *xe)
+{
+ return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev));
+}
+
+static void psmi_free_object(struct xe_bo *bo)
+{
+ xe_bo_lock(bo, NULL);
+ xe_bo_unpin(bo);
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+}
+
+/*
+ * Free PSMI capture buffer objects.
+ */
+static void psmi_cleanup(struct xe_device *xe)
+{
+ unsigned long id, region_mask = xe->psmi.region_mask;
+ struct xe_bo *bo;
+
+ for_each_set_bit(id, &region_mask,
+ ARRAY_SIZE(xe->psmi.capture_obj)) {
+ /* smem should never be set */
+ xe_assert(xe, id);
+
+ bo = xe->psmi.capture_obj[id];
+ if (bo) {
+ psmi_free_object(bo);
+ xe->psmi.capture_obj[id] = NULL;
+ }
+ }
+}
+
+static struct xe_bo *psmi_alloc_object(struct xe_device *xe,
+ unsigned int id, size_t bo_size)
+{
+ struct xe_tile *tile;
+
+ xe_assert(xe, id);
+ xe_assert(xe, bo_size);
+
+ tile = &xe->tiles[id - 1];
+
+ /* VRAM: Allocate GEM object for the capture buffer */
+ return xe_bo_create_pin_range_novm(xe, tile, bo_size, 0, ~0ull,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_PINNED_LATE_RESTORE |
+ XE_BO_FLAG_NEEDS_CPU_ACCESS);
+}
+
+/*
+ * Allocate PSMI capture buffer objects (via debugfs set function), based on
+ * which regions the user has selected in region_mask. @size: size in bytes
+ * (should be power of 2)
+ *
+ * Always release/free the current buffer objects before attempting to allocate
+ * new ones. Size == 0 will free all current buffers.
+ *
+ * Note that we don't write any registers, as the capture tool configures
+ * all PSMI registers itself via mmio space.
+ */
+static int psmi_resize_object(struct xe_device *xe, size_t size)
+{
+ unsigned long id, region_mask = xe->psmi.region_mask;
+ struct xe_bo *bo = NULL;
+ int err = 0;
+
+ /* if resizing, free currently allocated buffers first */
+ psmi_cleanup(xe);
+
+	/* size can be set to 0, in which case we are now done */
+ if (!size)
+ return 0;
+
+ for_each_set_bit(id, &region_mask,
+ ARRAY_SIZE(xe->psmi.capture_obj)) {
+ /* smem should never be set */
+ xe_assert(xe, id);
+
+ bo = psmi_alloc_object(xe, id, size);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ break;
+ }
+ xe->psmi.capture_obj[id] = bo;
+
+ drm_info(&xe->drm,
+ "PSMI capture size requested: %zu bytes, allocated: %lu:%zu\n",
+ size, id, bo ? xe_bo_size(bo) : 0);
+ }
+
+ /* on error, reverse what was allocated */
+ if (err)
+ psmi_cleanup(xe);
+
+ return err;
+}
+
+/*
+ * Returns an address for the capture tool to use to find the start of the
+ * capture buffer. The capture tool requires a buffer allocated per tile
+ * (VRAM region), thus we return an address for each region.
+ */
+static int psmi_debugfs_capture_addr_show(struct seq_file *m, void *data)
+{
+ struct xe_device *xe = m->private;
+ unsigned long id, region_mask;
+ struct xe_bo *bo;
+ u64 val;
+
+ region_mask = xe->psmi.region_mask;
+ for_each_set_bit(id, &region_mask,
+ ARRAY_SIZE(xe->psmi.capture_obj)) {
+ /* smem should never be set */
+ xe_assert(xe, id);
+
+ /* VRAM region */
+ bo = xe->psmi.capture_obj[id];
+ if (!bo)
+ continue;
+
+ /* pinned, so don't need bo_lock */
+ val = __xe_bo_addr(bo, 0, PAGE_SIZE);
+		seq_printf(m, "%lu: 0x%llx\n", id, val);
+ }
+
+ return 0;
+}
+
+/*
+ * Return capture buffer size, using the size from first allocated object that
+ * is found. This works because all objects must be of the same size.
+ */
+static int psmi_debugfs_capture_size_get(void *data, u64 *val)
+{
+ unsigned long id, region_mask;
+ struct xe_device *xe = data;
+ struct xe_bo *bo;
+
+ region_mask = xe->psmi.region_mask;
+ for_each_set_bit(id, &region_mask,
+ ARRAY_SIZE(xe->psmi.capture_obj)) {
+ /* smem should never be set */
+ xe_assert(xe, id);
+
+ bo = xe->psmi.capture_obj[id];
+ if (bo) {
+ *val = xe_bo_size(bo);
+ return 0;
+ }
+ }
+
+ /* no capture objects are allocated */
+ *val = 0;
+
+ return 0;
+}
+
+/*
+ * Set size of PSMI capture buffer. This triggers the allocation of capture
+ * buffer in each memory region as specified with prior write to
+ * psmi_capture_region_mask.
+ */
+static int psmi_debugfs_capture_size_set(void *data, u64 val)
+{
+ struct xe_device *xe = data;
+
+ /* user must have specified at least one region */
+ if (!xe->psmi.region_mask)
+ return -EINVAL;
+
+ return psmi_resize_object(xe, val);
+}
+
+static int psmi_debugfs_capture_region_mask_get(void *data, u64 *val)
+{
+ struct xe_device *xe = data;
+
+ *val = xe->psmi.region_mask;
+
+ return 0;
+}
+
+/*
+ * Select VRAM regions for multi-tile devices, only allowed when buffer is not
+ * currently allocated.
+ */
+static int psmi_debugfs_capture_region_mask_set(void *data, u64 region_mask)
+{
+ struct xe_device *xe = data;
+ u64 size = 0;
+
+ /* SMEM is not supported (see comments at top of file) */
+ if (region_mask & 0x1)
+ return -EOPNOTSUPP;
+
+ /* input bitmask should contain only valid TTM regions */
+ if (!region_mask || region_mask & ~xe->info.mem_region_mask)
+ return -EINVAL;
+
+ /* only allow setting mask if buffer is not yet allocated */
+ psmi_debugfs_capture_size_get(xe, &size);
+ if (size)
+ return -EBUSY;
+
+ xe->psmi.region_mask = region_mask;
+
+ return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(psmi_debugfs_capture_addr);
+
+DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_region_mask_fops,
+ psmi_debugfs_capture_region_mask_get,
+ psmi_debugfs_capture_region_mask_set,
+ "0x%llx\n");
+
+DEFINE_DEBUGFS_ATTRIBUTE(psmi_debugfs_capture_size_fops,
+ psmi_debugfs_capture_size_get,
+ psmi_debugfs_capture_size_set,
+ "%lld\n");
+
+void xe_psmi_debugfs_register(struct xe_device *xe)
+{
+ struct drm_minor *minor;
+
+ if (!psmi_enabled(xe))
+ return;
+
+ minor = xe->drm.primary;
+ if (!minor->debugfs_root)
+ return;
+
+ debugfs_create_file("psmi_capture_addr",
+ 0400, minor->debugfs_root, xe,
+ &psmi_debugfs_capture_addr_fops);
+
+ debugfs_create_file("psmi_capture_region_mask",
+ 0600, minor->debugfs_root, xe,
+ &psmi_debugfs_capture_region_mask_fops);
+
+ debugfs_create_file("psmi_capture_size",
+ 0600, minor->debugfs_root, xe,
+ &psmi_debugfs_capture_size_fops);
+}
+
+static void psmi_fini(void *arg)
+{
+ psmi_cleanup(arg);
+}
+
+int xe_psmi_init(struct xe_device *xe)
+{
+ if (!psmi_enabled(xe))
+ return 0;
+
+ return devm_add_action(xe->drm.dev, psmi_fini, xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_psmi.h b/drivers/gpu/drm/xe/xe_psmi.h
new file mode 100644
index 000000000000..b1dfba80d893
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_psmi.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_PSMI_H_
+#define _XE_PSMI_H_
+
+struct xe_device;
+
+int xe_psmi_init(struct xe_device *xe);
+void xe_psmi_debugfs_register(struct xe_device *xe);
+
+#endif
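
To make the capture flow described in the xe_psmi.c comment block concrete, here is a hedged userspace sketch; the /sys/kernel/debug/dri/0/ path, the 0x2 region mask and the 256 MiB size are illustrative assumptions, while the three file names come from xe_psmi_debugfs_register().

/* Hypothetical userspace sequence driving the PSMI debugfs files (sketch). */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char addr[128] = {0};
	int fd;

	/* 1) Select VRAM region(s); bit 0 (SMEM) is rejected with -EOPNOTSUPP. */
	fd = open("/sys/kernel/debug/dri/0/psmi_capture_region_mask", O_WRONLY);
	if (fd < 0 || write(fd, "0x2", 3) != 3)
		return 1;
	close(fd);

	/* 2) Allocate the capture buffer: size in bytes, power of two (256 MiB). */
	fd = open("/sys/kernel/debug/dri/0/psmi_capture_size", O_WRONLY);
	if (fd < 0 || write(fd, "268435456", 9) != 9)
		return 1;
	close(fd);

	/* 3) Read back the physical base address of each selected region. */
	fd = open("/sys/kernel/debug/dri/0/psmi_capture_addr", O_RDONLY);
	if (fd < 0 || read(fd, addr, sizeof(addr) - 1) < 0)
		return 1;
	close(fd);

	printf("capture buffers:\n%s", addr);
	return 0;
}

Writing 0 to psmi_capture_size afterwards frees the buffers again, matching psmi_debugfs_capture_size_set() above.
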
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index b04756a97cdc..884127b4d97d 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -3,8 +3,6 @@
* Copyright © 2022 Intel Corporation
*/
-#include <linux/dma-fence-array.h>
-
#include "xe_pt.h"
#include "regs/xe_gtt_defs.h"
@@ -13,16 +11,17 @@
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
#include "xe_gt.h"
-#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pt_types.h"
#include "xe_pt_walk.h"
#include "xe_res_cursor.h"
#include "xe_sched_job.h"
-#include "xe_sync.h"
#include "xe_svm.h"
+#include "xe_sync.h"
+#include "xe_tlb_inval_job.h"
#include "xe_trace.h"
#include "xe_ttm_stolen_mgr.h"
+#include "xe_userptr.h"
#include "xe_vm.h"
struct xe_pt_dir {
@@ -69,7 +68,7 @@ static u64 __xe_pt_empty_pte(struct xe_tile *tile, struct xe_vm *vm,
if (level > MAX_HUGEPTE_LEVEL)
return vm->pt_ops->pde_encode_bo(vm->scratch_pt[id][level - 1]->bo,
- 0, pat_index);
+ 0);
return vm->pt_ops->pte_encode_addr(xe, 0, pat_index, level, IS_DGFX(xe), 0) |
XE_PTE_NULL;
@@ -88,6 +87,7 @@ static void xe_pt_free(struct xe_pt *pt)
* @vm: The vm to create for.
* @tile: The tile to create for.
* @level: The page-table level.
+ * @exec: The drm_exec object used to lock the vm.
*
* Allocate and initialize a single struct xe_pt metadata structure. Also
* create the corresponding page-table bo, but don't initialize it. If the
@@ -99,7 +99,7 @@ static void xe_pt_free(struct xe_pt *pt)
* error.
*/
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
- unsigned int level)
+ unsigned int level, struct drm_exec *exec)
{
struct xe_pt *pt;
struct xe_bo *bo;
@@ -120,12 +120,14 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
XE_BO_FLAG_NO_RESV_EVICT | XE_BO_FLAG_PAGETABLE;
if (vm->xef) /* userspace */
- bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE;
+ bo_flags |= XE_BO_FLAG_PINNED_LATE_RESTORE | XE_BO_FLAG_FORCE_USER_VRAM;
pt->level = level;
+
+ drm_WARN_ON(&vm->xe->drm, IS_ERR_OR_NULL(exec));
bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
ttm_bo_type_kernel,
- bo_flags);
+ bo_flags, exec);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto err_kfree;
@@ -518,7 +520,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
{
struct xe_pt_stage_bind_walk *xe_walk =
container_of(walk, typeof(*xe_walk), base);
- u16 pat_index = xe_walk->vma->pat_index;
+ u16 pat_index = xe_walk->vma->attr.pat_index;
struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
struct xe_vm *vm = xe_walk->vm;
struct xe_pt *xe_child;
@@ -589,7 +591,8 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
if (covers || !*child) {
u64 flags = 0;
- xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1);
+ xe_child = xe_pt_create(xe_walk->vm, xe_walk->tile, level - 1,
+ xe_vm_validation_exec(vm));
if (IS_ERR(xe_child))
return PTR_ERR(xe_child);
@@ -616,7 +619,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
xe_child->is_compact = true;
}
- pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
+ pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0) | flags;
ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
pte);
}
@@ -640,28 +643,31 @@ static const struct xe_pt_walk_ops xe_pt_stage_bind_ops = {
* - In all other cases device atomics will be disabled with AE=0 until an application
* request differently using a ioctl like madvise.
*/
-static bool xe_atomic_for_vram(struct xe_vm *vm)
+static bool xe_atomic_for_vram(struct xe_vm *vm, struct xe_vma *vma)
{
+ if (vma->attr.atomic_access == DRM_XE_ATOMIC_CPU)
+ return false;
+
return true;
}
-static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
+static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_vma *vma)
{
struct xe_device *xe = vm->xe;
+ struct xe_bo *bo = xe_vma_bo(vma);
- if (!xe->info.has_device_atomics_on_smem)
+ if (!xe->info.has_device_atomics_on_smem ||
+ vma->attr.atomic_access == DRM_XE_ATOMIC_CPU)
return false;
+ if (vma->attr.atomic_access == DRM_XE_ATOMIC_DEVICE)
+ return true;
+
/*
* If a SMEM+LMEM allocation is backed by SMEM, a device
* atomics will cause a gpu page fault and which then
* gets migrated to LMEM, bind such allocations with
* device atomics enabled.
- *
- * TODO: Revisit this. Perhaps add something like a
- * fault_on_atomics_in_system UAPI flag.
- * Note that this also prohibits GPU atomics in LR mode for
- * userptr and system memory on DGFX.
*/
return (!IS_DGFX(xe) || (!xe_vm_in_lr_mode(vm) ||
(bo && xe_bo_has_single_placement(bo))));
@@ -707,7 +713,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
.vm = vm,
.tile = tile,
.curs = &curs,
- .va_curs_start = range ? range->base.itree.start :
+ .va_curs_start = range ? xe_svm_range_start(range) :
xe_vma_start(vma),
.vma = vma,
.wupd.entries = entries,
@@ -725,8 +731,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
return -EAGAIN;
}
if (xe_svm_range_has_dma_mapping(range)) {
- xe_res_first_dma(range->base.dma_addr, 0,
- range->base.itree.last + 1 - range->base.itree.start,
+ xe_res_first_dma(range->base.pages.dma_addr, 0,
+ xe_svm_range_size(range),
&curs);
xe_svm_range_debug(range, "BIND PREPARE - MIXED");
} else {
@@ -744,8 +750,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
goto walk_pt;
if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
- xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
- xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
+ xe_walk.default_vram_pte = xe_atomic_for_vram(vm, vma) ? XE_USM_PPGTT_PTE_AE : 0;
+ xe_walk.default_system_pte = xe_atomic_for_system(vm, vma) ?
XE_USM_PPGTT_PTE_AE : 0;
}
@@ -756,8 +762,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
if (!xe_vma_is_null(vma) && !range) {
if (xe_vma_is_userptr(vma))
- xe_res_first_sg(to_userptr_vma(vma)->userptr.sg, 0,
- xe_vma_size(vma), &curs);
+ xe_res_first_dma(to_userptr_vma(vma)->userptr.pages.dma_addr, 0,
+ xe_vma_size(vma), &curs);
else if (xe_bo_is_vram(bo) || xe_bo_is_stolen(bo))
xe_res_first(bo->ttm.resource, xe_vma_bo_offset(vma),
xe_vma_size(vma), &curs);
@@ -770,8 +776,8 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
walk_pt:
ret = xe_pt_walk_range(&pt->base, pt->level,
- range ? range->base.itree.start : xe_vma_start(vma),
- range ? range->base.itree.last + 1 : xe_vma_end(vma),
+ range ? xe_svm_range_start(range) : xe_vma_start(vma),
+ range ? xe_svm_range_end(range) : xe_vma_end(vma),
&xe_walk.base);
*num_entries = xe_walk.wupd.num_used_entries;
@@ -907,6 +913,11 @@ bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma)
struct xe_pt *pt = xe_vma_vm(vma)->pt_root[tile->id];
u8 pt_mask = (vma->tile_present & ~vma->tile_invalidated);
+ if (xe_vma_bo(vma))
+ xe_bo_assert_held(xe_vma_bo(vma));
+ else if (xe_vma_is_userptr(vma))
+ lockdep_assert_held(&xe_vma_vm(vma)->svm.gpusvm.notifier_lock);
+
if (!(pt_mask & BIT(tile->id)))
return false;
@@ -945,13 +956,25 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
struct xe_pt *pt = vm->pt_root[tile->id];
u8 pt_mask = (range->tile_present & ~range->tile_invalidated);
- xe_svm_assert_in_notifier(vm);
+ /*
+ * Locking rules:
+ *
+ * - notifier_lock (write): full protection against page table changes
+ * and MMU notifier invalidations.
+ *
+	 * - notifier_lock (read) + vm_lock (write): combined protection against
+	 *   invalidations and concurrent page table modifications (e.g., madvise).
+	 */
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
+ (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
+ lockdep_is_held_type(&vm->lock, 0)));
if (!(pt_mask & BIT(tile->id)))
return false;
- (void)xe_pt_walk_shared(&pt->base, pt->level, range->base.itree.start,
- range->base.itree.last + 1, &xe_walk.base);
+ (void)xe_pt_walk_shared(&pt->base, pt->level, xe_svm_range_start(range),
+ xe_svm_range_end(range), &xe_walk.base);
return xe_walk.needs_invalidate;
}
@@ -1028,7 +1051,7 @@ static void xe_pt_commit_locks_assert(struct xe_vma *vma)
xe_pt_commit_prepare_locks_assert(vma);
if (xe_vma_is_userptr(vma))
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
}
static void xe_pt_commit(struct xe_vma *vma,
@@ -1256,6 +1279,8 @@ static int op_add_deps(struct xe_vm *vm, struct xe_vma_op *op,
}
static int xe_pt_vm_dependencies(struct xe_sched_job *job,
+ struct xe_tlb_inval_job *ijob,
+ struct xe_tlb_inval_job *mjob,
struct xe_vm *vm,
struct xe_vma_ops *vops,
struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -1313,16 +1338,23 @@ static int xe_pt_vm_dependencies(struct xe_sched_job *job,
return err;
}
- if (!(pt_update_ops->q->flags & EXEC_QUEUE_FLAG_KERNEL)) {
- if (job)
- err = xe_sched_job_last_fence_add_dep(job, vm);
- else
- err = xe_exec_queue_last_fence_test_dep(pt_update_ops->q, vm);
- }
-
for (i = 0; job && !err && i < vops->num_syncs; i++)
err = xe_sync_entry_add_deps(&vops->syncs[i], job);
+ if (job) {
+ if (ijob) {
+ err = xe_tlb_inval_job_alloc_dep(ijob);
+ if (err)
+ return err;
+ }
+
+ if (mjob) {
+ err = xe_tlb_inval_job_alloc_dep(mjob);
+ if (err)
+ return err;
+ }
+ }
+
return err;
}
@@ -1334,10 +1366,12 @@ static int xe_pt_pre_commit(struct xe_migrate_pt_update *pt_update)
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[pt_update->tile_id];
- return xe_pt_vm_dependencies(pt_update->job, vm, pt_update->vops,
+ return xe_pt_vm_dependencies(pt_update->job, pt_update->ijob,
+ pt_update->mjob, vm, pt_update->vops,
pt_update_ops, rftree);
}
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
#ifdef CONFIG_DRM_XE_USERPTR_INVAL_INJECT
static bool xe_pt_userptr_inject_eagain(struct xe_userptr_vma *uvma)
@@ -1368,7 +1402,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
struct xe_userptr_vma *uvma;
unsigned long notifier_seq;
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
if (!xe_vma_is_userptr(vma))
return 0;
@@ -1377,7 +1411,7 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
if (xe_pt_userptr_inject_eagain(uvma))
xe_vma_userptr_force_invalidate(uvma);
- notifier_seq = uvma->userptr.notifier_seq;
+ notifier_seq = uvma->userptr.pages.notifier_seq;
if (!mmu_interval_read_retry(&uvma->userptr.notifier,
notifier_seq))
@@ -1393,12 +1427,12 @@ static int vma_check_userptr(struct xe_vm *vm, struct xe_vma *vma,
return 0;
}
-static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
- struct xe_vm_pgtable_update_ops *pt_update)
+static int op_check_svm_userptr(struct xe_vm *vm, struct xe_vma_op *op,
+ struct xe_vm_pgtable_update_ops *pt_update)
{
int err = 0;
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
@@ -1416,9 +1450,40 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
case DRM_GPUVA_OP_UNMAP:
break;
case DRM_GPUVA_OP_PREFETCH:
- err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va),
- pt_update);
+ if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va))) {
+ struct xe_svm_range *range = op->map_range.range;
+ unsigned long i;
+
+ xe_assert(vm->xe,
+ xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.prefetch.va)));
+ xa_for_each(&op->prefetch_range.range, i, range) {
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ return -ENODATA;
+ }
+ }
+ } else {
+ err = vma_check_userptr(vm, gpuva_to_vma(op->base.prefetch.va), pt_update);
+ }
+ break;
+#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
+ case DRM_GPUVA_OP_DRIVER:
+ if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
+ struct xe_svm_range *range = op->map_range.range;
+
+ xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
+
+ xe_svm_range_debug(range, "PRE-COMMIT");
+
+ if (!xe_svm_range_pages_valid(range)) {
+ xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
+ return -EAGAIN;
+ }
+ }
break;
+#endif
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
}
@@ -1426,7 +1491,7 @@ static int op_check_userptr(struct xe_vm *vm, struct xe_vma_op *op,
return err;
}
-static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
+static int xe_pt_svm_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
{
struct xe_vm *vm = pt_update->vops->vm;
struct xe_vma_ops *vops = pt_update->vops;
@@ -1439,124 +1504,20 @@ static int xe_pt_userptr_pre_commit(struct xe_migrate_pt_update *pt_update)
if (err)
return err;
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
list_for_each_entry(op, &vops->list, link) {
- err = op_check_userptr(vm, op, pt_update_ops);
+ err = op_check_svm_userptr(vm, op, pt_update_ops);
if (err) {
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
break;
}
}
return err;
}
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static int xe_pt_svm_pre_commit(struct xe_migrate_pt_update *pt_update)
-{
- struct xe_vm *vm = pt_update->vops->vm;
- struct xe_vma_ops *vops = pt_update->vops;
- struct xe_vma_op *op;
- int err;
-
- err = xe_pt_pre_commit(pt_update);
- if (err)
- return err;
-
- xe_svm_notifier_lock(vm);
-
- list_for_each_entry(op, &vops->list, link) {
- struct xe_svm_range *range = op->map_range.range;
-
- if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
- continue;
-
- xe_svm_range_debug(range, "PRE-COMMIT");
-
- xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(op->map_range.vma));
- xe_assert(vm->xe, op->subop == XE_VMA_SUBOP_MAP_RANGE);
-
- if (!xe_svm_range_pages_valid(range)) {
- xe_svm_range_debug(range, "PRE-COMMIT - RETRY");
- xe_svm_notifier_unlock(vm);
- return -EAGAIN;
- }
- }
-
- return 0;
-}
#endif
-struct invalidation_fence {
- struct xe_gt_tlb_invalidation_fence base;
- struct xe_gt *gt;
- struct dma_fence *fence;
- struct dma_fence_cb cb;
- struct work_struct work;
- u64 start;
- u64 end;
- u32 asid;
-};
-
-static void invalidation_fence_cb(struct dma_fence *fence,
- struct dma_fence_cb *cb)
-{
- struct invalidation_fence *ifence =
- container_of(cb, struct invalidation_fence, cb);
- struct xe_device *xe = gt_to_xe(ifence->gt);
-
- trace_xe_gt_tlb_invalidation_fence_cb(xe, &ifence->base);
- if (!ifence->fence->error) {
- queue_work(system_wq, &ifence->work);
- } else {
- ifence->base.base.error = ifence->fence->error;
- xe_gt_tlb_invalidation_fence_signal(&ifence->base);
- }
- dma_fence_put(ifence->fence);
-}
-
-static void invalidation_fence_work_func(struct work_struct *w)
-{
- struct invalidation_fence *ifence =
- container_of(w, struct invalidation_fence, work);
- struct xe_device *xe = gt_to_xe(ifence->gt);
-
- trace_xe_gt_tlb_invalidation_fence_work_func(xe, &ifence->base);
- xe_gt_tlb_invalidation_range(ifence->gt, &ifence->base, ifence->start,
- ifence->end, ifence->asid);
-}
-
-static void invalidation_fence_init(struct xe_gt *gt,
- struct invalidation_fence *ifence,
- struct dma_fence *fence,
- u64 start, u64 end, u32 asid)
-{
- int ret;
-
- trace_xe_gt_tlb_invalidation_fence_create(gt_to_xe(gt), &ifence->base);
-
- xe_gt_tlb_invalidation_fence_init(gt, &ifence->base, false);
-
- ifence->fence = fence;
- ifence->gt = gt;
- ifence->start = start;
- ifence->end = end;
- ifence->asid = asid;
-
- INIT_WORK(&ifence->work, invalidation_fence_work_func);
- ret = dma_fence_add_callback(fence, &ifence->cb, invalidation_fence_cb);
- if (ret == -ENOENT) {
- dma_fence_put(ifence->fence); /* Usually dropped in CB */
- invalidation_fence_work_func(&ifence->work);
- } else if (ret) {
- dma_fence_put(&ifence->base.base); /* Caller ref */
- dma_fence_put(&ifence->base.base); /* Creation ref */
- }
-
- xe_gt_assert(gt, !ret || ret == -ENOENT);
-}
-
struct xe_pt_stage_unbind_walk {
/** @base: The pagewalk base-class. */
struct xe_pt_walk base;
@@ -1691,8 +1652,8 @@ static unsigned int xe_pt_stage_unbind(struct xe_tile *tile,
struct xe_svm_range *range,
struct xe_vm_pgtable_update *entries)
{
- u64 start = range ? range->base.itree.start : xe_vma_start(vma);
- u64 end = range ? range->base.itree.last + 1 : xe_vma_end(vma);
+ u64 start = range ? xe_svm_range_start(range) : xe_vma_start(vma);
+ u64 end = range ? xe_svm_range_end(range) : xe_vma_end(vma);
struct xe_pt_stage_unbind_walk xe_walk = {
.base = {
.ops = &xe_pt_stage_unbind_ops,
@@ -1858,7 +1819,7 @@ static int bind_op_prepare(struct xe_vm *vm, struct xe_tile *tile,
xe_vma_start(vma),
xe_vma_end(vma));
++pt_update_ops->current_op;
- pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
/*
* If rebind, we have to invalidate TLB on !LR vms to invalidate
@@ -1902,7 +1863,7 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
vm_dbg(&xe_vma_vm(vma)->xe->drm,
"Preparing bind, with range [%lx...%lx)\n",
- range->base.itree.start, range->base.itree.last);
+ xe_svm_range_start(range), xe_svm_range_end(range) - 1);
pt_op->vma = NULL;
pt_op->bind = true;
@@ -1917,8 +1878,8 @@ static int bind_range_prepare(struct xe_vm *vm, struct xe_tile *tile,
pt_op->num_entries, true);
xe_pt_update_ops_rfence_interval(pt_update_ops,
- range->base.itree.start,
- range->base.itree.last + 1);
+ xe_svm_range_start(range),
+ xe_svm_range_end(range));
++pt_update_ops->current_op;
pt_update_ops->needs_svm_lock = true;
@@ -1966,7 +1927,7 @@ static int unbind_op_prepare(struct xe_tile *tile,
xe_pt_update_ops_rfence_interval(pt_update_ops, xe_vma_start(vma),
xe_vma_end(vma));
++pt_update_ops->current_op;
- pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
+ pt_update_ops->needs_svm_lock |= xe_vma_is_userptr(vma);
pt_update_ops->needs_invalidation = true;
xe_pt_commit_prepare_unbind(vma, pt_op->entries, pt_op->num_entries);
@@ -1974,6 +1935,32 @@ static int unbind_op_prepare(struct xe_tile *tile,
return 0;
}
+static bool
+xe_pt_op_check_range_skip_invalidation(struct xe_vm_pgtable_update_op *pt_op,
+ struct xe_svm_range *range)
+{
+ struct xe_vm_pgtable_update *update = pt_op->entries;
+
+ XE_WARN_ON(!pt_op->num_entries);
+
+ /*
+	 * We can't skip the invalidation if we are removing PTEs that span more
+	 * than the range, so check that the PTEs being removed are confined to
+	 * the range itself.
+ */
+
+ if (pt_op->num_entries > 1)
+ return false;
+
+ if (update->pt->level == 0)
+ return true;
+
+ if (update->pt->level == 1)
+ return xe_svm_range_size(range) >= SZ_2M;
+
+ return false;
+}
+
static int unbind_range_prepare(struct xe_vm *vm,
struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -1987,7 +1974,7 @@ static int unbind_range_prepare(struct xe_vm *vm,
vm_dbg(&vm->xe->drm,
"Preparing unbind, with range [%lx...%lx)\n",
- range->base.itree.start, range->base.itree.last);
+ xe_svm_range_start(range), xe_svm_range_end(range) - 1);
pt_op->vma = XE_INVALID_VMA;
pt_op->bind = false;
@@ -1998,11 +1985,14 @@ static int unbind_range_prepare(struct xe_vm *vm,
xe_vm_dbg_print_entries(tile_to_xe(tile), pt_op->entries,
pt_op->num_entries, false);
- xe_pt_update_ops_rfence_interval(pt_update_ops, range->base.itree.start,
- range->base.itree.last + 1);
+ xe_pt_update_ops_rfence_interval(pt_update_ops, xe_svm_range_start(range),
+ xe_svm_range_end(range));
++pt_update_ops->current_op;
pt_update_ops->needs_svm_lock = true;
- pt_update_ops->needs_invalidation = true;
+ pt_update_ops->needs_invalidation |= xe_vm_has_scratch(vm) ||
+ xe_vm_has_valid_gpu_mapping(tile, range->tile_present,
+ range->tile_invalidated) ||
+ !xe_pt_op_check_range_skip_invalidation(pt_op, range);
xe_pt_commit_prepare_unbind(XE_INVALID_VMA, pt_op->entries,
pt_op->num_entries);
@@ -2023,7 +2013,7 @@ static int op_prepare(struct xe_vm *vm,
case DRM_GPUVA_OP_MAP:
if ((!op->map.immediate && xe_vm_in_fault_mode(vm) &&
!op->map.invalidate_on_bind) ||
- op->map.is_cpu_addr_mirror)
+ (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR))
break;
err = bind_op_prepare(vm, tile, pt_update_ops, op->map.vma,
@@ -2065,11 +2055,20 @@ static int op_prepare(struct xe_vm *vm,
{
struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
- if (xe_vma_is_cpu_addr_mirror(vma))
- break;
+ if (xe_vma_is_cpu_addr_mirror(vma)) {
+ struct xe_svm_range *range;
+ unsigned long i;
- err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
- pt_update_ops->wait_vm_kernel = true;
+ xa_for_each(&op->prefetch_range.range, i, range) {
+ err = bind_range_prepare(vm, tile, pt_update_ops,
+ vma, range);
+ if (err)
+ return err;
+ }
+ } else {
+ err = bind_op_prepare(vm, tile, pt_update_ops, vma, false);
+ pt_update_ops->wait_vm_kernel = true;
+ }
break;
}
case DRM_GPUVA_OP_DRIVER:
@@ -2166,12 +2165,17 @@ static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
}
- vma->tile_present |= BIT(tile->id);
- vma->tile_staged &= ~BIT(tile->id);
+ /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
+ WRITE_ONCE(vma->tile_present, vma->tile_present | BIT(tile->id));
if (invalidate_on_bind)
- vma->tile_invalidated |= BIT(tile->id);
+ WRITE_ONCE(vma->tile_invalidated,
+ vma->tile_invalidated | BIT(tile->id));
+ else
+ WRITE_ONCE(vma->tile_invalidated,
+ vma->tile_invalidated & ~BIT(tile->id));
+ vma->tile_staged &= ~BIT(tile->id);
if (xe_vma_is_userptr(vma)) {
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
to_userptr_vma(vma)->userptr.initial_bind = true;
}
@@ -2207,7 +2211,7 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
if (!vma->tile_present) {
list_del_init(&vma->combined_links.rebind);
if (xe_vma_is_userptr(vma)) {
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
+ xe_svm_assert_held_read(vm);
spin_lock(&vm->userptr.invalidated_lock);
list_del_init(&to_userptr_vma(vma)->userptr.invalidate_link);
@@ -2216,6 +2220,18 @@ static void unbind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
}
}
+static void range_present_and_invalidated_tile(struct xe_vm *vm,
+ struct xe_svm_range *range,
+ u8 tile_id)
+{
+ /* All WRITE_ONCE pair with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
+
+ lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);
+
+ WRITE_ONCE(range->tile_present, range->tile_present | BIT(tile_id));
+ WRITE_ONCE(range->tile_invalidated, range->tile_invalidated & ~BIT(tile_id));
+}
+
static void op_commit(struct xe_vm *vm,
struct xe_tile *tile,
struct xe_vm_pgtable_update_ops *pt_update_ops,
@@ -2227,7 +2243,7 @@ static void op_commit(struct xe_vm *vm,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
if ((!op->map.immediate && xe_vm_in_fault_mode(vm)) ||
- op->map.is_cpu_addr_mirror)
+ (op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR))
break;
bind_op_commit(vm, tile, pt_update_ops, op->map.vma, fence,
@@ -2263,27 +2279,28 @@ static void op_commit(struct xe_vm *vm,
{
struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
- if (!xe_vma_is_cpu_addr_mirror(vma))
+ if (xe_vma_is_cpu_addr_mirror(vma)) {
+ struct xe_svm_range *range = NULL;
+ unsigned long i;
+
+ xa_for_each(&op->prefetch_range.range, i, range)
+ range_present_and_invalidated_tile(vm, range, tile->id);
+ } else {
bind_op_commit(vm, tile, pt_update_ops, vma, fence,
fence2, false);
+ }
break;
}
case DRM_GPUVA_OP_DRIVER:
{
- /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */
-
- if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
- WRITE_ONCE(op->map_range.range->tile_present,
- op->map_range.range->tile_present |
- BIT(tile->id));
- WRITE_ONCE(op->map_range.range->tile_invalidated,
- op->map_range.range->tile_invalidated &
- ~BIT(tile->id));
- } else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
+ /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
+ if (op->subop == XE_VMA_SUBOP_MAP_RANGE)
+ range_present_and_invalidated_tile(vm, op->map_range.range, tile->id);
+ else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE)
WRITE_ONCE(op->unmap_range.range->tile_present,
op->unmap_range.range->tile_present &
~BIT(tile->id));
- }
+
break;
}
default:
@@ -2297,22 +2314,25 @@ static const struct xe_migrate_pt_update_ops migrate_ops = {
.pre_commit = xe_pt_pre_commit,
};
-static const struct xe_migrate_pt_update_ops userptr_migrate_ops = {
- .populate = xe_vm_populate_pgtable,
- .clear = xe_migrate_clear_pgtable_callback,
- .pre_commit = xe_pt_userptr_pre_commit,
-};
-
-#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
-static const struct xe_migrate_pt_update_ops svm_migrate_ops = {
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops = {
.populate = xe_vm_populate_pgtable,
.clear = xe_migrate_clear_pgtable_callback,
- .pre_commit = xe_pt_svm_pre_commit,
+ .pre_commit = xe_pt_svm_userptr_pre_commit,
};
#else
-static const struct xe_migrate_pt_update_ops svm_migrate_ops;
+static const struct xe_migrate_pt_update_ops svm_userptr_migrate_ops;
#endif
+static struct xe_dep_scheduler *to_dep_scheduler(struct xe_exec_queue *q,
+ struct xe_gt *gt)
+{
+ if (xe_gt_is_media_type(gt))
+ return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT].dep_scheduler;
+
+ return q->tlb_inval[XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT].dep_scheduler;
+}
+
/**
* xe_pt_update_ops_run() - Run PT update operations
* @tile: Tile of PT update operations
@@ -2330,18 +2350,15 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
struct xe_vm *vm = vops->vm;
struct xe_vm_pgtable_update_ops *pt_update_ops =
&vops->pt_update_ops[tile->id];
- struct dma_fence *fence;
- struct invalidation_fence *ifence = NULL, *mfence = NULL;
- struct dma_fence **fences = NULL;
- struct dma_fence_array *cf = NULL;
+ struct xe_exec_queue *q = pt_update_ops->q;
+ struct dma_fence *fence, *ifence = NULL, *mfence = NULL;
+ struct xe_tlb_inval_job *ijob = NULL, *mjob = NULL;
struct xe_range_fence *rfence;
struct xe_vma_op *op;
int err = 0, i;
struct xe_migrate_pt_update update = {
.ops = pt_update_ops->needs_svm_lock ?
- &svm_migrate_ops :
- pt_update_ops->needs_userptr_lock ?
- &userptr_migrate_ops :
+ &svm_userptr_migrate_ops :
&migrate_ops,
.vops = vops,
.tile_id = tile->id,
@@ -2363,34 +2380,41 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
#endif
if (pt_update_ops->needs_invalidation) {
- ifence = kzalloc(sizeof(*ifence), GFP_KERNEL);
- if (!ifence) {
- err = -ENOMEM;
+ struct xe_dep_scheduler *dep_scheduler =
+ to_dep_scheduler(q, tile->primary_gt);
+
+ ijob = xe_tlb_inval_job_create(q, &tile->primary_gt->tlb_inval,
+ dep_scheduler, vm,
+ pt_update_ops->start,
+ pt_update_ops->last,
+ XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ if (IS_ERR(ijob)) {
+ err = PTR_ERR(ijob);
goto kill_vm_tile1;
}
+ update.ijob = ijob;
+
if (tile->media_gt) {
- mfence = kzalloc(sizeof(*ifence), GFP_KERNEL);
- if (!mfence) {
- err = -ENOMEM;
- goto free_ifence;
- }
- fences = kmalloc_array(2, sizeof(*fences), GFP_KERNEL);
- if (!fences) {
- err = -ENOMEM;
- goto free_ifence;
- }
- cf = dma_fence_array_alloc(2);
- if (!cf) {
- err = -ENOMEM;
- goto free_ifence;
+ dep_scheduler = to_dep_scheduler(q, tile->media_gt);
+
+ mjob = xe_tlb_inval_job_create(q,
+ &tile->media_gt->tlb_inval,
+ dep_scheduler, vm,
+ pt_update_ops->start,
+ pt_update_ops->last,
+ XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT);
+ if (IS_ERR(mjob)) {
+ err = PTR_ERR(mjob);
+ goto free_ijob;
}
+ update.mjob = mjob;
}
}
rfence = kzalloc(sizeof(*rfence), GFP_KERNEL);
if (!rfence) {
err = -ENOMEM;
- goto free_ifence;
+ goto free_ijob;
}
fence = xe_migrate_update_pgtables(tile->migrate, &update);
@@ -2414,30 +2438,12 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
pt_update_ops->last, fence))
dma_fence_wait(fence, false);
- /* tlb invalidation must be done before signaling rebind */
- if (ifence) {
- if (mfence)
- dma_fence_get(fence);
- invalidation_fence_init(tile->primary_gt, ifence, fence,
- pt_update_ops->start,
- pt_update_ops->last, vm->usm.asid);
- if (mfence) {
- invalidation_fence_init(tile->media_gt, mfence, fence,
- pt_update_ops->start,
- pt_update_ops->last, vm->usm.asid);
- fences[0] = &ifence->base.base;
- fences[1] = &mfence->base.base;
- dma_fence_array_init(cf, 2, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- fence = &cf->base;
- } else {
- fence = &ifence->base.base;
- }
- }
+ if (ijob)
+ ifence = xe_tlb_inval_job_push(ijob, tile->migrate, fence);
+ if (mjob)
+ mfence = xe_tlb_inval_job_push(mjob, tile->migrate, fence);
- if (!mfence) {
+ if (!mjob && !ijob) {
dma_resv_add_fence(xe_vm_resv(vm), fence,
pt_update_ops->wait_vm_bookkeep ?
DMA_RESV_USAGE_KERNEL :
@@ -2445,38 +2451,54 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
list_for_each_entry(op, &vops->list, link)
op_commit(vops->vm, tile, pt_update_ops, op, fence, NULL);
+ } else if (ijob && !mjob) {
+ dma_resv_add_fence(xe_vm_resv(vm), ifence,
+ pt_update_ops->wait_vm_bookkeep ?
+ DMA_RESV_USAGE_KERNEL :
+ DMA_RESV_USAGE_BOOKKEEP);
+
+ list_for_each_entry(op, &vops->list, link)
+ op_commit(vops->vm, tile, pt_update_ops, op, ifence, NULL);
} else {
- dma_resv_add_fence(xe_vm_resv(vm), &ifence->base.base,
+ dma_resv_add_fence(xe_vm_resv(vm), ifence,
pt_update_ops->wait_vm_bookkeep ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
- dma_resv_add_fence(xe_vm_resv(vm), &mfence->base.base,
+ dma_resv_add_fence(xe_vm_resv(vm), mfence,
pt_update_ops->wait_vm_bookkeep ?
DMA_RESV_USAGE_KERNEL :
DMA_RESV_USAGE_BOOKKEEP);
list_for_each_entry(op, &vops->list, link)
- op_commit(vops->vm, tile, pt_update_ops, op,
- &ifence->base.base, &mfence->base.base);
+ op_commit(vops->vm, tile, pt_update_ops, op, ifence,
+ mfence);
}
if (pt_update_ops->needs_svm_lock)
xe_svm_notifier_unlock(vm);
- if (pt_update_ops->needs_userptr_lock)
- up_read(&vm->userptr.notifier_lock);
+
+ /*
+ * The last fence is only used for zero bind queue idling; migrate
+ * queues are not exposed to user space.
+ */
+ if (!(q->flags & EXEC_QUEUE_FLAG_MIGRATE))
+ xe_exec_queue_last_fence_set(q, vm, fence);
+
+ xe_tlb_inval_job_put(mjob);
+ xe_tlb_inval_job_put(ijob);
+ dma_fence_put(ifence);
+ dma_fence_put(mfence);
return fence;
free_rfence:
kfree(rfence);
-free_ifence:
- kfree(cf);
- kfree(fences);
- kfree(mfence);
- kfree(ifence);
+free_ijob:
+ xe_tlb_inval_job_put(mjob);
+ xe_tlb_inval_job_put(ijob);
kill_vm_tile1:
- if (err != -EAGAIN && tile->id)
+ if (err != -EAGAIN && err != -ENODATA && tile->id)
xe_vm_kill(vops->vm, false);
return ERR_PTR(err);
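
As a worked restatement of the unbind path above (illustrative only; the real logic is xe_pt_op_check_range_skip_invalidation() combined with the needs_invalidation update in unbind_range_prepare()): the TLB invalidation can only be skipped when the VM has no scratch pages, the range has no valid GPU mapping left, and the page-table entries being removed are confined to the range. The sketch below spells out the confinement rule, assuming 4K granularity at level 0 and 2M per entry at level 1.

/* Illustrative-only restatement of the skip rule; not the driver's function. */
static bool example_unbind_ptes_confined_to_range(unsigned int num_entries,
						  unsigned int pt_level,
						  u64 range_size)
{
	if (num_entries > 1)		/* multiple entries may reach past the range */
		return false;
	if (pt_level == 0)		/* 4K leaf entries match the range exactly */
		return true;
	if (pt_level == 1)		/* one 2M entry is confined only if range >= 2M */
		return range_size >= SZ_2M;
	return false;			/* 1G and larger entries: never skip */
}
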
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 5ecf003d513c..4daeebaab5a1 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -10,6 +10,7 @@
#include "xe_pt_types.h"
struct dma_fence;
+struct drm_exec;
struct xe_bo;
struct xe_device;
struct xe_exec_queue;
@@ -29,7 +30,7 @@ struct xe_vma_ops;
unsigned int xe_pt_shift(unsigned int level);
struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
- unsigned int level);
+ unsigned int level, struct drm_exec *exec);
void xe_pt_populate_empty(struct xe_tile *tile, struct xe_vm *vm,
struct xe_pt *pt);
diff --git a/drivers/gpu/drm/xe/xe_pt_types.h b/drivers/gpu/drm/xe/xe_pt_types.h
index 69eab6f37cfe..881f01e14db8 100644
--- a/drivers/gpu/drm/xe/xe_pt_types.h
+++ b/drivers/gpu/drm/xe/xe_pt_types.h
@@ -45,8 +45,7 @@ struct xe_pt_ops {
u64 (*pte_encode_addr)(struct xe_device *xe, u64 addr,
u16 pat_index,
u32 pt_level, bool devmem, u64 flags);
- u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset,
- u16 pat_index);
+ u64 (*pde_encode_bo)(struct xe_bo *bo, u64 bo_offset);
};
struct xe_pt_entry {
@@ -106,8 +105,6 @@ struct xe_vm_pgtable_update_ops {
u32 current_op;
/** @needs_svm_lock: Needs SVM lock */
bool needs_svm_lock;
- /** @needs_userptr_lock: Needs userptr lock */
- bool needs_userptr_lock;
/** @needs_invalidation: Needs invalidation */
bool needs_invalidation;
/**
diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
index 454ea7dc08ac..bdbdbbf6a678 100644
--- a/drivers/gpu/drm/xe/xe_pxp.c
+++ b/drivers/gpu/drm/xe/xe_pxp.c
@@ -504,65 +504,62 @@ int xe_pxp_exec_queue_set_type(struct xe_pxp *pxp, struct xe_exec_queue *q, u8 t
return 0;
}
-static void __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
+static int __exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
{
- spin_lock_irq(&pxp->queues.lock);
- list_add_tail(&q->pxp.link, &pxp->queues.list);
- spin_unlock_irq(&pxp->queues.lock);
+ int ret = 0;
+
+ /*
+ * A queue can be added to the list only if the PXP is in active status,
+ * otherwise the termination might not handle it correctly.
+ */
+ mutex_lock(&pxp->mutex);
+
+ if (pxp->status == XE_PXP_ACTIVE) {
+ spin_lock_irq(&pxp->queues.lock);
+ list_add_tail(&q->pxp.link, &pxp->queues.list);
+ spin_unlock_irq(&pxp->queues.lock);
+ } else if (pxp->status == XE_PXP_ERROR || pxp->status == XE_PXP_SUSPENDED) {
+ ret = -EIO;
+ } else {
+ ret = -EBUSY; /* try again later */
+ }
+
+ mutex_unlock(&pxp->mutex);
+
+ return ret;
}
-/**
- * xe_pxp_exec_queue_add - add a queue to the PXP list
- * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled)
- * @q: the queue to add to the list
- *
- * If PXP is enabled and the prerequisites are done, start the PXP ARB
- * session (if not already running) and add the queue to the PXP list. Note
- * that the queue must have previously been marked as using PXP with
- * xe_pxp_exec_queue_set_type.
- *
- * Returns 0 if the PXP ARB session is running and the queue is in the list,
- * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done,
- * other errno value if something goes wrong during the session start.
- */
-int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
+static int pxp_start(struct xe_pxp *pxp, u8 type)
{
int ret = 0;
+ bool restart = false;
if (!xe_pxp_is_enabled(pxp))
return -ENODEV;
/* we only support HWDRM sessions right now */
- xe_assert(pxp->xe, q->pxp.type == DRM_XE_PXP_TYPE_HWDRM);
+ xe_assert(pxp->xe, type == DRM_XE_PXP_TYPE_HWDRM);
- /*
- * Runtime suspend kills PXP, so we take a reference to prevent it from
- * happening while we have active queues that use PXP
- */
- xe_pm_runtime_get(pxp->xe);
+ /* get_readiness_status() returns 0 for in-progress and 1 for done */
+ ret = xe_pxp_get_readiness_status(pxp);
+ if (ret <= 0)
+ return ret ?: -EBUSY;
- if (!pxp_prerequisites_done(pxp)) {
- ret = -EBUSY;
- goto out;
- }
+ ret = 0;
wait_for_idle:
/*
* if there is an action in progress, wait for it. We need to wait
* outside the lock because the completion is done from within the lock.
- * Note that the two action should never be pending at the same time.
+ * Note that the two actions should never be pending at the same time.
*/
if (!wait_for_completion_timeout(&pxp->termination,
- msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS))) {
- ret = -ETIMEDOUT;
- goto out;
- }
+ msecs_to_jiffies(PXP_TERMINATION_TIMEOUT_MS)))
+ return -ETIMEDOUT;
if (!wait_for_completion_timeout(&pxp->activation,
- msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS))) {
- ret = -ETIMEDOUT;
- goto out;
- }
+ msecs_to_jiffies(PXP_ACTIVATION_TIMEOUT_MS)))
+ return -ETIMEDOUT;
mutex_lock(&pxp->mutex);
@@ -570,11 +567,9 @@ wait_for_idle:
switch (pxp->status) {
case XE_PXP_ERROR:
ret = -EIO;
- break;
+ goto out_unlock;
case XE_PXP_ACTIVE:
- __exec_queue_add(pxp, q);
- mutex_unlock(&pxp->mutex);
- goto out;
+ goto out_unlock;
case XE_PXP_READY_TO_START:
pxp->status = XE_PXP_START_IN_PROGRESS;
reinit_completion(&pxp->activation);
@@ -582,8 +577,8 @@ wait_for_idle:
case XE_PXP_START_IN_PROGRESS:
/* If a start is in progress then the completion must not be done */
XE_WARN_ON(completion_done(&pxp->activation));
- mutex_unlock(&pxp->mutex);
- goto wait_for_idle;
+ restart = true;
+ goto out_unlock;
case XE_PXP_NEEDS_TERMINATION:
mark_termination_in_progress(pxp);
break;
@@ -591,29 +586,25 @@ wait_for_idle:
case XE_PXP_NEEDS_ADDITIONAL_TERMINATION:
/* If a termination is in progress then the completion must not be done */
XE_WARN_ON(completion_done(&pxp->termination));
- mutex_unlock(&pxp->mutex);
- goto wait_for_idle;
+ restart = true;
+ goto out_unlock;
case XE_PXP_SUSPENDED:
default:
drm_err(&pxp->xe->drm, "unexpected state during PXP start: %u\n", pxp->status);
ret = -EIO;
- break;
+ goto out_unlock;
}
mutex_unlock(&pxp->mutex);
- if (ret)
- goto out;
-
if (!completion_done(&pxp->termination)) {
ret = pxp_terminate_hw(pxp);
if (ret) {
drm_err(&pxp->xe->drm, "PXP termination failed before start\n");
mutex_lock(&pxp->mutex);
pxp->status = XE_PXP_ERROR;
- mutex_unlock(&pxp->mutex);
- goto out;
+ goto out_unlock;
}
goto wait_for_idle;
@@ -635,21 +626,59 @@ wait_for_idle:
if (pxp->status != XE_PXP_START_IN_PROGRESS) {
drm_err(&pxp->xe->drm, "unexpected state after PXP start: %u\n", pxp->status);
pxp->status = XE_PXP_NEEDS_TERMINATION;
- mutex_unlock(&pxp->mutex);
- goto wait_for_idle;
+ restart = true;
+ goto out_unlock;
}
/* If everything went ok, update the status and add the queue to the list */
- if (!ret) {
+ if (!ret)
pxp->status = XE_PXP_ACTIVE;
- __exec_queue_add(pxp, q);
- } else {
+ else
pxp->status = XE_PXP_ERROR;
- }
+out_unlock:
mutex_unlock(&pxp->mutex);
-out:
+ if (restart)
+ goto wait_for_idle;
+
+ return ret;
+}
+
+/**
+ * xe_pxp_exec_queue_add - add a queue to the PXP list
+ * @pxp: the xe->pxp pointer (it will be NULL if PXP is disabled)
+ * @q: the queue to add to the list
+ *
+ * If PXP is enabled and the prerequisites are done, start the PXP default
+ * session (if not already running) and add the queue to the PXP list.
+ *
+ * Returns 0 if the PXP session is running and the queue is in the list,
+ * -ENODEV if PXP is disabled, -EBUSY if the PXP prerequisites are not done,
+ * other errno value if something goes wrong during the session start.
+ */
+int xe_pxp_exec_queue_add(struct xe_pxp *pxp, struct xe_exec_queue *q)
+{
+ int ret;
+
+ if (!xe_pxp_is_enabled(pxp))
+ return -ENODEV;
+
+ /*
+ * Runtime suspend kills PXP, so we take a reference to prevent it from
+ * happening while we have active queues that use PXP
+ */
+ xe_pm_runtime_get(pxp->xe);
+
+start:
+ ret = pxp_start(pxp, q->pxp.type);
+
+ if (!ret) {
+ ret = __exec_queue_add(pxp, q);
+ if (ret == -EBUSY)
+ goto start;
+ }
+
/*
* in the successful case the PM ref is released from
* xe_pxp_exec_queue_remove
@@ -659,6 +688,7 @@ out:
return ret;
}
+ALLOW_ERROR_INJECTION(xe_pxp_exec_queue_add, ERRNO);
static void __pxp_exec_queue_remove(struct xe_pxp *pxp, struct xe_exec_queue *q, bool lock)
{
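
For context on the reworked start path above, a hedged sketch of a caller honoring the documented return codes of xe_pxp_exec_queue_add(); the example_* helpers and the retry policy are hypothetical, while xe_pxp_exec_queue_remove() is the existing counterpart that drops the queue from the list and releases the runtime PM reference.

/* Hypothetical caller pairing add/remove around protected work (sketch only). */
static int example_run_protected(struct xe_pxp *pxp, struct xe_exec_queue *q)
{
	int err;

	do {
		err = xe_pxp_exec_queue_add(pxp, q);
		/* -EBUSY: PXP prerequisites not done yet, try again shortly */
	} while (err == -EBUSY && example_wait_a_bit());

	if (err)
		return err;	/* -ENODEV: PXP disabled; other errors: start failed */

	example_submit_protected_work(q);	/* placeholder */

	/* Drops the queue from the PXP list and releases the PM reference */
	xe_pxp_exec_queue_remove(pxp, q);
	return 0;
}
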
diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
index d92ec0f515b0..e60526e30030 100644
--- a/drivers/gpu/drm/xe/xe_pxp_submit.c
+++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
@@ -54,8 +54,9 @@ static int allocate_vcs_execution_resources(struct xe_pxp *pxp)
* Each termination is 16 DWORDS, so 4K is enough to contain a
* termination for each sessions.
*/
- bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K, ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT);
+ bo = xe_bo_create_pin_map_novm(xe, tile, SZ_4K, ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_GGTT,
+ false);
if (IS_ERR(bo)) {
err = PTR_ERR(bo);
goto out_queue;
@@ -87,7 +88,9 @@ static int allocate_gsc_client_resources(struct xe_gt *gt,
{
struct xe_tile *tile = gt_to_tile(gt);
struct xe_device *xe = tile_to_xe(tile);
+ struct xe_validation_ctx ctx;
struct xe_hw_engine *hwe;
+ struct drm_exec exec;
struct xe_vm *vm;
struct xe_bo *bo;
struct xe_exec_queue *q;
@@ -101,20 +104,31 @@ static int allocate_gsc_client_resources(struct xe_gt *gt,
xe_assert(xe, hwe);
/* PXP instructions must be issued from PPGTT */
- vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
+ vm = xe_vm_create(xe, XE_VM_FLAG_GSC, NULL);
if (IS_ERR(vm))
return PTR_ERR(vm);
/* We allocate a single object for the batch and the in/out memory */
- xe_vm_lock(vm, false);
- bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
- ttm_bo_type_kernel,
- XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
- xe_vm_unlock(vm);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- goto vm_out;
+
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags){}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ break;
+
+ bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BB_SIZE + inout_size * 2,
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED |
+ XE_BO_FLAG_NEEDS_UC, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
}
+ if (err)
+ goto vm_out;
fence = xe_vm_bind_kernel_bo(vm, bo, NULL, 0, XE_CACHE_WB);
if (IS_ERR(fence)) {
diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
index 2dbf4066d86f..1c0915e2cc16 100644
--- a/drivers/gpu/drm/xe/xe_query.c
+++ b/drivers/gpu/drm/xe/xe_query.c
@@ -21,12 +21,14 @@
#include "xe_force_wake.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
+#include "xe_gt_topology.h"
#include "xe_guc_hwconfig.h"
#include "xe_macros.h"
#include "xe_mmio.h"
#include "xe_oa.h"
#include "xe_pxp.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_vram_types.h"
#include "xe_wa.h"
static const u16 xe_to_user_engine_class[] = {
@@ -141,7 +143,7 @@ query_engine_cycles(struct xe_device *xe,
return -EINVAL;
eci = &resp.eci;
- if (eci->gt_id >= XE_MAX_GT_PER_TILE)
+ if (eci->gt_id >= xe->info.max_gt_per_tile)
return -EINVAL;
gt = xe_device_get_gt(xe, eci->gt_id);
@@ -274,8 +276,7 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[0].instance = 0;
mem_regions->mem_regions[0].min_page_size = PAGE_SIZE;
mem_regions->mem_regions[0].total_size = man->size << PAGE_SHIFT;
- if (perfmon_capable())
- mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
+ mem_regions->mem_regions[0].used = ttm_resource_manager_usage(man);
mem_regions->num_mem_regions = 1;
for (i = XE_PL_VRAM0; i <= XE_PL_VRAM1; ++i) {
@@ -291,13 +292,11 @@ static int query_mem_regions(struct xe_device *xe,
mem_regions->mem_regions[mem_regions->num_mem_regions].total_size =
man->size;
- if (perfmon_capable()) {
- xe_ttm_vram_get_used(man,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].used,
- &mem_regions->mem_regions
- [mem_regions->num_mem_regions].cpu_visible_used);
- }
+ xe_ttm_vram_get_used(man,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].used,
+ &mem_regions->mem_regions
+ [mem_regions->num_mem_regions].cpu_visible_used);
mem_regions->mem_regions[mem_regions->num_mem_regions].cpu_visible_size =
xe_ttm_vram_get_cpu_visible_size(man);
@@ -337,7 +336,7 @@ static int query_config(struct xe_device *xe, struct drm_xe_device_query *query)
config->num_params = num_params;
config->info[DRM_XE_QUERY_CONFIG_REV_AND_DEVICE_ID] =
xe->info.devid | (xe->info.revid << 16);
- if (xe_device_get_root_tile(xe)->mem.vram.usable_size)
+ if (xe->mem.vram)
config->info[DRM_XE_QUERY_CONFIG_FLAGS] |=
DRM_XE_QUERY_CONFIG_FLAG_HAS_VRAM;
if (xe->info.has_usm && IS_ENABLED(CONFIG_DRM_XE_GPUSVM))
@@ -368,6 +367,7 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
struct drm_xe_query_gt_list __user *query_ptr =
u64_to_user_ptr(query->data);
struct drm_xe_query_gt_list *gt_list;
+ int iter = 0;
u8 id;
if (query->size == 0) {
@@ -385,12 +385,12 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
for_each_gt(gt, xe, id) {
if (xe_gt_is_media_type(gt))
- gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
+ gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MEDIA;
else
- gt_list->gt_list[id].type = DRM_XE_QUERY_GT_TYPE_MAIN;
- gt_list->gt_list[id].tile_id = gt_to_tile(gt)->id;
- gt_list->gt_list[id].gt_id = gt->info.id;
- gt_list->gt_list[id].reference_clock = gt->info.reference_clock;
+ gt_list->gt_list[iter].type = DRM_XE_QUERY_GT_TYPE_MAIN;
+ gt_list->gt_list[iter].tile_id = gt_to_tile(gt)->id;
+ gt_list->gt_list[iter].gt_id = gt->info.id;
+ gt_list->gt_list[iter].reference_clock = gt->info.reference_clock;
/*
* The mem_regions indexes in the mask below need to
* directly identify the struct
@@ -406,19 +406,21 @@ static int query_gt_list(struct xe_device *xe, struct drm_xe_device_query *query
* assumption.
*/
if (!IS_DGFX(xe))
- gt_list->gt_list[id].near_mem_regions = 0x1;
+ gt_list->gt_list[iter].near_mem_regions = 0x1;
else
- gt_list->gt_list[id].near_mem_regions =
- BIT(gt_to_tile(gt)->id) << 1;
- gt_list->gt_list[id].far_mem_regions = xe->info.mem_region_mask ^
- gt_list->gt_list[id].near_mem_regions;
+ gt_list->gt_list[iter].near_mem_regions =
+ BIT(gt_to_tile(gt)->mem.vram->id) << 1;
+ gt_list->gt_list[iter].far_mem_regions = xe->info.mem_region_mask ^
+ gt_list->gt_list[iter].near_mem_regions;
- gt_list->gt_list[id].ip_ver_major =
+ gt_list->gt_list[iter].ip_ver_major =
REG_FIELD_GET(GMD_ID_ARCH_MASK, gt->info.gmdid);
- gt_list->gt_list[id].ip_ver_minor =
+ gt_list->gt_list[iter].ip_ver_minor =
REG_FIELD_GET(GMD_ID_RELEASE_MASK, gt->info.gmdid);
- gt_list->gt_list[id].ip_ver_rev =
+ gt_list->gt_list[iter].ip_ver_rev =
REG_FIELD_GET(GMD_ID_REVID, gt->info.gmdid);
+
+ iter++;
}
if (copy_to_user(query_ptr, gt_list, size)) {
@@ -434,7 +436,7 @@ static int query_hwconfig(struct xe_device *xe,
struct drm_xe_device_query *query)
{
struct xe_gt *gt = xe_root_mmio_gt(xe);
- size_t size = xe_guc_hwconfig_size(&gt->uc.guc);
+ size_t size = gt ? xe_guc_hwconfig_size(&gt->uc.guc) : 0;
void __user *query_ptr = u64_to_user_ptr(query->data);
void *hwconfig;
@@ -473,7 +475,7 @@ static size_t calc_topo_query_size(struct xe_device *xe)
sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss);
/* L3bank mask may not be available for some GTs */
- if (!XE_WA(gt, no_media_l3))
+ if (xe_gt_topology_report_l3(gt))
query_size += sizeof(struct drm_xe_query_topology_mask) +
sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask);
}
@@ -536,7 +538,7 @@ static int query_gt_topology(struct xe_device *xe,
* mask, then it's better to omit L3 from the query rather than
* reporting bogus or zeroed information to userspace.
*/
- if (!XE_WA(gt, no_media_l3)) {
+ if (xe_gt_topology_report_l3(gt)) {
topo.type = DRM_XE_TOPO_L3_BANK;
err = copy_mask(&query_ptr, &topo, gt->fuse_topo.l3_bank_mask,
sizeof(gt->fuse_topo.l3_bank_mask));
@@ -683,8 +685,8 @@ static int query_oa_units(struct xe_device *xe,
du->oa_timestamp_freq = xe_oa_timestamp_frequency(gt);
du->capabilities = DRM_XE_OA_CAPS_BASE | DRM_XE_OA_CAPS_SYNCS |
DRM_XE_OA_CAPS_OA_BUFFER_SIZE |
- DRM_XE_OA_CAPS_WAIT_NUM_REPORTS;
-
+ DRM_XE_OA_CAPS_WAIT_NUM_REPORTS |
+ DRM_XE_OA_CAPS_OAM;
j = 0;
for_each_hw_engine(hwe, gt, hwe_id) {
if (!xe_hw_engine_is_reserved(hwe) &&
@@ -745,10 +747,8 @@ static int query_eu_stall(struct xe_device *xe,
u32 num_rates;
int ret;
- if (!xe_eu_stall_supported_on_platform(xe)) {
- drm_dbg(&xe->drm, "EU stall monitoring is not supported on this platform\n");
+ if (!xe_eu_stall_supported_on_platform(xe))
return -ENODEV;
- }
array_size = xe_eu_stall_get_sampling_rates(&num_rates, &rates);
size = sizeof(struct drm_xe_query_eu_stall) + array_size;
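The gt_list change above replaces the sparse GT id with a dense output index, so the returned array has no zeroed holes when GT ids are non-contiguous (for example when a tile exposes no media GT). A minimal sketch of the indexing invariant, with fill_gt_entry() as a hypothetical helper:

	int iter = 0;	/* dense index into the output array */
	u8 id;		/* GT id, possibly sparse */

	for_each_gt(gt, xe, id) {
		fill_gt_entry(&gt_list->gt_list[iter], gt);	/* hypothetical helper */
		iter++;		/* always advances by one per reported GT */
	}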
diff --git a/drivers/gpu/drm/xe/xe_range_fence.h b/drivers/gpu/drm/xe/xe_range_fence.h
index edd58b34f5c0..4934729dd904 100644
--- a/drivers/gpu/drm/xe/xe_range_fence.h
+++ b/drivers/gpu/drm/xe/xe_range_fence.h
@@ -13,13 +13,13 @@
struct xe_range_fence_tree;
struct xe_range_fence;
-/** struct xe_range_fence_ops - XE range fence ops */
+/** struct xe_range_fence_ops - Xe range fence ops */
struct xe_range_fence_ops {
/** @free: free range fence op */
void (*free)(struct xe_range_fence *rfence);
};
-/** struct xe_range_fence - XE range fence (address conflict tracking) */
+/** struct xe_range_fence - Xe range fence (address conflict tracking) */
struct xe_range_fence {
/** @rb: RB tree node inserted into interval tree */
struct rb_node rb;
diff --git a/drivers/gpu/drm/xe/xe_reg_whitelist.c b/drivers/gpu/drm/xe/xe_reg_whitelist.c
index 23f6c81d9994..7ca360b2c20d 100644
--- a/drivers/gpu/drm/xe/xe_reg_whitelist.c
+++ b/drivers/gpu/drm/xe/xe_reg_whitelist.c
@@ -19,7 +19,8 @@
#undef XE_REG_MCR
#define XE_REG_MCR(...) XE_REG(__VA_ARGS__, .mcr = 1)
-static bool match_not_render(const struct xe_gt *gt,
+static bool match_not_render(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
return hwe->class != XE_ENGINE_CLASS_RENDER;
@@ -88,6 +89,13 @@ static const struct xe_rtp_entry_sr register_whitelist[] = {
RING_FORCE_TO_NONPRIV_ACCESS_RD |
RING_FORCE_TO_NONPRIV_RANGE_4))
},
+ { XE_RTP_NAME("14024997852"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(WHITELIST(FF_MODE,
+ RING_FORCE_TO_NONPRIV_ACCESS_RW),
+ WHITELIST(VFLSKPD,
+ RING_FORCE_TO_NONPRIV_ACCESS_RW))
+ },
};
static void whitelist_apply_to_hwe(struct xe_hw_engine *hwe)
diff --git a/drivers/gpu/drm/xe/xe_res_cursor.h b/drivers/gpu/drm/xe/xe_res_cursor.h
index d1a403cfb628..4e00008b7081 100644
--- a/drivers/gpu/drm/xe/xe_res_cursor.h
+++ b/drivers/gpu/drm/xe/xe_res_cursor.h
@@ -55,8 +55,8 @@ struct xe_res_cursor {
u32 mem_type;
/** @sgl: Scatterlist for cursor */
struct scatterlist *sgl;
- /** @dma_addr: Current element in a struct drm_pagemap_device_addr array */
- const struct drm_pagemap_device_addr *dma_addr;
+ /** @dma_addr: Current element in a struct drm_pagemap_addr array */
+ const struct drm_pagemap_addr *dma_addr;
/** @mm: Buddy allocator for VRAM cursor */
struct drm_buddy *mm;
/**
@@ -170,7 +170,7 @@ static inline void __xe_res_sg_next(struct xe_res_cursor *cur)
*/
static inline void __xe_res_dma_next(struct xe_res_cursor *cur)
{
- const struct drm_pagemap_device_addr *addr = cur->dma_addr;
+ const struct drm_pagemap_addr *addr = cur->dma_addr;
u64 start = cur->start;
while (start >= cur->dma_seg_size) {
@@ -222,14 +222,14 @@ static inline void xe_res_first_sg(const struct sg_table *sg,
/**
* xe_res_first_dma - initialize a xe_res_cursor with dma_addr array
*
- * @dma_addr: struct drm_pagemap_device_addr array to walk
+ * @dma_addr: struct drm_pagemap_addr array to walk
* @start: Start of the range
* @size: Size of the range
* @cur: cursor object to initialize
*
* Start walking over the range of allocations between @start and @size.
*/
-static inline void xe_res_first_dma(const struct drm_pagemap_device_addr *dma_addr,
+static inline void xe_res_first_dma(const struct drm_pagemap_addr *dma_addr,
u64 start, u64 size,
struct xe_res_cursor *cur)
{
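Since the rename only changes the element type, a cursor over a struct drm_pagemap_addr array is still consumed the same way. A short sketch of such a walk, assuming xe_res_next() and xe_res_dma() keep their existing semantics and with program_chunk() as a hypothetical consumer:

	struct xe_res_cursor cur;

	/* Walk @npages pages described by a drm_pagemap_addr array. */
	for (xe_res_first_dma(pagemap_addrs, 0, (u64)npages << PAGE_SHIFT, &cur);
	     cur.remaining; xe_res_next(&cur, cur.size)) {
		u64 dma = xe_res_dma(&cur);	/* DMA address of the current chunk */

		program_chunk(dma, cur.size);	/* hypothetical consumer */
	}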
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index bc1689db4cd7..ac0c6dcffe15 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -110,13 +110,14 @@ static int emit_bb_start(u64 batch_addr, u32 ppgtt_flag, u32 *dw, int i)
return i;
}
-static int emit_flush_invalidate(u32 *dw, int i)
+static int emit_flush_invalidate(u32 addr, u32 val, u32 flush_flags, u32 *dw, int i)
{
- dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | MI_FLUSH_DW_OP_STOREDW |
- MI_FLUSH_IMM_DW | MI_FLUSH_DW_STORE_INDEX;
- dw[i++] = LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR;
- dw[i++] = 0;
+ dw[i++] = MI_FLUSH_DW | MI_FLUSH_DW_OP_STOREDW |
+ MI_FLUSH_IMM_DW | (flush_flags & MI_INVALIDATE_TLB) ?: 0;
+
+ dw[i++] = addr | MI_FLUSH_DW_USE_GTT;
dw[i++] = 0;
+ dw[i++] = val;
return i;
}
@@ -178,7 +179,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
u32 flags;
- if (XE_WA(gt, 14016712196))
+ if (XE_GT_WA(gt, 14016712196))
i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_DEPTH_CACHE_FLUSH,
LRC_PPHWSP_FLUSH_INVAL_SCRATCH_ADDR, 0);
@@ -189,7 +190,7 @@ static int emit_render_cache_flush(struct xe_sched_job *job, u32 *dw, int i)
PIPE_CONTROL_DC_FLUSH_ENABLE |
PIPE_CONTROL_FLUSH_ENABLE);
- if (XE_WA(gt, 1409600907))
+ if (XE_GT_WA(gt, 1409600907))
flags |= PIPE_CONTROL_DEPTH_STALL;
if (lacks_render)
@@ -205,7 +206,7 @@ static int emit_pipe_control_to_ring_end(struct xe_hw_engine *hwe, u32 *dw, int
if (hwe->class != XE_ENGINE_CLASS_RENDER)
return i;
- if (XE_WA(hwe->gt, 16020292621))
+ if (XE_GT_WA(hwe->gt, 16020292621))
i = emit_pipe_control(dw, i, 0, PIPE_CONTROL_LRI_POST_SYNC,
RING_NOPID(hwe->mmio_base).addr, 0);
@@ -244,12 +245,14 @@ static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
/* for engines that don't require any special HW handling (no EUs, no aux inval, etc) */
static void __emit_job_gen12_simple(struct xe_sched_job *job, struct xe_lrc *lrc,
- u64 batch_addr, u32 seqno)
+ u64 batch_addr, u32 *head, u32 seqno)
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
struct xe_gt *gt = job->q->gt;
+ *head = lrc->ring.tail;
+
i = emit_copy_timestamp(lrc, dw, i);
if (job->ring_ops_flush_tlb) {
@@ -295,7 +298,7 @@ static bool has_aux_ccs(struct xe_device *xe)
}
static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
- u64 batch_addr, u32 seqno)
+ u64 batch_addr, u32 *head, u32 seqno)
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
@@ -303,6 +306,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
struct xe_device *xe = gt_to_xe(gt);
bool decode = job->q->class == XE_ENGINE_CLASS_VIDEO_DECODE;
+ *head = lrc->ring.tail;
+
i = emit_copy_timestamp(lrc, dw, i);
dw[i++] = preparser_disable(true);
@@ -345,7 +350,8 @@ static void __emit_job_gen12_video(struct xe_sched_job *job, struct xe_lrc *lrc,
static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
struct xe_lrc *lrc,
- u64 batch_addr, u32 seqno)
+ u64 batch_addr, u32 *head,
+ u32 seqno)
{
u32 dw[MAX_JOB_SIZE_DW], i = 0;
u32 ppgtt_flag = get_ppgtt_flag(job);
@@ -354,6 +360,8 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
bool lacks_render = !(gt->info.engine_mask & XE_HW_ENGINE_RCS_MASK);
u32 mask_flags = 0;
+ *head = lrc->ring.tail;
+
i = emit_copy_timestamp(lrc, dw, i);
dw[i++] = preparser_disable(true);
@@ -395,33 +403,31 @@ static void __emit_job_gen12_render_compute(struct xe_sched_job *job,
}
static void emit_migration_job_gen12(struct xe_sched_job *job,
- struct xe_lrc *lrc, u32 seqno)
+ struct xe_lrc *lrc, u32 *head,
+ u32 seqno)
{
+ u32 saddr = xe_lrc_start_seqno_ggtt_addr(lrc);
u32 dw[MAX_JOB_SIZE_DW], i = 0;
+ *head = lrc->ring.tail;
+
i = emit_copy_timestamp(lrc, dw, i);
- i = emit_store_imm_ggtt(xe_lrc_start_seqno_ggtt_addr(lrc),
- seqno, dw, i);
+ i = emit_store_imm_ggtt(saddr, seqno, dw, i);
dw[i++] = MI_ARB_ON_OFF | MI_ARB_DISABLE; /* Enabled again below */
i = emit_bb_start(job->ptrs[0].batch_addr, BIT(8), dw, i);
- if (!IS_SRIOV_VF(gt_to_xe(job->q->gt))) {
- /* XXX: Do we need this? Leaving for now. */
- dw[i++] = preparser_disable(true);
- i = emit_flush_invalidate(dw, i);
- dw[i++] = preparser_disable(false);
- }
+ dw[i++] = preparser_disable(true);
+ i = emit_flush_invalidate(saddr, seqno, job->migrate_flush_flags, dw, i);
+ dw[i++] = preparser_disable(false);
i = emit_bb_start(job->ptrs[1].batch_addr, BIT(8), dw, i);
- dw[i++] = MI_FLUSH_DW | MI_INVALIDATE_TLB | job->migrate_flush_flags |
- MI_FLUSH_DW_OP_STOREDW | MI_FLUSH_IMM_DW;
- dw[i++] = xe_lrc_seqno_ggtt_addr(lrc) | MI_FLUSH_DW_USE_GTT;
- dw[i++] = 0;
- dw[i++] = seqno; /* value */
+ i = emit_flush_imm_ggtt(xe_lrc_seqno_ggtt_addr(lrc), seqno,
+ job->migrate_flush_flags,
+ dw, i);
i = emit_user_interrupt(dw, i);
@@ -438,6 +444,7 @@ static void emit_job_gen12_gsc(struct xe_sched_job *job)
__emit_job_gen12_simple(job, job->q->lrc[0],
job->ptrs[0].batch_addr,
+ &job->ptrs[0].head,
xe_sched_job_lrc_seqno(job));
}
@@ -447,6 +454,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
if (xe_sched_job_is_migration(job->q)) {
emit_migration_job_gen12(job, job->q->lrc[0],
+ &job->ptrs[0].head,
xe_sched_job_lrc_seqno(job));
return;
}
@@ -454,6 +462,7 @@ static void emit_job_gen12_copy(struct xe_sched_job *job)
for (i = 0; i < job->q->width; ++i)
__emit_job_gen12_simple(job, job->q->lrc[i],
job->ptrs[i].batch_addr,
+ &job->ptrs[i].head,
xe_sched_job_lrc_seqno(job));
}
@@ -465,6 +474,7 @@ static void emit_job_gen12_video(struct xe_sched_job *job)
for (i = 0; i < job->q->width; ++i)
__emit_job_gen12_video(job, job->q->lrc[i],
job->ptrs[i].batch_addr,
+ &job->ptrs[i].head,
xe_sched_job_lrc_seqno(job));
}
@@ -475,6 +485,7 @@ static void emit_job_gen12_render_compute(struct xe_sched_job *job)
for (i = 0; i < job->q->width; ++i)
__emit_job_gen12_render_compute(job, job->q->lrc[i],
job->ptrs[i].batch_addr,
+ &job->ptrs[i].head,
xe_sched_job_lrc_seqno(job));
}
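Every emission path above now snapshots lrc->ring.tail into job->ptrs[i].head before any dwords are written, i.e. a job's head is simply the ring position where its commands begin. A minimal sketch of that bookkeeping, assuming xe_lrc_write_ring() keeps its current signature and with emit_job_dwords() as a hypothetical stand-in for the per-class emit helpers:

	static void emit_one_lrc(struct xe_sched_job *job, struct xe_lrc *lrc,
				 int idx, u32 seqno)
	{
		u32 dw[MAX_JOB_SIZE_DW], i = 0;

		/* Record where this job will start in the ring. */
		job->ptrs[idx].head = lrc->ring.tail;

		i = emit_job_dwords(job, lrc, seqno, dw, i);	/* hypothetical */
		xe_lrc_write_ring(lrc, dw, i * sizeof(*dw));
	}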
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 29e694bb1219..ed509b1c8cfc 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -9,6 +9,7 @@
#include <uapi/drm/xe_drm.h>
+#include "xe_configfs.h"
#include "xe_gt.h"
#include "xe_gt_topology.h"
#include "xe_macros.h"
@@ -56,37 +57,61 @@ static bool rule_matches(const struct xe_device *xe,
xe->info.subplatform == r->subplatform;
break;
case XE_RTP_MATCH_GRAPHICS_VERSION:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.graphics_verx100 == r->ver_start &&
(!has_samedia(xe) || !xe_gt_is_media_type(gt));
break;
case XE_RTP_MATCH_GRAPHICS_VERSION_RANGE:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.graphics_verx100 >= r->ver_start &&
xe->info.graphics_verx100 <= r->ver_end &&
(!has_samedia(xe) || !xe_gt_is_media_type(gt));
break;
case XE_RTP_MATCH_GRAPHICS_VERSION_ANY_GT:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.graphics_verx100 == r->ver_start;
break;
case XE_RTP_MATCH_GRAPHICS_STEP:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.step.graphics >= r->step_start &&
xe->info.step.graphics < r->step_end &&
(!has_samedia(xe) || !xe_gt_is_media_type(gt));
break;
case XE_RTP_MATCH_MEDIA_VERSION:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.media_verx100 == r->ver_start &&
(!has_samedia(xe) || xe_gt_is_media_type(gt));
break;
case XE_RTP_MATCH_MEDIA_VERSION_RANGE:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.media_verx100 >= r->ver_start &&
xe->info.media_verx100 <= r->ver_end &&
(!has_samedia(xe) || xe_gt_is_media_type(gt));
break;
case XE_RTP_MATCH_MEDIA_STEP:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.step.media >= r->step_start &&
xe->info.step.media < r->step_end &&
(!has_samedia(xe) || xe_gt_is_media_type(gt));
break;
case XE_RTP_MATCH_MEDIA_VERSION_ANY_GT:
+ if (drm_WARN_ON(&xe->drm, !gt))
+ return false;
+
match = xe->info.media_verx100 == r->ver_start;
break;
case XE_RTP_MATCH_INTEGRATED:
@@ -108,7 +133,7 @@ static bool rule_matches(const struct xe_device *xe,
match = hwe->class != r->engine_class;
break;
case XE_RTP_MATCH_FUNC:
- match = r->match_func(gt, hwe);
+ match = r->match_func(xe, gt, hwe);
break;
default:
drm_warn(&xe->drm, "Invalid RTP match %u\n",
@@ -186,6 +211,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
struct xe_device **xe)
{
switch (ctx->type) {
+ case XE_RTP_PROCESS_TYPE_DEVICE:
+ *hwe = NULL;
+ *gt = NULL;
+ *xe = ctx->xe;
+ break;
case XE_RTP_PROCESS_TYPE_GT:
*hwe = NULL;
*gt = ctx->gt;
@@ -310,13 +340,15 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
}
EXPORT_SYMBOL_IF_KUNIT(xe_rtp_process);
-bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+bool xe_rtp_match_even_instance(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
return hwe->instance % 2 == 0;
}
-bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+bool xe_rtp_match_first_render_or_compute(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
u64 render_compute_mask = gt->info.engine_mask &
@@ -326,23 +358,30 @@ bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
hwe->engine_id == __ffs(render_compute_mask);
}
-bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
- const struct xe_hw_engine *hwe)
+bool xe_rtp_match_not_sriov_vf(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
{
- unsigned int dss_per_gslice = 4;
- unsigned int dss;
-
- if (drm_WARN(&gt_to_xe(gt)->drm, xe_dss_mask_empty(gt->fuse_topo.g_dss_mask),
- "Checking gslice for platform without geometry pipeline\n"))
- return false;
+ return !IS_SRIOV_VF(xe);
+}
- dss = xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0);
+bool xe_rtp_match_psmi_enabled(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return xe_configfs_get_psmi_enabled(to_pci_dev(xe->drm.dev));
+}
- return dss >= dss_per_gslice;
+bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe)
+{
+ return xe_gt_has_discontiguous_dss_groups(gt);
}
-bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
+bool xe_rtp_match_has_flat_ccs(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe)
{
- return !IS_SRIOV_VF(gt_to_xe(gt));
+ return xe->info.has_flat_ccs;
}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 4fe736a11c42..ba5f940c0a96 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -422,7 +422,8 @@ struct xe_reg_sr;
#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \
struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \
- struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+ struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \
+ struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE })
void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
unsigned long *active_entries,
@@ -439,18 +440,21 @@ void xe_rtp_process(struct xe_rtp_process_ctx *ctx,
/**
* xe_rtp_match_even_instance - Match if engine instance is even
+ * @xe: Device structure
* @gt: GT structure
* @hwe: Engine instance
*
* Returns: true if engine instance is even, false otherwise
*/
-bool xe_rtp_match_even_instance(const struct xe_gt *gt,
+bool xe_rtp_match_even_instance(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
/*
* xe_rtp_match_first_render_or_compute - Match if it's first render or compute
* engine in the GT
*
+ * @xe: Device structure
* @gt: GT structure
* @hwe: Engine instance
*
@@ -462,29 +466,41 @@ bool xe_rtp_match_even_instance(const struct xe_gt *gt,
* Returns: true if engine id is the first to match the render reset domain,
* false otherwise.
*/
-bool xe_rtp_match_first_render_or_compute(const struct xe_gt *gt,
+bool xe_rtp_match_first_render_or_compute(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
/*
- * xe_rtp_match_first_gslice_fused_off - Match when first gslice is fused off
+ * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device
*
+ * @xe: Device structure
* @gt: GT structure
* @hwe: Engine instance
*
- * Returns: true if first gslice is fused off, false otherwise.
+ * Returns: true if device is not VF, false otherwise.
*/
-bool xe_rtp_match_first_gslice_fused_off(const struct xe_gt *gt,
- const struct xe_hw_engine *hwe);
+bool xe_rtp_match_not_sriov_vf(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
-/*
- * xe_rtp_match_not_sriov_vf - Match when not on SR-IOV VF device
- *
+bool xe_rtp_match_psmi_enabled(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
+
+bool xe_rtp_match_gt_has_discontiguous_dss_groups(const struct xe_device *xe,
+ const struct xe_gt *gt,
+ const struct xe_hw_engine *hwe);
+
+/**
+ * xe_rtp_match_has_flat_ccs - Match when platform has FlatCCS compression
+ * @xe: Device structure
* @gt: GT structure
* @hwe: Engine instance
*
- * Returns: true if device is not VF, false otherwise.
+ * Returns: true if platform has FlatCCS compression, false otherwise
*/
-bool xe_rtp_match_not_sriov_vf(const struct xe_gt *gt,
+bool xe_rtp_match_has_flat_ccs(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 1b76b947c706..6ba7f226c227 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -10,6 +10,7 @@
#include "regs/xe_reg_defs.h"
+struct xe_device;
struct xe_hw_engine;
struct xe_gt;
@@ -86,7 +87,8 @@ struct xe_rtp_rule {
u8 engine_class;
};
/* MATCH_FUNC */
- bool (*match_func)(const struct xe_gt *gt,
+ bool (*match_func)(const struct xe_device *xe,
+ const struct xe_gt *gt,
const struct xe_hw_engine *hwe);
};
};
@@ -110,12 +112,14 @@ struct xe_rtp_entry {
};
enum xe_rtp_process_type {
+ XE_RTP_PROCESS_TYPE_DEVICE,
XE_RTP_PROCESS_TYPE_GT,
XE_RTP_PROCESS_TYPE_ENGINE,
};
struct xe_rtp_process_ctx {
union {
+ struct xe_device *xe;
struct xe_gt *gt;
struct xe_hw_engine *hwe;
};
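With the new _Generic arm and process type, an RTP table can be evaluated against the device itself, with no GT or engine in the context; match functions used by such tables must therefore tolerate gt == NULL, which is what the drm_WARN_ON() guards above enforce. A hedged sketch of a device-scoped processing call (device_entries is a hypothetical table):

	void process_device_rtp(struct xe_device *xe,
				const struct xe_rtp_entry *device_entries)
	{
		struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe);

		/* Rule evaluation sees gt == NULL and hwe == NULL here. */
		xe_rtp_process(&ctx, device_entries);
	}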
diff --git a/drivers/gpu/drm/xe/xe_sa.c b/drivers/gpu/drm/xe/xe_sa.c
index 1d43e183ca21..63a5263dcf1b 100644
--- a/drivers/gpu/drm/xe/xe_sa.c
+++ b/drivers/gpu/drm/xe/xe_sa.c
@@ -69,7 +69,6 @@ struct xe_sa_manager *__xe_sa_bo_manager_init(struct xe_tile *tile, u32 size, u3
}
sa_manager->bo = bo;
sa_manager->is_iomem = bo->vmap.is_iomem;
- sa_manager->gpu_addr = xe_bo_ggtt_addr(bo);
if (bo->vmap.is_iomem) {
sa_manager->cpu_ptr = kvzalloc(managed_size, GFP_KERNEL);
@@ -111,6 +110,10 @@ struct drm_suballoc *__xe_sa_bo_new(struct xe_sa_manager *sa_manager, u32 size,
return drm_suballoc_new(&sa_manager->base, size, gfp, true, 0);
}
+/**
+ * xe_sa_bo_flush_write() - Copy the data from the sub-allocation to the GPU memory.
+ * @sa_bo: the &drm_suballoc to flush
+ */
void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
{
struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
@@ -124,6 +127,23 @@ void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo)
drm_suballoc_size(sa_bo));
}
+/**
+ * xe_sa_bo_sync_read() - Copy the data from GPU memory to the sub-allocation.
+ * @sa_bo: the &drm_suballoc to sync
+ */
+void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo)
+{
+ struct xe_sa_manager *sa_manager = to_xe_sa_manager(sa_bo->manager);
+ struct xe_device *xe = tile_to_xe(sa_manager->bo->tile);
+
+ if (!sa_manager->bo->vmap.is_iomem)
+ return;
+
+ xe_map_memcpy_from(xe, xe_sa_bo_cpu_addr(sa_bo), &sa_manager->bo->vmap,
+ drm_suballoc_soffset(sa_bo),
+ drm_suballoc_size(sa_bo));
+}
+
void xe_sa_bo_free(struct drm_suballoc *sa_bo,
struct dma_fence *fence)
{
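xe_sa_bo_sync_read() is the read-side counterpart of xe_sa_bo_flush_write(): both are no-ops when the backing BO is not iomem, and otherwise copy between the CPU shadow and GPU memory. A sketch of a full CPU-to-GPU-and-back round trip through a suballocation, with submit_and_wait() as a hypothetical placeholder for the actual submission path:

	/* CPU fills the request in the shadow copy... */
	memcpy(xe_sa_bo_cpu_addr(sa_bo), request, request_size);
	/* ...which is pushed to GPU memory only on flush. */
	xe_sa_bo_flush_write(sa_bo);

	submit_and_wait(q, xe_sa_bo_gpu_addr(sa_bo));	/* hypothetical */

	/* Pull the GPU-written response back into the shadow before reading. */
	xe_sa_bo_sync_read(sa_bo);
	memcpy(response, xe_sa_bo_cpu_addr(sa_bo), response_size);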
diff --git a/drivers/gpu/drm/xe/xe_sa.h b/drivers/gpu/drm/xe/xe_sa.h
index 1170ee5a81a8..1be744350836 100644
--- a/drivers/gpu/drm/xe/xe_sa.h
+++ b/drivers/gpu/drm/xe/xe_sa.h
@@ -7,6 +7,8 @@
#include <linux/sizes.h>
#include <linux/types.h>
+
+#include "xe_bo.h"
#include "xe_sa_types.h"
struct dma_fence;
@@ -35,6 +37,7 @@ static inline struct drm_suballoc *xe_sa_bo_new(struct xe_sa_manager *sa_manager
}
void xe_sa_bo_flush_write(struct drm_suballoc *sa_bo);
+void xe_sa_bo_sync_read(struct drm_suballoc *sa_bo);
void xe_sa_bo_free(struct drm_suballoc *sa_bo, struct dma_fence *fence);
static inline struct xe_sa_manager *
@@ -43,9 +46,20 @@ to_xe_sa_manager(struct drm_suballoc_manager *mng)
return container_of(mng, struct xe_sa_manager, base);
}
+/**
+ * xe_sa_manager_gpu_addr - Retrieve GPU address of a back storage BO
+ * within suballocator.
+ * @sa_manager: the &xe_sa_manager struct instance
+ * Return: GGTT address of the back storage BO.
+ */
+static inline u64 xe_sa_manager_gpu_addr(struct xe_sa_manager *sa_manager)
+{
+ return xe_bo_ggtt_addr(sa_manager->bo);
+}
+
static inline u64 xe_sa_bo_gpu_addr(struct drm_suballoc *sa)
{
- return to_xe_sa_manager(sa->manager)->gpu_addr +
+ return xe_sa_manager_gpu_addr(to_xe_sa_manager(sa->manager)) +
drm_suballoc_soffset(sa);
}
diff --git a/drivers/gpu/drm/xe/xe_sa_types.h b/drivers/gpu/drm/xe/xe_sa_types.h
index 2b070ff1292e..cb7238799dcb 100644
--- a/drivers/gpu/drm/xe/xe_sa_types.h
+++ b/drivers/gpu/drm/xe/xe_sa_types.h
@@ -12,7 +12,6 @@ struct xe_bo;
struct xe_sa_manager {
struct drm_suballoc_manager base;
struct xe_bo *bo;
- u64 gpu_addr;
void *cpu_ptr;
bool is_iomem;
};
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 1905ca590965..cb674a322113 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -113,7 +113,8 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
kref_init(&job->refcount);
xe_exec_queue_get(job->q);
- err = drm_sched_job_init(&job->drm, q->entity, 1, NULL);
+ err = drm_sched_job_init(&job->drm, q->entity, 1, NULL,
+ q->xef ? q->xef->drm->client_id : 0);
if (err)
goto err_free;
@@ -145,6 +146,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
for (i = 0; i < width; ++i)
job->ptrs[i].batch_addr = batch_addr[i];
+ atomic_inc(&q->job_cnt);
xe_pm_runtime_get_noresume(job_to_xe(job));
trace_xe_sched_job_create(job);
return job;
@@ -159,11 +161,11 @@ err_free:
}
/**
- * xe_sched_job_destroy - Destroy XE schedule job
- * @ref: reference to XE schedule job
+ * xe_sched_job_destroy - Destroy Xe schedule job
+ * @ref: reference to Xe schedule job
*
* Called when ref == 0, drop a reference to job's xe_engine + fence, cleanup
- * base DRM schedule job, and free memory for XE schedule job.
+ * base DRM schedule job, and free memory for Xe schedule job.
*/
void xe_sched_job_destroy(struct kref *ref)
{
@@ -176,6 +178,7 @@ void xe_sched_job_destroy(struct kref *ref)
dma_fence_put(job->fence);
drm_sched_job_cleanup(&job->drm);
job_free(job);
+ atomic_dec(&q->job_cnt);
xe_exec_queue_put(q);
xe_pm_runtime_put(xe);
}
@@ -216,15 +219,17 @@ void xe_sched_job_set_error(struct xe_sched_job *job, int error)
bool xe_sched_job_started(struct xe_sched_job *job)
{
+ struct dma_fence *fence = dma_fence_chain_contained(job->fence);
struct xe_lrc *lrc = job->q->lrc[0];
- return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
- xe_lrc_start_seqno(lrc),
- dma_fence_chain_contained(job->fence)->ops);
+ return !__dma_fence_is_later(fence,
+ xe_sched_job_lrc_seqno(job),
+ xe_lrc_start_seqno(lrc));
}
bool xe_sched_job_completed(struct xe_sched_job *job)
{
+ struct dma_fence *fence = dma_fence_chain_contained(job->fence);
struct xe_lrc *lrc = job->q->lrc[0];
/*
@@ -232,9 +237,9 @@ bool xe_sched_job_completed(struct xe_sched_job *job)
* parallel handshake is done.
*/
- return !__dma_fence_is_later(xe_sched_job_lrc_seqno(job),
- xe_lrc_seqno(lrc),
- dma_fence_chain_contained(job->fence)->ops);
+ return !__dma_fence_is_later(fence,
+ xe_sched_job_lrc_seqno(job),
+ xe_lrc_seqno(lrc));
}
void xe_sched_job_arm(struct xe_sched_job *job)
@@ -293,23 +298,6 @@ void xe_sched_job_push(struct xe_sched_job *job)
}
/**
- * xe_sched_job_last_fence_add_dep - Add last fence dependency to job
- * @job:job to add the last fence dependency to
- * @vm: virtual memory job belongs to
- *
- * Returns:
- * 0 on success, or an error on failing to expand the array.
- */
-int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm)
-{
- struct dma_fence *fence;
-
- fence = xe_exec_queue_last_fence_get(job->q, vm);
-
- return drm_sched_job_add_dependency(&job->drm, fence);
-}
-
-/**
* xe_sched_job_init_user_fence - Initialize user_fence for the job
* @job: job whose user_fence needs an init
* @sync: sync to be used to init user_fence
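The new q->job_cnt counter is balanced purely by job lifetime: it is incremented once in xe_sched_job_create() after the job is fully constructed and decremented once in xe_sched_job_destroy() before the queue reference is dropped, so job_cnt == 0 means the queue has no live jobs. A sketch of the invariant only (not the driver's actual drain path):

	/* Illustration: wait for all jobs created against @q to be destroyed. */
	static void drain_queue_jobs(struct xe_exec_queue *q)
	{
		while (atomic_read(&q->job_cnt))
			cond_resched();
	}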
diff --git a/drivers/gpu/drm/xe/xe_sched_job.h b/drivers/gpu/drm/xe/xe_sched_job.h
index 3dc72c5c1f13..1c1cb44216c3 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.h
+++ b/drivers/gpu/drm/xe/xe_sched_job.h
@@ -23,10 +23,10 @@ struct xe_sched_job *xe_sched_job_create(struct xe_exec_queue *q,
void xe_sched_job_destroy(struct kref *ref);
/**
- * xe_sched_job_get - get reference to XE schedule job
- * @job: XE schedule job object
+ * xe_sched_job_get - get reference to Xe schedule job
+ * @job: Xe schedule job object
*
- * Increment XE schedule job's reference count
+ * Increment Xe schedule job's reference count
*/
static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
{
@@ -35,10 +35,10 @@ static inline struct xe_sched_job *xe_sched_job_get(struct xe_sched_job *job)
}
/**
- * xe_sched_job_put - put reference to XE schedule job
- * @job: XE schedule job object
+ * xe_sched_job_put - put reference to Xe schedule job
+ * @job: Xe schedule job object
*
- * Decrement XE schedule job's reference count, call xe_sched_job_destroy when
+ * Decrement Xe schedule job's reference count, call xe_sched_job_destroy when
* reference count == 0.
*/
static inline void xe_sched_job_put(struct xe_sched_job *job)
@@ -58,7 +58,6 @@ bool xe_sched_job_completed(struct xe_sched_job *job);
void xe_sched_job_arm(struct xe_sched_job *job);
void xe_sched_job_push(struct xe_sched_job *job);
-int xe_sched_job_last_fence_add_dep(struct xe_sched_job *job, struct xe_vm *vm);
void xe_sched_job_init_user_fence(struct xe_sched_job *job,
struct xe_sync_entry *sync);
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index dbf260dded8d..7c4c54fe920a 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -24,10 +24,15 @@ struct xe_job_ptrs {
struct dma_fence_chain *chain_fence;
/** @batch_addr: Batch buffer address. */
u64 batch_addr;
+ /**
+ * @head: The tail pointer of the LRC (so head pointer of job) when the
+ * job was submitted
+ */
+ u32 head;
};
/**
- * struct xe_sched_job - XE schedule job (batch buffer tracking)
+ * struct xe_sched_job - Xe schedule job (batch buffer tracking)
*/
struct xe_sched_job {
/** @drm: base DRM scheduler job */
@@ -58,6 +63,10 @@ struct xe_sched_job {
bool ring_ops_flush_tlb;
/** @ggtt: mapped in ggtt. */
bool ggtt;
+ /** @restore_replay: job being replayed for restore */
+ bool restore_replay;
+ /** @last_replay: last job being replayed */
+ bool last_replay;
/** @ptrs: per instance pointers. */
struct xe_job_ptrs ptrs[];
};
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 86d47aaf0358..90244fe59b59 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -5,6 +5,7 @@
#include <linux/shrinker.h>
+#include <drm/drm_managed.h>
#include <drm/ttm/ttm_backup.h>
#include <drm/ttm/ttm_bo.h>
#include <drm/ttm/ttm_tt.h>
@@ -53,10 +54,10 @@ xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgea
write_unlock(&shrinker->lock);
}
-static s64 xe_shrinker_walk(struct xe_device *xe,
- struct ttm_operation_ctx *ctx,
- const struct xe_bo_shrink_flags flags,
- unsigned long to_scan, unsigned long *scanned)
+static s64 __xe_shrinker_walk(struct xe_device *xe,
+ struct ttm_operation_ctx *ctx,
+ const struct xe_bo_shrink_flags flags,
+ unsigned long to_scan, unsigned long *scanned)
{
unsigned int mem_type;
s64 freed = 0, lret;
@@ -65,11 +66,15 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
struct ttm_resource_manager *man = ttm_manager_type(&xe->ttm, mem_type);
struct ttm_bo_lru_cursor curs;
struct ttm_buffer_object *ttm_bo;
+ struct ttm_lru_walk_arg arg = {
+ .ctx = ctx,
+ .trylock_only = true,
+ };
if (!man || !man->use_tt)
continue;
- ttm_bo_lru_for_each_reserved_guarded(&curs, man, ctx, ttm_bo) {
+ ttm_bo_lru_for_each_reserved_guarded(&curs, man, &arg, ttm_bo) {
if (!ttm_bo_shrink_suitable(ttm_bo, ctx))
continue;
@@ -81,6 +86,50 @@ static s64 xe_shrinker_walk(struct xe_device *xe,
if (*scanned >= to_scan)
break;
}
+ /* Trylocks should never error, just fail. */
+ xe_assert(xe, !IS_ERR(ttm_bo));
+ }
+
+ return freed;
+}
+
+/*
+ * Try shrinking idle objects without writeback first. If that is not
+ * sufficient, also try non-idle objects, and finally, if that is still not
+ * sufficient, add writeback. This avoids stalls and explicit writebacks
+ * under light or moderate memory pressure.
+ */
+static s64 xe_shrinker_walk(struct xe_device *xe,
+ struct ttm_operation_ctx *ctx,
+ const struct xe_bo_shrink_flags flags,
+ unsigned long to_scan, unsigned long *scanned)
+{
+ bool no_wait_gpu = true;
+ struct xe_bo_shrink_flags save_flags = flags;
+ s64 lret, freed;
+
+ swap(no_wait_gpu, ctx->no_wait_gpu);
+ save_flags.writeback = false;
+ lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
+ swap(no_wait_gpu, ctx->no_wait_gpu);
+ if (lret < 0 || *scanned >= to_scan)
+ return lret;
+
+ freed = lret;
+ if (!ctx->no_wait_gpu) {
+ lret = __xe_shrinker_walk(xe, ctx, save_flags, to_scan, scanned);
+ if (lret < 0)
+ return lret;
+ freed += lret;
+ if (*scanned >= to_scan)
+ return freed;
+ }
+
+ if (flags.writeback) {
+ lret = __xe_shrinker_walk(xe, ctx, flags, to_scan, scanned);
+ if (lret < 0)
+ return lret;
+ freed += lret;
}
return freed;
@@ -192,6 +241,7 @@ static unsigned long xe_shrinker_scan(struct shrinker *shrink, struct shrink_con
runtime_pm = xe_shrinker_runtime_pm_get(shrinker, true, 0, can_backup);
shrink_flags.purge = false;
+
lret = xe_shrinker_walk(shrinker->xe, &ctx, shrink_flags,
nr_to_scan, &nr_scanned);
if (lret >= 0)
@@ -213,24 +263,34 @@ static void xe_shrinker_pm(struct work_struct *work)
xe_pm_runtime_put(shrinker->xe);
}
+static void xe_shrinker_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_shrinker *shrinker = arg;
+
+ xe_assert(shrinker->xe, !shrinker->shrinkable_pages);
+ xe_assert(shrinker->xe, !shrinker->purgeable_pages);
+ shrinker_free(shrinker->shrink);
+ flush_work(&shrinker->pm_worker);
+ kfree(shrinker);
+}
+
/**
* xe_shrinker_create() - Create an xe per-device shrinker
* @xe: Pointer to the xe device.
*
- * Returns: A pointer to the created shrinker on success,
- * Negative error code on failure.
+ * Return: %0 on success. Negative error code on failure.
*/
-struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
+int xe_shrinker_create(struct xe_device *xe)
{
struct xe_shrinker *shrinker = kzalloc(sizeof(*shrinker), GFP_KERNEL);
if (!shrinker)
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique);
if (!shrinker->shrink) {
kfree(shrinker);
- return ERR_PTR(-ENOMEM);
+ return -ENOMEM;
}
INIT_WORK(&shrinker->pm_worker, xe_shrinker_pm);
@@ -240,19 +300,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
shrinker->shrink->scan_objects = xe_shrinker_scan;
shrinker->shrink->private_data = shrinker;
shrinker_register(shrinker->shrink);
+ xe->mem.shrinker = shrinker;
- return shrinker;
-}
-
-/**
- * xe_shrinker_destroy() - Destroy an xe per-device shrinker
- * @shrinker: Pointer to the shrinker to destroy.
- */
-void xe_shrinker_destroy(struct xe_shrinker *shrinker)
-{
- xe_assert(shrinker->xe, !shrinker->shrinkable_pages);
- xe_assert(shrinker->xe, !shrinker->purgeable_pages);
- shrinker_free(shrinker->shrink);
- flush_work(&shrinker->pm_worker);
- kfree(shrinker);
+ return drmm_add_action_or_reset(&xe->drm, xe_shrinker_fini, shrinker);
}
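After this change the shrinker lifetime is tied to the DRM device: xe_shrinker_create() publishes the shrinker through xe->mem.shrinker and registers xe_shrinker_fini() as a drmm release action, so there is no explicit destroy call left for callers. A sketch of the call-site shape (assuming it is invoked from device probe):

	err = xe_shrinker_create(xe);
	if (err)
		return err;
	/* xe->mem.shrinker stays valid until the drm device is released. */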
diff --git a/drivers/gpu/drm/xe/xe_shrinker.h b/drivers/gpu/drm/xe/xe_shrinker.h
index 28a038f4fcbf..5132ae5192e1 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.h
+++ b/drivers/gpu/drm/xe/xe_shrinker.h
@@ -11,8 +11,6 @@ struct xe_device;
void xe_shrinker_mod_pages(struct xe_shrinker *shrinker, long shrinkable, long purgeable);
-struct xe_shrinker *xe_shrinker_create(struct xe_device *xe);
-
-void xe_shrinker_destroy(struct xe_shrinker *shrinker);
+int xe_shrinker_create(struct xe_device *xe);
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov.c b/drivers/gpu/drm/xe/xe_sriov.c
index a0eab44c0e76..ea411944609b 100644
--- a/drivers/gpu/drm/xe/xe_sriov.c
+++ b/drivers/gpu/drm/xe/xe_sriov.c
@@ -15,6 +15,7 @@
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
/**
* xe_sriov_mode_to_string - Convert enum value to string.
@@ -157,3 +158,19 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t size)
strscpy(buf, "PF", size);
return buf;
}
+
+/**
+ * xe_sriov_init_late() - SR-IOV late initialization functions.
+ * @xe: the &xe_device to initialize
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_init_late(struct xe_device *xe)
+{
+ if (IS_SRIOV_PF(xe))
+ return xe_sriov_pf_init_late(xe);
+ if (IS_SRIOV_VF(xe))
+ return xe_sriov_vf_init_late(xe);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov.h b/drivers/gpu/drm/xe/xe_sriov.h
index 688fbabf08f1..6db45df55615 100644
--- a/drivers/gpu/drm/xe/xe_sriov.h
+++ b/drivers/gpu/drm/xe/xe_sriov.h
@@ -18,6 +18,7 @@ const char *xe_sriov_function_name(unsigned int n, char *buf, size_t len);
void xe_sriov_probe_early(struct xe_device *xe);
void xe_sriov_print_info(struct xe_device *xe, struct drm_printer *p);
int xe_sriov_init(struct xe_device *xe);
+int xe_sriov_init_late(struct xe_device *xe);
static inline enum xe_sriov_mode xe_device_sriov_mode(const struct xe_device *xe)
{
diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.c b/drivers/gpu/drm/xe/xe_sriov_packet.c
new file mode 100644
index 000000000000..bab994696896
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_packet.c
@@ -0,0 +1,520 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_guc_klv_helpers.h"
+#include "xe_printk.h"
+#include "xe_sriov_packet.h"
+#include "xe_sriov_packet_types.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_migration.h"
+#include "xe_sriov_printk.h"
+
+static struct mutex *pf_migration_mutex(struct xe_device *xe, unsigned int vfid)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+
+ return &xe->sriov.pf.vfs[vfid].migration.lock;
+}
+
+static struct xe_sriov_packet **pf_pick_pending(struct xe_device *xe, unsigned int vfid)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+ lockdep_assert_held(pf_migration_mutex(xe, vfid));
+
+ return &xe->sriov.pf.vfs[vfid].migration.pending;
+}
+
+static struct xe_sriov_packet **
+pf_pick_descriptor(struct xe_device *xe, unsigned int vfid)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+ lockdep_assert_held(pf_migration_mutex(xe, vfid));
+
+ return &xe->sriov.pf.vfs[vfid].migration.descriptor;
+}
+
+static struct xe_sriov_packet **pf_pick_trailer(struct xe_device *xe, unsigned int vfid)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+ lockdep_assert_held(pf_migration_mutex(xe, vfid));
+
+ return &xe->sriov.pf.vfs[vfid].migration.trailer;
+}
+
+static struct xe_sriov_packet **pf_pick_read_packet(struct xe_device *xe,
+ unsigned int vfid)
+{
+ struct xe_sriov_packet **data;
+
+ data = pf_pick_descriptor(xe, vfid);
+ if (*data)
+ return data;
+
+ data = pf_pick_pending(xe, vfid);
+ if (!*data)
+ *data = xe_sriov_pf_migration_save_consume(xe, vfid);
+ if (*data)
+ return data;
+
+ data = pf_pick_trailer(xe, vfid);
+ if (*data)
+ return data;
+
+ return NULL;
+}
+
+static bool pkt_needs_bo(struct xe_sriov_packet *data)
+{
+ return data->hdr.type == XE_SRIOV_PACKET_TYPE_VRAM;
+}
+
+/**
+ * xe_sriov_packet_alloc() - Allocate migration data packet
+ * @xe: the &xe_device
+ *
+ * Only allocates the "outer" structure, without initializing the migration
+ * data backing storage.
+ *
+ * Return: Pointer to &xe_sriov_packet on success,
+ * NULL in case of error.
+ */
+struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe)
+{
+ struct xe_sriov_packet *data;
+
+ data = kzalloc(sizeof(*data), GFP_KERNEL);
+ if (!data)
+ return NULL;
+
+ data->xe = xe;
+ data->hdr_remaining = sizeof(data->hdr);
+
+ return data;
+}
+
+/**
+ * xe_sriov_packet_free() - Free migration data packet.
+ * @data: the &xe_sriov_packet
+ */
+void xe_sriov_packet_free(struct xe_sriov_packet *data)
+{
+ if (IS_ERR_OR_NULL(data))
+ return;
+
+ if (pkt_needs_bo(data))
+ xe_bo_unpin_map_no_vm(data->bo);
+ else
+ kvfree(data->buff);
+
+ kfree(data);
+}
+
+static int pkt_init(struct xe_sriov_packet *data)
+{
+ struct xe_gt *gt = xe_device_get_gt(data->xe, data->hdr.gt_id);
+
+ if (!gt)
+ return -EINVAL;
+
+ if (data->hdr.size == 0)
+ return 0;
+
+ if (pkt_needs_bo(data)) {
+ struct xe_bo *bo;
+
+ bo = xe_bo_create_pin_map_novm(data->xe, gt->tile, PAGE_ALIGN(data->hdr.size),
+ ttm_bo_type_kernel,
+ XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED, false);
+ if (IS_ERR(bo))
+ return PTR_ERR(bo);
+
+ data->bo = bo;
+ data->vaddr = bo->vmap.vaddr;
+ } else {
+ void *buff = kvzalloc(data->hdr.size, GFP_KERNEL);
+
+ if (!buff)
+ return -ENOMEM;
+
+ data->buff = buff;
+ data->vaddr = buff;
+ }
+
+ return 0;
+}
+
+#define XE_SRIOV_PACKET_SUPPORTED_VERSION 1
+
+/**
+ * xe_sriov_packet_init() - Initialize migration packet header and backing storage.
+ * @data: the &xe_sriov_packet
+ * @tile_id: tile identifier
+ * @gt_id: GT identifier
+ * @type: &xe_sriov_packet_type
+ * @offset: offset of data packet payload (within wider resource)
+ * @size: size of data packet payload
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id,
+ enum xe_sriov_packet_type type, loff_t offset, size_t size)
+{
+ data->hdr.version = XE_SRIOV_PACKET_SUPPORTED_VERSION;
+ data->hdr.type = type;
+ data->hdr.tile_id = tile_id;
+ data->hdr.gt_id = gt_id;
+ data->hdr.offset = offset;
+ data->hdr.size = size;
+ data->remaining = size;
+
+ return pkt_init(data);
+}
+
+/**
+ * xe_sriov_packet_init_from_hdr() - Initialize migration packet backing storage based on header.
+ * @data: the &xe_sriov_packet
+ *
+ * Header data is expected to be filled prior to calling this function.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data)
+{
+ xe_assert(data->xe, !data->hdr_remaining);
+
+ if (data->hdr.version != XE_SRIOV_PACKET_SUPPORTED_VERSION)
+ return -EINVAL;
+
+ data->remaining = data->hdr.size;
+
+ return pkt_init(data);
+}
+
+static ssize_t pkt_hdr_read(struct xe_sriov_packet *data,
+ char __user *buf, size_t len)
+{
+ loff_t offset = sizeof(data->hdr) - data->hdr_remaining;
+
+ if (!data->hdr_remaining)
+ return -EINVAL;
+
+ if (len > data->hdr_remaining)
+ len = data->hdr_remaining;
+
+ if (copy_to_user(buf, (void *)&data->hdr + offset, len))
+ return -EFAULT;
+
+ data->hdr_remaining -= len;
+
+ return len;
+}
+
+static ssize_t pkt_data_read(struct xe_sriov_packet *data,
+ char __user *buf, size_t len)
+{
+ if (len > data->remaining)
+ len = data->remaining;
+
+ if (copy_to_user(buf, data->vaddr + (data->hdr.size - data->remaining), len))
+ return -EFAULT;
+
+ data->remaining -= len;
+
+ return len;
+}
+
+static ssize_t pkt_read_single(struct xe_sriov_packet **data,
+ unsigned int vfid, char __user *buf, size_t len)
+{
+ ssize_t copied = 0;
+
+ if ((*data)->hdr_remaining)
+ copied = pkt_hdr_read(*data, buf, len);
+ else
+ copied = pkt_data_read(*data, buf, len);
+
+ if ((*data)->remaining == 0 && (*data)->hdr_remaining == 0) {
+ xe_sriov_packet_free(*data);
+ *data = NULL;
+ }
+
+ return copied;
+}
+
+/**
+ * xe_sriov_packet_read_single() - Read migration data from a single packet.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @buf: start address of userspace buffer
+ * @len: requested read size from userspace
+ *
+ * Return: number of bytes that have been successfully read,
+ * 0 if no more migration data is available,
+ * -errno on failure.
+ */
+ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len)
+{
+ struct xe_sriov_packet **data = pf_pick_read_packet(xe, vfid);
+
+ if (!data)
+ return -ENODATA;
+ if (IS_ERR(*data))
+ return PTR_ERR(*data);
+
+ return pkt_read_single(data, vfid, buf, len);
+}
+
+static ssize_t pkt_hdr_write(struct xe_sriov_packet *data,
+ const char __user *buf, size_t len)
+{
+ loff_t offset = sizeof(data->hdr) - data->hdr_remaining;
+ int ret;
+
+ if (len > data->hdr_remaining)
+ len = data->hdr_remaining;
+
+ if (copy_from_user((void *)&data->hdr + offset, buf, len))
+ return -EFAULT;
+
+ data->hdr_remaining -= len;
+
+ if (!data->hdr_remaining) {
+ ret = xe_sriov_packet_init_from_hdr(data);
+ if (ret)
+ return ret;
+ }
+
+ return len;
+}
+
+static ssize_t pkt_data_write(struct xe_sriov_packet *data,
+ const char __user *buf, size_t len)
+{
+ if (len > data->remaining)
+ len = data->remaining;
+
+ if (copy_from_user(data->vaddr + (data->hdr.size - data->remaining), buf, len))
+ return -EFAULT;
+
+ data->remaining -= len;
+
+ return len;
+}
+
+/**
+ * xe_sriov_packet_write_single() - Write migration data to a single packet.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @buf: start address of userspace buffer
+ * @len: requested write size from userspace
+ *
+ * Return: number of bytes that have been successfully written,
+ * -errno on failure.
+ */
+ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len)
+{
+ struct xe_sriov_packet **data = pf_pick_pending(xe, vfid);
+ int ret;
+ ssize_t copied;
+
+ if (IS_ERR_OR_NULL(*data)) {
+ *data = xe_sriov_packet_alloc(xe);
+ if (!*data)
+ return -ENOMEM;
+ }
+
+ if ((*data)->hdr_remaining)
+ copied = pkt_hdr_write(*data, buf, len);
+ else
+ copied = pkt_data_write(*data, buf, len);
+
+ if ((*data)->hdr_remaining == 0 && (*data)->remaining == 0) {
+ ret = xe_sriov_pf_migration_restore_produce(xe, vfid, *data);
+ if (ret) {
+ xe_sriov_packet_free(*data);
+ return ret;
+ }
+
+ *data = NULL;
+ }
+
+ return copied;
+}
+
+#define MIGRATION_KLV_DEVICE_DEVID_KEY 0xf001u
+#define MIGRATION_KLV_DEVICE_DEVID_LEN 1u
+#define MIGRATION_KLV_DEVICE_REVID_KEY 0xf002u
+#define MIGRATION_KLV_DEVICE_REVID_LEN 1u
+
+#define MIGRATION_DESCRIPTOR_DWORDS (GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_DEVID_LEN + \
+ GUC_KLV_LEN_MIN + MIGRATION_KLV_DEVICE_REVID_LEN)
+static size_t pf_descriptor_init(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_packet **desc = pf_pick_descriptor(xe, vfid);
+ struct xe_sriov_packet *data;
+ unsigned int len = 0;
+ u32 *klvs;
+ int ret;
+
+ data = xe_sriov_packet_alloc(xe);
+ if (!data)
+ return -ENOMEM;
+
+ ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_DESCRIPTOR,
+ 0, MIGRATION_DESCRIPTOR_DWORDS * sizeof(u32));
+ if (ret) {
+ xe_sriov_packet_free(data);
+ return ret;
+ }
+
+ klvs = data->vaddr;
+ klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_DEVID_KEY,
+ MIGRATION_KLV_DEVICE_DEVID_LEN);
+ klvs[len++] = xe->info.devid;
+ klvs[len++] = PREP_GUC_KLV_CONST(MIGRATION_KLV_DEVICE_REVID_KEY,
+ MIGRATION_KLV_DEVICE_REVID_LEN);
+ klvs[len++] = xe->info.revid;
+
+ xe_assert(xe, len == MIGRATION_DESCRIPTOR_DWORDS);
+
+ *desc = data;
+
+ return 0;
+}
+
+/**
+ * xe_sriov_packet_process_descriptor() - Process migration data descriptor packet.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @data: the &xe_sriov_packet containing the descriptor
+ *
+ * The descriptor uses the same KLV format as GuC, and contains metadata used for
+ * checking migration data compatibility.
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ u32 num_dwords = data->hdr.size / sizeof(u32);
+ u32 *klvs = data->vaddr;
+
+ xe_assert(xe, data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR);
+
+ if (data->hdr.size % sizeof(u32)) {
+ xe_sriov_warn(xe, "Aborting migration, descriptor not in KLV format (size=%llu)\n",
+ data->hdr.size);
+ return -EINVAL;
+ }
+
+ while (num_dwords >= GUC_KLV_LEN_MIN) {
+ u32 key = FIELD_GET(GUC_KLV_0_KEY, klvs[0]);
+ u32 len = FIELD_GET(GUC_KLV_0_LEN, klvs[0]);
+
+ klvs += GUC_KLV_LEN_MIN;
+ num_dwords -= GUC_KLV_LEN_MIN;
+
+ if (len > num_dwords) {
+ xe_sriov_warn(xe, "Aborting migration, truncated KLV %#x, len %u\n",
+ key, len);
+ return -EINVAL;
+ }
+
+ switch (key) {
+ case MIGRATION_KLV_DEVICE_DEVID_KEY:
+ if (*klvs != xe->info.devid) {
+ xe_sriov_warn(xe,
+ "Aborting migration, devid mismatch %#06x!=%#06x\n",
+ *klvs, xe->info.devid);
+ return -ENODEV;
+ }
+ break;
+ case MIGRATION_KLV_DEVICE_REVID_KEY:
+ if (*klvs != xe->info.revid) {
+ xe_sriov_warn(xe,
+ "Aborting migration, revid mismatch %#06x!=%#06x\n",
+ *klvs, xe->info.revid);
+ return -ENODEV;
+ }
+ break;
+ default:
+ xe_sriov_dbg(xe,
+ "Skipping unknown migration KLV %#x, len=%u\n",
+ key, len);
+ print_hex_dump_bytes("desc: ", DUMP_PREFIX_OFFSET, klvs,
+ min(SZ_64, len * sizeof(u32)));
+ break;
+ }
+
+ klvs += len;
+ num_dwords -= len;
+ }
+
+ return 0;
+}
+
+static void pf_pending_init(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_packet **data = pf_pick_pending(xe, vfid);
+
+ *data = NULL;
+}
+
+#define MIGRATION_TRAILER_SIZE 0
+static int pf_trailer_init(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_packet **trailer = pf_pick_trailer(xe, vfid);
+ struct xe_sriov_packet *data;
+ int ret;
+
+ data = xe_sriov_packet_alloc(xe);
+ if (!data)
+ return -ENOMEM;
+
+ ret = xe_sriov_packet_init(data, 0, 0, XE_SRIOV_PACKET_TYPE_TRAILER,
+ 0, MIGRATION_TRAILER_SIZE);
+ if (ret) {
+ xe_sriov_packet_free(data);
+ return ret;
+ }
+
+ *trailer = data;
+
+ return 0;
+}
+
+/**
+ * xe_sriov_packet_save_init() - Initialize the pending save migration packets.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * Return: 0 on success, -errno on failure.
+ */
+int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid)
+{
+ int ret;
+
+ scoped_cond_guard(mutex_intr, return -EINTR, pf_migration_mutex(xe, vfid)) {
+ ret = pf_descriptor_init(xe, vfid);
+ if (ret)
+ return ret;
+
+ ret = pf_trailer_init(xe, vfid);
+ if (ret)
+ return ret;
+
+ pf_pending_init(xe, vfid);
+ }
+
+ return 0;
+}
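The save stream produced above is self-describing: each packet is a fixed-size packed header immediately followed by hdr.size payload bytes, with a KLV descriptor first and a zero-length trailer last. A hedged userspace-side sketch of walking that framing (header layout mirrored from struct xe_sriov_packet_hdr, little-endian assumed; this only parses the framing and is not a VFIO migration consumer):

#include <stdint.h>
#include <stdio.h>

/* Mirror of struct xe_sriov_packet_hdr (packed, little-endian assumed). */
struct pkt_hdr {
	uint8_t  version;
	uint8_t  type;
	uint8_t  tile_id;
	uint8_t  gt_id;
	uint32_t flags;
	uint64_t offset;
	uint64_t size;
} __attribute__((packed));

#define PKT_TYPE_TRAILER 2	/* XE_SRIOV_PACKET_TYPE_TRAILER */

static int walk_stream(FILE *f)
{
	struct pkt_hdr hdr;
	char buf[4096];

	for (;;) {
		if (fread(&hdr, sizeof(hdr), 1, f) != 1)
			return -1;			/* truncated stream */

		printf("type %u tile %u gt %u: %llu bytes at offset %llu\n",
		       (unsigned int)hdr.type, (unsigned int)hdr.tile_id,
		       (unsigned int)hdr.gt_id,
		       (unsigned long long)hdr.size,
		       (unsigned long long)hdr.offset);

		if (hdr.type == PKT_TYPE_TRAILER)
			return 0;			/* end of stream */

		/* Skip the payload; a real consumer would forward it. */
		for (uint64_t left = hdr.size; left; ) {
			size_t chunk = left < sizeof(buf) ? left : sizeof(buf);

			if (fread(buf, 1, chunk, f) != chunk)
				return -1;
			left -= chunk;
		}
	}
}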
diff --git a/drivers/gpu/drm/xe/xe_sriov_packet.h b/drivers/gpu/drm/xe/xe_sriov_packet.h
new file mode 100644
index 000000000000..2731e52cf7ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_packet.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PACKET_H_
+#define _XE_SRIOV_PACKET_H_
+
+#include <linux/types.h>
+
+struct xe_device;
+struct xe_sriov_packet;
+enum xe_sriov_packet_type;
+
+struct xe_sriov_packet *xe_sriov_packet_alloc(struct xe_device *xe);
+void xe_sriov_packet_free(struct xe_sriov_packet *data);
+
+int xe_sriov_packet_init(struct xe_sriov_packet *data, u8 tile_id, u8 gt_id,
+ enum xe_sriov_packet_type, loff_t offset, size_t size);
+int xe_sriov_packet_init_from_hdr(struct xe_sriov_packet *data);
+
+ssize_t xe_sriov_packet_read_single(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len);
+ssize_t xe_sriov_packet_write_single(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len);
+int xe_sriov_packet_save_init(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_packet_process_descriptor(struct xe_device *xe, unsigned int vfid,
+ struct xe_sriov_packet *data);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_packet_types.h b/drivers/gpu/drm/xe/xe_sriov_packet_types.h
new file mode 100644
index 000000000000..078a1c95e786
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_packet_types.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PACKET_TYPES_H_
+#define _XE_SRIOV_PACKET_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * enum xe_sriov_packet_type - Xe SR-IOV VF migration data packet type
+ * @XE_SRIOV_PACKET_TYPE_DESCRIPTOR: Descriptor with VF device metadata
+ * @XE_SRIOV_PACKET_TYPE_TRAILER: Trailer indicating end-of-stream
+ * @XE_SRIOV_PACKET_TYPE_GGTT: Global GTT migration data
+ * @XE_SRIOV_PACKET_TYPE_MMIO: MMIO registers migration data
+ * @XE_SRIOV_PACKET_TYPE_GUC: GuC firmware migration data
+ * @XE_SRIOV_PACKET_TYPE_VRAM: VRAM migration data
+ */
+enum xe_sriov_packet_type {
+ /* Skipping 0 to catch uninitialized data */
+ XE_SRIOV_PACKET_TYPE_DESCRIPTOR = 1,
+ XE_SRIOV_PACKET_TYPE_TRAILER,
+ XE_SRIOV_PACKET_TYPE_GGTT,
+ XE_SRIOV_PACKET_TYPE_MMIO,
+ XE_SRIOV_PACKET_TYPE_GUC,
+ XE_SRIOV_PACKET_TYPE_VRAM,
+};
+
+/**
+ * struct xe_sriov_packet_hdr - Xe SR-IOV VF migration data packet header
+ */
+struct xe_sriov_packet_hdr {
+ /** @version: migration data protocol version */
+ u8 version;
+ /** @type: migration data type */
+ u8 type;
+ /** @tile_id: migration data tile id */
+ u8 tile_id;
+ /** @gt_id: migration data gt id */
+ u8 gt_id;
+ /** @flags: migration data flags */
+ u32 flags;
+ /**
+ * @offset: offset into the resource;
+ * used when multiple packets of a given type are used for migration
+ */
+ u64 offset;
+ /** @size: migration data size */
+ u64 size;
+} __packed;
+
+/**
+ * struct xe_sriov_packet - Xe SR-IOV VF migration data packet
+ */
+struct xe_sriov_packet {
+ /** @xe: the PF &xe_device this data packet belongs to */
+ struct xe_device *xe;
+ /** @vaddr: CPU pointer to payload data */
+ void *vaddr;
+ /** @remaining: payload data remaining */
+ size_t remaining;
+ /** @hdr_remaining: header data remaining */
+ size_t hdr_remaining;
+ union {
+ /** @bo: Buffer object with migration data */
+ struct xe_bo *bo;
+ /** @buff: Buffer with migration data */
+ void *buff;
+ };
+ /** @hdr: data packet header */
+ struct xe_sriov_packet_hdr hdr;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.c b/drivers/gpu/drm/xe/xe_sriov_pf.c
index 0f721ae17b26..7c779d63179f 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.c
@@ -3,17 +3,27 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <linux/debugfs.h>
+#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>
#include "xe_assert.h"
+#include "xe_configfs.h"
#include "xe_device.h"
+#include "xe_gt_sriov_pf.h"
#include "xe_module.h"
#include "xe_sriov.h"
#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_migration.h"
+#include "xe_sriov_pf_service.h"
+#include "xe_sriov_pf_sysfs.h"
#include "xe_sriov_printk.h"
static unsigned int wanted_max_vfs(struct xe_device *xe)
{
+ if (IS_ENABLED(CONFIG_CONFIGFS_FS))
+ return xe_configfs_get_max_vfs(to_pci_dev(xe->drm.dev));
return xe_modparam.max_vfs;
}
@@ -80,9 +90,178 @@ bool xe_sriov_pf_readiness(struct xe_device *xe)
*/
int xe_sriov_pf_init_early(struct xe_device *xe)
{
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ xe->sriov.pf.vfs = drmm_kcalloc(&xe->drm, 1 + xe_sriov_pf_get_totalvfs(xe),
+ sizeof(*xe->sriov.pf.vfs), GFP_KERNEL);
+ if (!xe->sriov.pf.vfs)
+ return -ENOMEM;
+
+ err = drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock);
+ if (err)
+ return err;
+
+ err = xe_sriov_pf_migration_init(xe);
+ if (err)
+ return err;
+
+ xe_guard_init(&xe->sriov.pf.guard_vfs_enabling, "vfs_enabling");
+
+ xe_sriov_pf_service_init(xe);
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_init_late() - Late initialization of the SR-IOV PF.
+ * @xe: the &xe_device to initialize
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_init_late(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_init(gt);
+ if (err)
+ return err;
+ }
+
+ err = xe_sriov_pf_sysfs_init(xe);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_wait_ready() - Wait until PF is ready to operate.
+ * @xe: the &xe_device to test
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_wait_ready(struct xe_device *xe)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int err;
+
+ if (xe_device_wedged(xe))
+ return -ECANCELED;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_wait_ready(gt);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_arm_guard() - Arm the guard for exclusive/lockdown mode.
+ * @xe: the PF &xe_device
+ * @guard: the &xe_guard to arm
+ * @lockdown: arm for lockdown (true) or exclusive (false) mode
+ * @who: the address of the new owner, or NULL if it's the caller
+ *
+ * This function can only be called on PF.
+ *
+ * It is a simple wrapper for xe_guard_arm() with additional debug
+ * messages.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard,
+ bool lockdown, void *who)
+{
+ void *new_owner = who ?: __builtin_return_address(0);
+ int err;
+
+ err = xe_guard_arm(guard, lockdown, new_owner);
+ if (err) {
+ xe_sriov_dbg(xe, "%s/%s mode denied (%pe) last owner %ps\n",
+ guard->name, xe_guard_mode_str(lockdown),
+ ERR_PTR(err), guard->owner);
+ return err;
+ }
+
+ xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n",
+ guard->name, xe_guard_mode_str(lockdown),
+ new_owner);
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_disarm_guard() - Disarm the guard.
+ * @xe: the PF &xe_device
+ * @guard: the &xe_guard to disarm
+ * @lockdown: disarm from lockdown (true) or exclusive (false) mode
+ * @who: the address of the indirect owner, or NULL if it's the caller
+ *
+ * This function can only be called on PF.
+ *
+ * It is a simple wrapper for xe_guard_disarm() with additional debug
+ * messages and xe_assert() to easily catch any illegal calls.
+ */
+void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard,
+ bool lockdown, void *who)
+{
+ bool disarmed;
+
+ xe_sriov_dbg_verbose(xe, "%s/%s by %ps\n",
+ guard->name, xe_guard_mode_str(lockdown),
+ who ?: __builtin_return_address(0));
+
+ disarmed = xe_guard_disarm(guard, lockdown);
+ xe_assert_msg(xe, disarmed, "%s/%s not armed? last owner %ps",
+ guard->name, xe_guard_mode_str(lockdown), guard->owner);
+}
+
+/**
+ * xe_sriov_pf_lockdown() - Lockdown the PF to prevent VFs enabling.
+ * @xe: the PF &xe_device
+ *
+ * This function can only be called on PF.
+ *
+ * Once the PF is locked down, it will not enable VFs.
+ * If VFs are already enabled, -EBUSY will be returned.
+ * To allow the PF to enable VFs again, call xe_sriov_pf_end_lockdown().
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_lockdown(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ return xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true,
+ __builtin_return_address(0));
+}
+
+/**
+ * xe_sriov_pf_end_lockdown() - Allow the PF to enable VFs again.
+ * @xe: the PF &xe_device
+ *
+ * This function can only be called on PF.
+ * See xe_sriov_pf_lockdown() for details.
+ */
+void xe_sriov_pf_end_lockdown(struct xe_device *xe)
+{
xe_assert(xe, IS_SRIOV_PF(xe));
- return drmm_mutex_init(&xe->drm, &xe->sriov.pf.master_lock);
+ xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling, true,
+ __builtin_return_address(0));
}
/**
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf.h b/drivers/gpu/drm/xe/xe_sriov_pf.h
index d1220e70e1c0..b4d050ad5b7c 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf.h
@@ -8,23 +8,24 @@
#include <linux/types.h>
+struct dentry;
struct drm_printer;
struct xe_device;
#ifdef CONFIG_PCI_IOV
bool xe_sriov_pf_readiness(struct xe_device *xe);
int xe_sriov_pf_init_early(struct xe_device *xe);
+int xe_sriov_pf_init_late(struct xe_device *xe);
+int xe_sriov_pf_wait_ready(struct xe_device *xe);
+int xe_sriov_pf_lockdown(struct xe_device *xe);
+void xe_sriov_pf_end_lockdown(struct xe_device *xe);
void xe_sriov_pf_print_vfs_summary(struct xe_device *xe, struct drm_printer *p);
#else
-static inline bool xe_sriov_pf_readiness(struct xe_device *xe)
-{
- return false;
-}
-
-static inline int xe_sriov_pf_init_early(struct xe_device *xe)
-{
- return 0;
-}
+static inline bool xe_sriov_pf_readiness(struct xe_device *xe) { return false; }
+static inline int xe_sriov_pf_init_early(struct xe_device *xe) { return 0; }
+static inline int xe_sriov_pf_init_late(struct xe_device *xe) { return 0; }
+static inline int xe_sriov_pf_lockdown(struct xe_device *xe) { return 0; }
+static inline void xe_sriov_pf_end_lockdown(struct xe_device *xe) { }
#endif
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.c b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
new file mode 100644
index 000000000000..ed4b9820b06e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_device.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_migration.h"
+#include "xe_sriov_packet.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_printk.h"
+
+/**
+ * xe_sriov_pf_control_pause_vf() - Pause a VF on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier (can't be 0 == PFID)
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_control_pause_vf(gt, vfid);
+ result = result ? -EUCLEAN : err;
+ }
+
+ if (result)
+ return result;
+
+ xe_sriov_info(xe, "VF%u paused!\n", vfid);
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_control_resume_vf() - Resume a VF on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_control_resume_vf(gt, vfid);
+ result = result ? -EUCLEAN : err;
+ }
+
+ if (result)
+ return result;
+
+ xe_sriov_info(xe, "VF%u resumed!\n", vfid);
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_control_stop_vf - Stop a VF on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_control_stop_vf(gt, vfid);
+ result = result ? -EUCLEAN : err;
+ }
+
+ if (result)
+ return result;
+
+ xe_sriov_info(xe, "VF%u stopped!\n", vfid);
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_control_reset_vf() - Perform a VF reset (FLR).
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_control_trigger_flr(gt, vfid);
+ result = result ? -EUCLEAN : err;
+ }
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_control_wait_flr(gt, vfid);
+ result = result ? -EUCLEAN : err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_control_wait_flr() - Wait for a VF reset (FLR) to complete.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_control_wait_flr(gt, vfid);
+ result = result ? -EUCLEAN : err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_control_sync_flr() - Synchronize a VF FLR between all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret;
+
+ for_each_gt(gt, xe, id) {
+ ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, false);
+ if (ret < 0)
+ return ret;
+ }
+ for_each_gt(gt, xe, id) {
+ ret = xe_gt_sriov_pf_control_sync_flr(gt, vfid, true);
+ if (ret < 0)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_control_trigger_save_vf() - Start VF migration data SAVE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret;
+
+ ret = xe_sriov_packet_save_init(xe, vfid);
+ if (ret)
+ return ret;
+
+ for_each_gt(gt, xe, id) {
+ xe_gt_sriov_pf_migration_save_init(gt, vfid);
+
+ ret = xe_gt_sriov_pf_control_trigger_save_vf(gt, vfid);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_control_finish_save_vf() - Complete VF migration data SAVE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret;
+
+ for_each_gt(gt, xe, id) {
+ ret = xe_gt_sriov_pf_control_finish_save_vf(gt, vfid);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
+
+/**
+ * xe_sriov_pf_control_trigger_restore_vf() - Start VF migration data RESTORE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret;
+
+ for_each_gt(gt, xe, id) {
+ ret = xe_gt_sriov_pf_control_trigger_restore_vf(gt, vfid);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_control_finish_restore_vf() - Complete VF migration data RESTORE sequence on all GTs.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * This function is for PF only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int ret;
+
+ for_each_gt(gt, xe, id) {
+ ret = xe_gt_sriov_pf_control_finish_restore_vf(gt, vfid);
+ if (ret)
+ break;
+ }
+
+ return ret;
+}
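
A minimal sketch of how the trigger/finish pairs above are expected to be used together with the data path added later in this series (xe_sriov_pf_migration_read()); the ordering is the point here, the helper itself is hypothetical, and a real consumer would flush @buf between reads:

static int example_save_sequence(struct xe_device *xe, unsigned int vfid,
				 char __user *buf, size_t len)
{
	ssize_t n;
	int err;

	err = xe_sriov_pf_control_pause_vf(xe, vfid);		/* quiesce the VF */
	if (err)
		return err;

	err = xe_sriov_pf_control_trigger_save_vf(xe, vfid);	/* arm save on all GTs */
	if (err)
		return err;

	do {							/* drain until end of stream */
		n = xe_sriov_pf_migration_read(xe, vfid, buf, len);
		if (n < 0)
			return n;
	} while (n);

	return xe_sriov_pf_control_finish_save_vf(xe, vfid);	/* confirm completion */
}
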
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_control.h b/drivers/gpu/drm/xe/xe_sriov_pf_control.h
new file mode 100644
index 000000000000..ef9f219b2109
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_control.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_CONTROL_H_
+#define _XE_SRIOV_PF_CONTROL_H_
+
+struct xe_device;
+
+int xe_sriov_pf_control_pause_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_resume_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_stop_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_reset_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_wait_flr(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_sync_flr(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_trigger_save_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_finish_save_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_trigger_restore_vf(struct xe_device *xe, unsigned int vfid);
+int xe_sriov_pf_control_finish_restore_vf(struct xe_device *xe, unsigned int vfid);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
new file mode 100644
index 000000000000..bad751217e1e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <drm/drm_debugfs.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_pm.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_pf_debugfs.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_migration.h"
+#include "xe_sriov_pf_provision.h"
+#include "xe_sriov_pf_service.h"
+#include "xe_sriov_printk.h"
+#include "xe_tile_sriov_pf_debugfs.h"
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov # d_inode->i_private = (xe_device*)
+ * │ ├── pf # d_inode->i_private = (xe_device*)
+ * │ ├── vf1 # d_inode->i_private = VFID(1)
+ * : :
+ * │ ├── vfN # d_inode->i_private = VFID(N)
+ */
+
+static void *extract_priv(struct dentry *d)
+{
+ return d->d_inode->i_private;
+}
+
+static struct xe_device *extract_xe(struct dentry *d)
+{
+ return extract_priv(d->d_parent);
+}
+
+static unsigned int extract_vfid(struct dentry *d)
+{
+ void *p = extract_priv(d);
+
+ return p == extract_xe(d) ? PFID : (uintptr_t)p;
+}
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * │ ├── restore_auto_provisioning
+ * │ :
+ * │ ├── pf/
+ * │ ├── vf1
+ * │ │ ├── ...
+ */
+
+static ssize_t from_file_write_to_xe_call(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos,
+ int (*call)(struct xe_device *))
+{
+ struct dentry *dent = file_dentry(file);
+ struct xe_device *xe = extract_xe(dent);
+ bool yes;
+ int ret;
+
+ if (*ppos)
+ return -EINVAL;
+ ret = kstrtobool_from_user(userbuf, count, &yes);
+ if (ret < 0)
+ return ret;
+ if (yes) {
+ xe_pm_runtime_get(xe);
+ ret = call(xe);
+ xe_pm_runtime_put(xe);
+ }
+ if (ret < 0)
+ return ret;
+ return count;
+}
+
+#define DEFINE_SRIOV_ATTRIBUTE(OP) \
+static int OP##_show(struct seq_file *s, void *unused) \
+{ \
+ return 0; \
+} \
+static ssize_t OP##_write(struct file *file, const char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return from_file_write_to_xe_call(file, userbuf, count, ppos, \
+ xe_sriov_pf_##OP); \
+} \
+DEFINE_SHOW_STORE_ATTRIBUTE(OP)
+
+static inline int xe_sriov_pf_restore_auto_provisioning(struct xe_device *xe)
+{
+ return xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_AUTO);
+}
+
+DEFINE_SRIOV_ATTRIBUTE(restore_auto_provisioning);
+
+static int lockdown_vfs_enabling_open(struct inode *inode, struct file *file)
+{
+ struct dentry *dent = file_dentry(file);
+ struct xe_device *xe = extract_xe(dent);
+ ssize_t ret;
+
+ ret = xe_sriov_pf_lockdown(xe);
+ if (ret < 0)
+ return ret;
+
+ file->private_data = xe;
+ return nonseekable_open(inode, file);
+}
+
+static int lockdown_vfs_enabling_release(struct inode *inode, struct file *file)
+{
+ struct xe_device *xe = file->private_data;
+
+ xe_sriov_pf_end_lockdown(xe);
+ return 0;
+}
+
+static const struct file_operations lockdown_vfs_enabling_fops = {
+ .owner = THIS_MODULE,
+ .open = lockdown_vfs_enabling_open,
+ .release = lockdown_vfs_enabling_release,
+};
+
+static void pf_populate_root(struct xe_device *xe, struct dentry *dent)
+{
+ debugfs_create_file("restore_auto_provisioning", 0200, dent, xe,
+ &restore_auto_provisioning_fops);
+ debugfs_create_file("lockdown_vfs_enabling", 0400, dent, xe,
+ &lockdown_vfs_enabling_fops);
+}
+
+static int simple_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct drm_info_node *node = m->private;
+ struct dentry *parent = node->dent->d_parent;
+ struct xe_device *xe = parent->d_inode->i_private;
+ void (*print)(struct xe_device *, struct drm_printer *) = node->info_ent->data;
+
+ print(xe, &p);
+ return 0;
+}
+
+static const struct drm_info_list debugfs_list[] = {
+ { .name = "vfs", .show = simple_show, .data = xe_sriov_pf_print_vfs_summary },
+ { .name = "versions", .show = simple_show, .data = xe_sriov_pf_service_print_versions },
+};
+
+static void pf_populate_pf(struct xe_device *xe, struct dentry *pfdent)
+{
+ struct drm_minor *minor = xe->drm.primary;
+
+ drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list), pfdent, minor);
+}
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * │ ├── vf1
+ * │ │ ├── migration_data
+ * │ │ ├── pause
+ * │ │ ├── reset
+ * │ │ ├── resume
+ * │ │ ├── stop
+ * │ │ ├── save
+ * │ │ ├── restore
+ * │ │ :
+ * │ ├── vf2
+ * │ │ ├── ...
+ */
+
+static int from_file_read_to_vf_call(struct seq_file *s,
+ int (*call)(struct xe_device *, unsigned int))
+{
+ struct dentry *dent = file_dentry(s->file)->d_parent;
+ struct xe_device *xe = extract_xe(dent);
+ unsigned int vfid = extract_vfid(dent);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = call(xe, vfid);
+ xe_pm_runtime_put(xe);
+
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+static ssize_t from_file_write_to_vf_call(struct file *file, const char __user *userbuf,
+ size_t count, loff_t *ppos,
+ int (*call)(struct xe_device *, unsigned int))
+{
+ struct dentry *dent = file_dentry(file)->d_parent;
+ struct xe_device *xe = extract_xe(dent);
+ unsigned int vfid = extract_vfid(dent);
+ bool yes;
+ int ret;
+
+ if (*ppos)
+ return -EINVAL;
+ ret = kstrtobool_from_user(userbuf, count, &yes);
+ if (ret < 0)
+ return ret;
+ if (yes) {
+ xe_pm_runtime_get(xe);
+ ret = call(xe, vfid);
+ xe_pm_runtime_put(xe);
+ }
+ if (ret < 0)
+ return ret;
+ return count;
+}
+
+#define DEFINE_VF_CONTROL_ATTRIBUTE(OP) \
+static int OP##_show(struct seq_file *s, void *unused) \
+{ \
+ return 0; \
+} \
+static ssize_t OP##_write(struct file *file, const char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return from_file_write_to_vf_call(file, userbuf, count, ppos, \
+ xe_sriov_pf_control_##OP); \
+} \
+DEFINE_SHOW_STORE_ATTRIBUTE(OP)
+
+#define DEFINE_VF_CONTROL_ATTRIBUTE_RW(OP) \
+static int OP##_show(struct seq_file *s, void *unused) \
+{ \
+ return from_file_read_to_vf_call(s, \
+ xe_sriov_pf_control_finish_##OP); \
+} \
+static ssize_t OP##_write(struct file *file, const char __user *userbuf, \
+ size_t count, loff_t *ppos) \
+{ \
+ return from_file_write_to_vf_call(file, userbuf, count, ppos, \
+ xe_sriov_pf_control_trigger_##OP); \
+} \
+DEFINE_SHOW_STORE_ATTRIBUTE(OP)
+
+DEFINE_VF_CONTROL_ATTRIBUTE(pause_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE(resume_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE(stop_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE(reset_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE_RW(save_vf);
+DEFINE_VF_CONTROL_ATTRIBUTE_RW(restore_vf);
+
+static ssize_t data_write(struct file *file, const char __user *buf, size_t count, loff_t *pos)
+{
+ struct dentry *dent = file_dentry(file)->d_parent;
+ struct xe_device *xe = extract_xe(dent);
+ unsigned int vfid = extract_vfid(dent);
+
+ if (*pos)
+ return -ESPIPE;
+
+ return xe_sriov_pf_migration_write(xe, vfid, buf, count);
+}
+
+static ssize_t data_read(struct file *file, char __user *buf, size_t count, loff_t *ppos)
+{
+ struct dentry *dent = file_dentry(file)->d_parent;
+ struct xe_device *xe = extract_xe(dent);
+ unsigned int vfid = extract_vfid(dent);
+
+ if (*ppos)
+ return -ESPIPE;
+
+ return xe_sriov_pf_migration_read(xe, vfid, buf, count);
+}
+
+static const struct file_operations data_vf_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .write = data_write,
+ .read = data_read,
+ .llseek = default_llseek,
+};
+
+static ssize_t size_read(struct file *file, char __user *ubuf, size_t count, loff_t *ppos)
+{
+ struct dentry *dent = file_dentry(file)->d_parent;
+ struct xe_device *xe = extract_xe(dent);
+ unsigned int vfid = extract_vfid(dent);
+ char buf[21];
+ ssize_t ret;
+ int len;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_sriov_pf_migration_size(xe, vfid);
+ xe_pm_runtime_put(xe);
+ if (ret < 0)
+ return ret;
+
+ len = scnprintf(buf, sizeof(buf), "%zd\n", ret);
+
+ return simple_read_from_buffer(ubuf, count, ppos, buf, len);
+}
+
+static const struct file_operations size_vf_fops = {
+ .owner = THIS_MODULE,
+ .open = simple_open,
+ .read = size_read,
+ .llseek = default_llseek,
+};
+
+static void pf_populate_vf(struct xe_device *xe, struct dentry *vfdent)
+{
+ debugfs_create_file("pause", 0200, vfdent, xe, &pause_vf_fops);
+ debugfs_create_file("resume", 0200, vfdent, xe, &resume_vf_fops);
+ debugfs_create_file("stop", 0200, vfdent, xe, &stop_vf_fops);
+ debugfs_create_file("reset", 0200, vfdent, xe, &reset_vf_fops);
+ debugfs_create_file("save", 0600, vfdent, xe, &save_vf_fops);
+ debugfs_create_file("restore", 0600, vfdent, xe, &restore_vf_fops);
+ debugfs_create_file("migration_data", 0600, vfdent, xe, &data_vf_fops);
+ debugfs_create_file("migration_size", 0400, vfdent, xe, &size_vf_fops);
+}
+
+static void pf_populate_with_tiles(struct xe_device *xe, struct dentry *dent, unsigned int vfid)
+{
+ struct xe_tile *tile;
+ unsigned int id;
+
+ for_each_tile(tile, xe, id)
+ xe_tile_sriov_pf_debugfs_populate(tile, dent, vfid);
+}
+
+/**
+ * xe_sriov_pf_debugfs_register - Register PF debugfs attributes.
+ * @xe: the &xe_device
+ * @root: the root &dentry
+ *
+ * Create a separate directory that will contain all SR-IOV related files,
+ * organized per SR-IOV function (PF, VF1, VF2, ..., VFn).
+ */
+void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root)
+{
+ int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ struct dentry *pfdent;
+ struct dentry *vfdent;
+ struct dentry *dent;
+ char vfname[16]; /* should be more than enough for "vf%u\0" and VFID(UINT_MAX) */
+ unsigned int n;
+
+ /*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov # d_inode->i_private = (xe_device*)
+ * │ ├── ...
+ */
+ dent = debugfs_create_dir("sriov", root);
+ if (IS_ERR(dent))
+ return;
+ dent->d_inode->i_private = xe;
+
+ pf_populate_root(xe, dent);
+
+ /*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov # d_inode->i_private = (xe_device*)
+ * │ ├── pf # d_inode->i_private = (xe_device*)
+ * │ │ ├── ...
+ */
+ pfdent = debugfs_create_dir("pf", dent);
+ if (IS_ERR(pfdent))
+ return;
+ pfdent->d_inode->i_private = xe;
+
+ pf_populate_pf(xe, pfdent);
+ pf_populate_with_tiles(xe, pfdent, PFID);
+
+ /*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov # d_inode->i_private = (xe_device*)
+ * │ ├── vf1 # d_inode->i_private = VFID(1)
+ * │ ├── vf2 # d_inode->i_private = VFID(2)
+ * │ ├── ...
+ */
+ for (n = 1; n <= totalvfs; n++) {
+ snprintf(vfname, sizeof(vfname), "vf%u", VFID(n));
+ vfdent = debugfs_create_dir(vfname, dent);
+ if (IS_ERR(vfdent))
+ return;
+ vfdent->d_inode->i_private = (void *)(uintptr_t)VFID(n);
+
+ pf_populate_vf(xe, vfdent);
+ pf_populate_with_tiles(xe, vfdent, VFID(n));
+ }
+}
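
For illustration, a userspace snippet driving the debugfs files created above; the paths follow the layout comments in this file, while the concrete BDF and output file name are just examples:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int write_flag(const char *path)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0 || write(fd, "1", 1) != 1)
		return -1;
	return close(fd);
}

int main(void)
{
	const char *vf = "/sys/kernel/debug/dri/0000:03:00.0/sriov/vf1";
	char path[256], buf[4096];
	ssize_t n;
	int data, out;

	snprintf(path, sizeof(path), "%s/pause", vf);
	if (write_flag(path))				/* quiesce the VF */
		return 1;
	snprintf(path, sizeof(path), "%s/save", vf);
	if (write_flag(path))				/* start the save sequence */
		return 1;

	snprintf(path, sizeof(path), "%s/migration_data", vf);
	data = open(path, O_RDONLY);
	out = open("vf1.img", O_WRONLY | O_CREAT | O_TRUNC, 0600);
	if (data < 0 || out < 0)
		return 1;
	while ((n = read(data, buf, sizeof(buf))) > 0)	/* 0 means end of stream */
		if (write(out, buf, n) != n)
			return 1;
	return n < 0;
}
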
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h
new file mode 100644
index 000000000000..93db13585b82
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_debugfs.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_DEBUGFS_H_
+#define _XE_SRIOV_PF_DEBUGFS_H_
+
+struct dentry;
+struct xe_device;
+
+#ifdef CONFIG_PCI_IOV
+void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root);
+#else
+static inline void xe_sriov_pf_debugfs_register(struct xe_device *xe, struct dentry *root) { }
+#endif
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
index dd1df950b021..9054fdc34597 100644
--- a/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_helpers.h
@@ -37,10 +37,37 @@ static inline int xe_sriov_pf_get_totalvfs(struct xe_device *xe)
return xe->sriov.pf.driver_max_vfs;
}
+/**
+ * xe_sriov_pf_num_vfs() - Number of enabled VFs on the PF.
+ * @xe: the PF &xe_device
+ *
+ * Return: Number of enabled VFs on the PF.
+ */
+static inline unsigned int xe_sriov_pf_num_vfs(const struct xe_device *xe)
+{
+ return pci_num_vf(to_pci_dev(xe->drm.dev));
+}
+
+/**
+ * xe_sriov_pf_admin_only() - Check if PF is mainly used for VFs administration.
+ * @xe: the PF &xe_device
+ *
+ * Return: True if PF is mainly used for VFs administration.
+ */
+static inline bool xe_sriov_pf_admin_only(const struct xe_device *xe)
+{
+ return !xe->info.probe_display;
+}
+
static inline struct mutex *xe_sriov_pf_master_mutex(struct xe_device *xe)
{
xe_assert(xe, IS_SRIOV_PF(xe));
return &xe->sriov.pf.master_lock;
}
+int xe_sriov_pf_arm_guard(struct xe_device *xe, struct xe_guard *guard,
+ bool lockdown, void *who);
+void xe_sriov_pf_disarm_guard(struct xe_device *xe, struct xe_guard *guard,
+ bool lockdown, void *who);
+
#endif
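
A minimal sketch of the intended arm/disarm pairing for the guard helpers declared above, mirroring what the lockdown debugfs file does on open/release; exclusive (non-lockdown) mode and the wrapper function name are assumptions for illustration:

static int example_exclusive_section(struct xe_device *xe)
{
	int err;

	/* exclusive mode: lockdown == false, owner defaults to the caller */
	err = xe_sriov_pf_arm_guard(xe, &xe->sriov.pf.guard_vfs_enabling,
				    false, NULL);
	if (err)
		return err;

	/* ... work that must not race with VFs enabling ... */

	xe_sriov_pf_disarm_guard(xe, &xe->sriov.pf.guard_vfs_enabling,
				 false, NULL);
	return 0;
}
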
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.c b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
new file mode 100644
index 000000000000..6c4b16409cc9
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "xe_device.h"
+#include "xe_gt_sriov_pf_control.h"
+#include "xe_gt_sriov_pf_migration.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_sriov_packet.h"
+#include "xe_sriov_packet_types.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_migration.h"
+#include "xe_sriov_printk.h"
+
+static struct xe_sriov_migration_state *pf_pick_migration(struct xe_device *xe, unsigned int vfid)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid <= xe_sriov_pf_get_totalvfs(xe));
+
+ return &xe->sriov.pf.vfs[vfid].migration;
+}
+
+/**
+ * xe_sriov_pf_migration_waitqueue() - Get waitqueue for migration.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * Return: pointer to the migration waitqueue.
+ */
+wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid)
+{
+ return &pf_pick_migration(xe, vfid)->wq;
+}
+
+/**
+ * xe_sriov_pf_migration_supported() - Check if SR-IOV VF migration is supported by the device
+ * @xe: the &xe_device
+ *
+ * Return: true if migration is supported, false otherwise
+ */
+bool xe_sriov_pf_migration_supported(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ return IS_ENABLED(CONFIG_DRM_XE_DEBUG) || !xe->sriov.pf.migration.disabled;
+}
+
+/**
+ * xe_sriov_pf_migration_disable() - Turn off SR-IOV VF migration support on PF.
+ * @xe: the &xe_device instance.
+ * @fmt: format string for the log message, to be combined with following VAs.
+ */
+void xe_sriov_pf_migration_disable(struct xe_device *xe, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list va_args;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ va_start(va_args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &va_args;
+ xe_sriov_notice(xe, "migration %s: %pV\n",
+ IS_ENABLED(CONFIG_DRM_XE_DEBUG) ?
+ "missing prerequisite" : "disabled",
+ &vaf);
+ va_end(va_args);
+
+ xe->sriov.pf.migration.disabled = true;
+}
+
+static void pf_migration_check_support(struct xe_device *xe)
+{
+ if (!xe_device_has_memirq(xe))
+ xe_sriov_pf_migration_disable(xe, "requires memory-based IRQ support");
+}
+
+static void pf_migration_cleanup(void *arg)
+{
+ struct xe_sriov_migration_state *migration = arg;
+
+ xe_sriov_packet_free(migration->pending);
+ xe_sriov_packet_free(migration->trailer);
+ xe_sriov_packet_free(migration->descriptor);
+}
+
+/**
+ * xe_sriov_pf_migration_init() - Initialize support for SR-IOV VF migration.
+ * @xe: the &xe_device
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_migration_init(struct xe_device *xe)
+{
+ unsigned int n, totalvfs;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ pf_migration_check_support(xe);
+
+ if (!xe_sriov_pf_migration_supported(xe))
+ return 0;
+
+ totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ for (n = 1; n <= totalvfs; n++) {
+ struct xe_sriov_migration_state *migration = pf_pick_migration(xe, n);
+
+ err = drmm_mutex_init(&xe->drm, &migration->lock);
+ if (err)
+ return err;
+
+ init_waitqueue_head(&migration->wq);
+
+ err = devm_add_action_or_reset(xe->drm.dev, pf_migration_cleanup, migration);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static bool pf_migration_data_ready(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_gt *gt;
+ u8 gt_id;
+
+ for_each_gt(gt, xe, gt_id) {
+ if (xe_gt_sriov_pf_control_check_save_failed(gt, vfid) ||
+ xe_gt_sriov_pf_control_check_save_data_done(gt, vfid) ||
+ !xe_gt_sriov_pf_migration_ring_empty(gt, vfid))
+ return true;
+ }
+
+ return false;
+}
+
+static struct xe_sriov_packet *
+pf_migration_consume(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_packet *data;
+ bool more_data = false;
+ struct xe_gt *gt;
+ u8 gt_id;
+
+ for_each_gt(gt, xe, gt_id) {
+ data = xe_gt_sriov_pf_migration_save_consume(gt, vfid);
+ if (data && PTR_ERR(data) != -EAGAIN)
+ return data;
+ if (PTR_ERR(data) == -EAGAIN)
+ more_data = true;
+ }
+
+ if (!more_data)
+ return NULL;
+
+ return ERR_PTR(-EAGAIN);
+}
+
+/**
+ * xe_sriov_pf_migration_save_consume() - Consume a VF migration data packet from the device.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * Called by the save migration data consumer (userspace) when
+ * processing migration data.
+ * If there is no migration data to process, wait until more data is available.
+ *
+ * Return: Pointer to &xe_sriov_packet on success,
+ * NULL if ring is empty and no more migration data is expected,
+ * ERR_PTR value in case of error.
+ */
+struct xe_sriov_packet *
+xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
+ struct xe_sriov_packet *data;
+ int ret;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ for (;;) {
+ data = pf_migration_consume(xe, vfid);
+ if (PTR_ERR(data) != -EAGAIN)
+ break;
+
+ ret = wait_event_interruptible(migration->wq,
+ pf_migration_data_ready(xe, vfid));
+ if (ret)
+ return ERR_PTR(ret);
+ }
+
+ return data;
+}
+
+static int pf_handle_descriptor(struct xe_device *xe, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ int ret;
+
+ if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0)
+ return -EINVAL;
+
+ ret = xe_sriov_packet_process_descriptor(xe, vfid, data);
+ if (ret)
+ return ret;
+
+ xe_sriov_packet_free(data);
+
+ return 0;
+}
+
+static int pf_handle_trailer(struct xe_device *xe, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ struct xe_gt *gt;
+ u8 gt_id;
+
+ if (data->hdr.tile_id != 0 || data->hdr.gt_id != 0)
+ return -EINVAL;
+ if (data->hdr.offset != 0 || data->hdr.size != 0 || data->buff || data->bo)
+ return -EINVAL;
+
+ xe_sriov_packet_free(data);
+
+ for_each_gt(gt, xe, gt_id)
+ xe_gt_sriov_pf_control_restore_data_done(gt, vfid);
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_migration_restore_produce() - Produce a VF migration data packet to the device.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @data: Pointer to &xe_sriov_packet
+ *
+ * Called by the restore migration data producer (userspace) when processing
+ * migration data.
+ * If the underlying data structure is full, wait until there is space.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid,
+ struct xe_sriov_packet *data)
+{
+ struct xe_gt *gt;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ if (data->hdr.type == XE_SRIOV_PACKET_TYPE_DESCRIPTOR)
+ return pf_handle_descriptor(xe, vfid, data);
+ if (data->hdr.type == XE_SRIOV_PACKET_TYPE_TRAILER)
+ return pf_handle_trailer(xe, vfid, data);
+
+ gt = xe_device_get_gt(xe, data->hdr.gt_id);
+ if (!gt || data->hdr.tile_id != gt->tile->id || data->hdr.type == 0) {
+ xe_sriov_err_ratelimited(xe, "Received invalid restore packet for VF%u (type:%u, tile:%u, GT:%u)\n",
+ vfid, data->hdr.type, data->hdr.tile_id, data->hdr.gt_id);
+ return -EINVAL;
+ }
+
+ return xe_gt_sriov_pf_migration_restore_produce(gt, vfid, data);
+}
+
+/**
+ * xe_sriov_pf_migration_read() - Read migration data from the device.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @buf: start address of userspace buffer
+ * @len: requested read size from userspace
+ *
+ * Return: number of bytes that have been successfully read,
+ * 0 if no more migration data is available,
+ * -errno on failure.
+ */
+ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len)
+{
+ struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
+ ssize_t ret, consumed = 0;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) {
+ while (consumed < len) {
+ ret = xe_sriov_packet_read_single(xe, vfid, buf, len - consumed);
+ if (ret == -ENODATA)
+ break;
+ if (ret < 0)
+ return ret;
+
+ consumed += ret;
+ buf += ret;
+ }
+ }
+
+ return consumed;
+}
+
+/**
+ * xe_sriov_pf_migration_write() - Write migration data to the device.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @buf: start address of userspace buffer
+ * @len: requested write size from userspace
+ *
+ * Return: number of bytes that have been successfully written,
+ * -errno on failure.
+ */
+ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len)
+{
+ struct xe_sriov_migration_state *migration = pf_pick_migration(xe, vfid);
+ ssize_t ret, produced = 0;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ scoped_cond_guard(mutex_intr, return -EINTR, &migration->lock) {
+ while (produced < len) {
+ ret = xe_sriov_packet_write_single(xe, vfid, buf, len - produced);
+ if (ret < 0)
+ return ret;
+
+ produced += ret;
+ buf += ret;
+ }
+ }
+
+ return produced;
+}
+
+/**
+ * xe_sriov_pf_migration_size() - Total size of migration data from all components within a device
+ * @xe: the &xe_device
+ * @vfid: the VF identifier (can't be 0)
+ *
+ * This function is for PF only.
+ *
+ * Return: total migration data size in bytes or a negative error code on failure.
+ */
+ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid)
+{
+ size_t size = 0;
+ struct xe_gt *gt;
+ ssize_t ret;
+ u8 gt_id;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, vfid);
+
+ for_each_gt(gt, xe, gt_id) {
+ ret = xe_gt_sriov_pf_migration_size(gt, vfid);
+ if (ret < 0)
+ return ret;
+
+ size += ret;
+ }
+
+ return size;
+}
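
A minimal kernel-side sketch of a packet consumer built on xe_sriov_pf_migration_save_consume(), relying only on the NULL/ERR_PTR semantics documented above; the emit() callback is hypothetical and stands in for whatever sink (debugfs read, VFIO) actually handles the data:

static int example_drain_save_stream(struct xe_device *xe, unsigned int vfid,
				     int (*emit)(struct xe_sriov_packet *data))
{
	struct xe_sriov_packet *data;
	int err;

	for (;;) {
		data = xe_sriov_pf_migration_save_consume(xe, vfid);
		if (!data)			/* ring empty, no more data expected */
			return 0;
		if (IS_ERR(data))
			return PTR_ERR(data);

		err = emit(data);		/* hand the packet to the sink */
		xe_sriov_packet_free(data);
		if (err)
			return err;
	}
}
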
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration.h b/drivers/gpu/drm/xe/xe_sriov_pf_migration.h
new file mode 100644
index 000000000000..f8f408df8481
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_MIGRATION_H_
+#define _XE_SRIOV_PF_MIGRATION_H_
+
+#include <linux/types.h>
+#include <linux/wait.h>
+
+struct xe_device;
+struct xe_sriov_packet;
+
+int xe_sriov_pf_migration_init(struct xe_device *xe);
+bool xe_sriov_pf_migration_supported(struct xe_device *xe);
+void xe_sriov_pf_migration_disable(struct xe_device *xe, const char *fmt, ...);
+int xe_sriov_pf_migration_restore_produce(struct xe_device *xe, unsigned int vfid,
+ struct xe_sriov_packet *data);
+struct xe_sriov_packet *
+xe_sriov_pf_migration_save_consume(struct xe_device *xe, unsigned int vfid);
+ssize_t xe_sriov_pf_migration_size(struct xe_device *xe, unsigned int vfid);
+wait_queue_head_t *xe_sriov_pf_migration_waitqueue(struct xe_device *xe, unsigned int vfid);
+
+ssize_t xe_sriov_pf_migration_read(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len);
+ssize_t xe_sriov_pf_migration_write(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h
new file mode 100644
index 000000000000..7d9a8a278d91
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_migration_types.h
@@ -0,0 +1,37 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_MIGRATION_TYPES_H_
+#define _XE_SRIOV_PF_MIGRATION_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/mutex_types.h>
+#include <linux/wait.h>
+
+/**
+ * struct xe_sriov_pf_migration - Xe device level VF migration data
+ */
+struct xe_sriov_pf_migration {
+ /** @disabled: indicates whether VF migration feature is disabled */
+ bool disabled;
+};
+
+/**
+ * struct xe_sriov_migration_state - Per VF device-level migration related data
+ */
+struct xe_sriov_migration_state {
+ /** @wq: waitqueue used to avoid busy-waiting for snapshot production/consumption */
+ wait_queue_head_t wq;
+ /** @lock: Mutex protecting the migration data */
+ struct mutex lock;
+ /** @pending: currently processed data packet of VF resource */
+ struct xe_sriov_packet *pending;
+ /** @trailer: data packet used to indicate the end of stream */
+ struct xe_sriov_packet *trailer;
+ /** @descriptor: data packet containing the metadata describing the device */
+ struct xe_sriov_packet *descriptor;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.c b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c
new file mode 100644
index 000000000000..01470c42e8a7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.c
@@ -0,0 +1,438 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_assert.h"
+#include "xe_device.h"
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_policy.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_provision.h"
+#include "xe_sriov_pf_provision_types.h"
+#include "xe_sriov_printk.h"
+
+static const char *mode_to_string(enum xe_sriov_provisioning_mode mode)
+{
+ switch (mode) {
+ case XE_SRIOV_PROVISIONING_MODE_AUTO:
+ return "auto";
+ case XE_SRIOV_PROVISIONING_MODE_CUSTOM:
+ return "custom";
+ default:
+ return "<invalid>";
+ }
+}
+
+static bool pf_auto_provisioning_mode(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ return xe->sriov.pf.provision.mode == XE_SRIOV_PROVISIONING_MODE_AUTO;
+}
+
+static bool pf_needs_provisioning(struct xe_gt *gt, unsigned int num_vfs)
+{
+ unsigned int n;
+
+ for (n = 1; n <= num_vfs; n++)
+ if (!xe_gt_sriov_pf_config_is_empty(gt, n))
+ return false;
+
+ return true;
+}
+
+static int pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ if (!pf_needs_provisioning(gt, num_vfs))
+ return -EUCLEAN;
+ err = xe_gt_sriov_pf_config_set_fair(gt, VFID(1), num_vfs);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+static void pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ unsigned int n;
+
+ for_each_gt(gt, xe, id)
+ for (n = 1; n <= num_vfs; n++)
+ xe_gt_sriov_pf_config_release(gt, n, true);
+}
+
+static void pf_unprovision_all_vfs(struct xe_device *xe)
+{
+ pf_unprovision_vfs(xe, xe_sriov_pf_get_totalvfs(xe));
+}
+
+/**
+ * xe_sriov_pf_provision_vfs() - Provision VFs in auto-mode.
+ * @xe: the PF &xe_device
+ * @num_vfs: the number of VFs to auto-provision
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ if (!pf_auto_provisioning_mode(xe))
+ return 0;
+
+ return pf_provision_vfs(xe, num_vfs);
+}
+
+/**
+ * xe_sriov_pf_unprovision_vfs() - Unprovision VFs in auto-mode.
+ * @xe: the PF &xe_device
+ * @num_vfs: the number of VFs to unprovision
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ if (!pf_auto_provisioning_mode(xe))
+ return 0;
+
+ pf_unprovision_vfs(xe, num_vfs);
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_provision_set_mode() - Change VFs provision mode.
+ * @xe: the PF &xe_device
+ * @mode: the new VFs provisioning mode
+ *
+ * When changing from AUTO to CUSTOM mode, any resources already allocated to
+ * the VFs will remain allocated and will not be released when VFs are disabled.
+ *
+ * When changing back to AUTO mode, if VFs are not enabled, any already
+ * allocated VF resources will be released immediately. If VFs are still
+ * enabled, the mode change is rejected.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provisioning_mode mode)
+{
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ if (mode == xe->sriov.pf.provision.mode)
+ return 0;
+
+ if (mode == XE_SRIOV_PROVISIONING_MODE_AUTO) {
+ if (xe_sriov_pf_num_vfs(xe)) {
+ xe_sriov_dbg(xe, "can't restore %s: VFs must be disabled!\n",
+ mode_to_string(mode));
+ return -EBUSY;
+ }
+ pf_unprovision_all_vfs(xe);
+ }
+
+ xe_sriov_dbg(xe, "mode %s changed to %s by %ps\n",
+ mode_to_string(xe->sriov.pf.provision.mode),
+ mode_to_string(mode), __builtin_return_address(0));
+ xe->sriov.pf.provision.mode = mode;
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_eq() - Change execution quantum for all VFs and PF.
+ * @xe: the PF &xe_device
+ * @eq: execution quantum in [ms] to set
+ *
+ * Change execution quantum (EQ) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_bulk_set_exec_quantum_locked(gt, eq);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_eq() - Change VF's execution quantum.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @eq: execution quantum in [ms] to set
+ *
+ * Change VF's execution quantum (EQ) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_set_exec_quantum_locked(gt, vfid, eq);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+static int pf_report_unclean(struct xe_gt *gt, unsigned int vfid,
+ const char *what, u32 found, u32 expected)
+{
+ char name[8];
+
+ xe_sriov_dbg(gt_to_xe(gt), "%s on GT%u has %s=%u (expected %u)\n",
+ xe_sriov_function_name(vfid, name, sizeof(name)),
+ gt->info.id, what, found, expected);
+ return -EUCLEAN;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_eq() - Query VF's execution quantum.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @eq: placeholder for the returned execution quantum in [ms]
+ *
+ * Query VF's execution quantum (EQ) provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int count = 0;
+ u32 value;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ value = xe_gt_sriov_pf_config_get_exec_quantum_locked(gt, vfid);
+ if (!count++)
+ *eq = value;
+ else if (value != *eq)
+ return pf_report_unclean(gt, vfid, "EQ", value, *eq);
+ }
+
+ return !count ? -ENODATA : 0;
+}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_pt() - Change preemption timeout for all VFs and PF.
+ * @xe: the PF &xe_device
+ * @pt: preemption timeout in [us] to set
+ *
+ * Change preemption timeout (PT) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_bulk_set_preempt_timeout_locked(gt, pt);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_pt() - Change VF's preemption timeout.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @pt: preemption timeout in [us] to set
+ *
+ * Change VF's preemption timeout (PT) provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_set_preempt_timeout_locked(gt, vfid, pt);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_pt() - Query VF's preemption timeout.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @pt: placeholder for the returned preemption timeout in [us]
+ *
+ * Query VF's preemption timeout (PT) provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int count = 0;
+ u32 value;
+
+ guard(mutex)(xe_sriov_pf_master_mutex(xe));
+
+ for_each_gt(gt, xe, id) {
+ value = xe_gt_sriov_pf_config_get_preempt_timeout_locked(gt, vfid);
+ if (!count++)
+ *pt = value;
+ else if (value != *pt)
+ return pf_report_unclean(gt, vfid, "PT", value, *pt);
+ }
+
+ return !count ? -ENODATA : 0;
+}
+
+/**
+ * xe_sriov_pf_provision_bulk_apply_priority() - Change scheduling priority of all VFs and PF.
+ * @xe: the PF &xe_device
+ * @prio: scheduling priority to set
+ *
+ * Change the scheduling priority provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio)
+{
+ bool sched_if_idle;
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ /*
+ * Currently, priority changes that involve VFs are only allowed using
+ * the 'sched_if_idle' policy KLV, so only LOW and NORMAL are supported.
+ */
+ xe_assert(xe, prio < GUC_SCHED_PRIORITY_HIGH);
+ sched_if_idle = prio == GUC_SCHED_PRIORITY_NORMAL;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_policy_set_sched_if_idle(gt, sched_if_idle);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_apply_vf_priority() - Change VF's scheduling priority.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @prio: scheduling priority to set
+ *
+ * Change VF's scheduling priority provisioning on all tiles/GTs.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int result = 0;
+ int err;
+
+ for_each_gt(gt, xe, id) {
+ err = xe_gt_sriov_pf_config_set_sched_priority(gt, vfid, prio);
+ result = result ?: err;
+ }
+
+ return result;
+}
+
+/**
+ * xe_sriov_pf_provision_query_vf_priority() - Query VF's scheduling priority.
+ * @xe: the PF &xe_device
+ * @vfid: the VF identifier
+ * @prio: placeholder for the returned scheduling priority
+ *
+ * Query VF's scheduling priority provisioning from all tiles/GTs.
+ * If values across tiles/GTs are inconsistent then -EUCLEAN error will be returned.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio)
+{
+ struct xe_gt *gt;
+ unsigned int id;
+ int count = 0;
+ u32 value;
+
+ for_each_gt(gt, xe, id) {
+ value = xe_gt_sriov_pf_config_get_sched_priority(gt, vfid);
+ if (!count++)
+ *prio = value;
+ else if (value != *prio)
+ return pf_report_unclean(gt, vfid, "priority", value, *prio);
+ }
+
+ return !count ? -ENODATA : 0;
+}
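
A minimal sketch of how a caller might use the query helpers above, relying on the documented -EUCLEAN (per-GT values diverged) and -ENODATA (no GT reported) semantics; the dump helper name is hypothetical:

static void example_dump_vf_scheduling(struct xe_device *xe, unsigned int vfid)
{
	u32 eq, pt, prio;

	if (!xe_sriov_pf_provision_query_vf_eq(xe, vfid, &eq))
		xe_sriov_dbg(xe, "VF%u EQ: %u ms\n", vfid, eq);
	if (!xe_sriov_pf_provision_query_vf_pt(xe, vfid, &pt))
		xe_sriov_dbg(xe, "VF%u PT: %u us\n", vfid, pt);
	if (!xe_sriov_pf_provision_query_vf_priority(xe, vfid, &prio))
		xe_sriov_dbg(xe, "VF%u priority: %u\n", vfid, prio);
}
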
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h
new file mode 100644
index 000000000000..bccf23d51396
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_PROVISION_H_
+#define _XE_SRIOV_PF_PROVISION_H_
+
+#include <linux/types.h>
+
+#include "xe_sriov_pf_provision_types.h"
+
+struct xe_device;
+
+int xe_sriov_pf_provision_bulk_apply_eq(struct xe_device *xe, u32 eq);
+int xe_sriov_pf_provision_apply_vf_eq(struct xe_device *xe, unsigned int vfid, u32 eq);
+int xe_sriov_pf_provision_query_vf_eq(struct xe_device *xe, unsigned int vfid, u32 *eq);
+
+int xe_sriov_pf_provision_bulk_apply_pt(struct xe_device *xe, u32 pt);
+int xe_sriov_pf_provision_apply_vf_pt(struct xe_device *xe, unsigned int vfid, u32 pt);
+int xe_sriov_pf_provision_query_vf_pt(struct xe_device *xe, unsigned int vfid, u32 *pt);
+
+int xe_sriov_pf_provision_bulk_apply_priority(struct xe_device *xe, u32 prio);
+int xe_sriov_pf_provision_apply_vf_priority(struct xe_device *xe, unsigned int vfid, u32 prio);
+int xe_sriov_pf_provision_query_vf_priority(struct xe_device *xe, unsigned int vfid, u32 *prio);
+
+int xe_sriov_pf_provision_vfs(struct xe_device *xe, unsigned int num_vfs);
+int xe_sriov_pf_unprovision_vfs(struct xe_device *xe, unsigned int num_vfs);
+
+int xe_sriov_pf_provision_set_mode(struct xe_device *xe, enum xe_sriov_provisioning_mode mode);
+
+/**
+ * xe_sriov_pf_provision_set_custom_mode() - Change VFs provision mode to custom.
+ * @xe: the PF &xe_device
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static inline int xe_sriov_pf_provision_set_custom_mode(struct xe_device *xe)
+{
+ return xe_sriov_pf_provision_set_mode(xe, XE_SRIOV_PROVISIONING_MODE_CUSTOM);
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h
new file mode 100644
index 000000000000..a847b8a4c4da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_provision_types.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_PROVISION_TYPES_H_
+#define _XE_SRIOV_PF_PROVISION_TYPES_H_
+
+#include <linux/build_bug.h>
+
+/**
+ * enum xe_sriov_provisioning_mode - SR-IOV provisioning mode.
+ *
+ * @XE_SRIOV_PROVISIONING_MODE_AUTO: VFs are provisioned during VFs enabling.
+ * Any resources allocated to the VFs will be
+ * automatically released when the VFs are disabled.
+ * This is the default mode.
+ * @XE_SRIOV_PROVISIONING_MODE_CUSTOM: Explicit VFs provisioning using uABI interfaces.
+ * VF resources remain allocated regardless of
+ * whether the VFs are enabled or not.
+ */
+enum xe_sriov_provisioning_mode {
+ XE_SRIOV_PROVISIONING_MODE_AUTO,
+ XE_SRIOV_PROVISIONING_MODE_CUSTOM,
+};
+static_assert(XE_SRIOV_PROVISIONING_MODE_AUTO == 0);
+
+/**
+ * struct xe_sriov_pf_provision - Data used by the PF provisioning.
+ */
+struct xe_sriov_pf_provision {
+ /** @mode: selected provisioning mode. */
+ enum xe_sriov_provisioning_mode mode;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.c b/drivers/gpu/drm/xe/xe_sriov_pf_service.c
new file mode 100644
index 000000000000..eee3b2a1ba41
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.c
@@ -0,0 +1,216 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023-2025 Intel Corporation
+ */
+
+#include "abi/guc_relay_actions_abi.h"
+
+#include "xe_device_types.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_printk.h"
+
+#include "xe_sriov_pf_service.h"
+#include "xe_sriov_pf_service_types.h"
+
+/**
+ * xe_sriov_pf_service_init - Early initialization of the SR-IOV PF service.
+ * @xe: the &xe_device to initialize
+ *
+ * Performs early initialization of the SR-IOV PF service.
+ *
+ * This function can only be called on PF.
+ */
+void xe_sriov_pf_service_init(struct xe_device *xe)
+{
+ BUILD_BUG_ON(!GUC_RELAY_VERSION_BASE_MAJOR && !GUC_RELAY_VERSION_BASE_MINOR);
+ BUILD_BUG_ON(GUC_RELAY_VERSION_BASE_MAJOR > GUC_RELAY_VERSION_LATEST_MAJOR);
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ /* base versions may differ between platforms */
+ xe->sriov.pf.service.version.base.major = GUC_RELAY_VERSION_BASE_MAJOR;
+ xe->sriov.pf.service.version.base.minor = GUC_RELAY_VERSION_BASE_MINOR;
+
+ /* latest version is same for all platforms */
+ xe->sriov.pf.service.version.latest.major = GUC_RELAY_VERSION_LATEST_MAJOR;
+ xe->sriov.pf.service.version.latest.minor = GUC_RELAY_VERSION_LATEST_MINOR;
+}
+
+/* Return: 0 on success or a negative error code on failure. */
+static int pf_negotiate_version(struct xe_device *xe,
+ u32 wanted_major, u32 wanted_minor,
+ u32 *major, u32 *minor)
+{
+ struct xe_sriov_pf_service_version base = xe->sriov.pf.service.version.base;
+ struct xe_sriov_pf_service_version latest = xe->sriov.pf.service.version.latest;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, base.major);
+ xe_assert(xe, base.major <= latest.major);
+ xe_assert(xe, (base.major < latest.major) || (base.minor <= latest.minor));
+
+ /* VF doesn't care - return our latest */
+ if (wanted_major == VF2PF_HANDSHAKE_MAJOR_ANY &&
+ wanted_minor == VF2PF_HANDSHAKE_MINOR_ANY) {
+ *major = latest.major;
+ *minor = latest.minor;
+ return 0;
+ }
+
+ /* VF wants newer than ours - return our latest */
+ if (wanted_major > latest.major) {
+ *major = latest.major;
+ *minor = latest.minor;
+ return 0;
+ }
+
+ /* VF wants older than min required - reject */
+ if (wanted_major < base.major ||
+ (wanted_major == base.major && wanted_minor < base.minor)) {
+ return -EPERM;
+ }
+
+ /* previous major - return wanted, as we should still support it */
+ if (wanted_major < latest.major) {
+ /* XXX: we are not prepared for multi-versions yet */
+ xe_assert(xe, base.major == latest.major);
+ return -ENOPKG;
+ }
+
+ /* same major - return common minor */
+ *major = wanted_major;
+ *minor = min_t(u32, latest.minor, wanted_minor);
+ return 0;
+}
+
+static void pf_connect(struct xe_device *xe, u32 vfid, u32 major, u32 minor)
+{
+ xe_sriov_pf_assert_vfid(xe, vfid);
+ xe_assert(xe, major || minor);
+
+ xe->sriov.pf.vfs[vfid].version.major = major;
+ xe->sriov.pf.vfs[vfid].version.minor = minor;
+}
+
+static void pf_disconnect(struct xe_device *xe, u32 vfid)
+{
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ xe->sriov.pf.vfs[vfid].version.major = 0;
+ xe->sriov.pf.vfs[vfid].version.minor = 0;
+}
+
+/**
+ * xe_sriov_pf_service_is_negotiated - Check if VF has negotiated given ABI version.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @major: the major version to check
+ * @minor: the minor version to check
+ *
+ * Check if the VF has already negotiated an ABI version covering @major.@minor.
+ *
+ * This function can only be called on PF.
+ *
+ * Returns: true if VF can use given ABI version functionality.
+ */
+bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor)
+{
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ return major == xe->sriov.pf.vfs[vfid].version.major &&
+ minor <= xe->sriov.pf.vfs[vfid].version.minor;
+}
+
+/**
+ * xe_sriov_pf_service_handshake_vf - Confirm a connection with the VF.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ * @wanted_major: the major service version expected by the VF
+ * @wanted_minor: the minor service version expected by the VF
+ * @major: the major service version to be used by the VF
+ * @minor: the minor service version to be used by the VF
+ *
+ * Negotiate a VF/PF ABI version to allow the VF to use the PF services.
+ *
+ * This function can only be called on PF.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid,
+ u32 wanted_major, u32 wanted_minor,
+ u32 *major, u32 *minor)
+{
+ int err;
+
+ xe_sriov_dbg_verbose(xe, "VF%u wants ABI version %u.%u\n",
+ vfid, wanted_major, wanted_minor);
+
+ err = pf_negotiate_version(xe, wanted_major, wanted_minor, major, minor);
+
+ if (err < 0) {
+ xe_sriov_notice(xe, "VF%u failed to negotiate ABI %u.%u (%pe)\n",
+ vfid, wanted_major, wanted_minor, ERR_PTR(err));
+ pf_disconnect(xe, vfid);
+ } else {
+ xe_sriov_dbg(xe, "VF%u negotiated ABI version %u.%u\n",
+ vfid, *major, *minor);
+ pf_connect(xe, vfid, *major, *minor);
+ }
+
+ return err;
+}
+
+/**
+ * xe_sriov_pf_service_reset_vf - Reset a connection with the VF.
+ * @xe: the &xe_device
+ * @vfid: the VF identifier
+ *
+ * Reset a VF driver negotiated VF/PF ABI version.
+ *
+ * After that point, the VF driver will have to perform a new version handshake
+ * to continue using the PF services.
+ *
+ * This function can only be called on PF.
+ */
+void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid)
+{
+ pf_disconnect(xe, vfid);
+}
+
+static void print_pf_version(struct drm_printer *p, const char *name,
+ const struct xe_sriov_pf_service_version *version)
+{
+ drm_printf(p, "%s:\t%u.%u\n", name, version->major, version->minor);
+}
+
+/**
+ * xe_sriov_pf_service_print_versions - Print ABI versions negotiated with VFs.
+ * @xe: the &xe_device
+ * @p: the &drm_printer
+ *
+ * This function is for PF use only.
+ */
+void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p)
+{
+ unsigned int n, total_vfs = xe_sriov_pf_get_totalvfs(xe);
+ struct xe_sriov_pf_service_version *version;
+ char name[8];
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+
+ print_pf_version(p, "base", &xe->sriov.pf.service.version.base);
+ print_pf_version(p, "latest", &xe->sriov.pf.service.version.latest);
+
+ for (n = 1; n <= total_vfs; n++) {
+ version = &xe->sriov.pf.vfs[n].version;
+ if (!version->major && !version->minor)
+ continue;
+
+ print_pf_version(p, xe_sriov_function_name(n, name, sizeof(name)), version);
+ }
+}
+
+#if IS_BUILTIN(CONFIG_DRM_XE_KUNIT_TEST)
+#include "tests/xe_sriov_pf_service_kunit.c"
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service.h b/drivers/gpu/drm/xe/xe_sriov_pf_service.h
new file mode 100644
index 000000000000..d38c18f5ed10
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_service.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_SERVICE_H_
+#define _XE_SRIOV_PF_SERVICE_H_
+
+#include <linux/types.h>
+
+struct drm_printer;
+struct xe_device;
+
+void xe_sriov_pf_service_init(struct xe_device *xe);
+void xe_sriov_pf_service_print_versions(struct xe_device *xe, struct drm_printer *p);
+
+int xe_sriov_pf_service_handshake_vf(struct xe_device *xe, u32 vfid,
+ u32 wanted_major, u32 wanted_minor,
+ u32 *major, u32 *minor);
+bool xe_sriov_pf_service_is_negotiated(struct xe_device *xe, u32 vfid, u32 major, u32 minor);
+void xe_sriov_pf_service_reset_vf(struct xe_device *xe, unsigned int vfid);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h
new file mode 100644
index 000000000000..0835dde358c1
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_service_types.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_SERVICE_TYPES_H_
+#define _XE_SRIOV_PF_SERVICE_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_sriov_pf_service_version - VF/PF ABI Version.
+ * @major: the major version of the VF/PF ABI
+ * @minor: the minor version of the VF/PF ABI
+ *
+ * See `GuC Relay Communication`_.
+ */
+struct xe_sriov_pf_service_version {
+ u16 major;
+ u16 minor;
+};
+
+/**
+ * struct xe_sriov_pf_service - Data used by the PF service.
+ * @version: information about VF/PF ABI versions for current platform.
+ * @version.base: lowest VF/PF ABI version that could be negotiated with VF.
+ * @version.latest: latest VF/PF ABI version supported by the PF driver.
+ */
+struct xe_sriov_pf_service {
+ struct {
+ struct xe_sriov_pf_service_version base;
+ struct xe_sriov_pf_service_version latest;
+ } version;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
new file mode 100644
index 000000000000..c0b767ac735c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.c
@@ -0,0 +1,647 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+
+#include <drm/drm_managed.h>
+
+#include "xe_assert.h"
+#include "xe_pci_sriov.h"
+#include "xe_pm.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_provision.h"
+#include "xe_sriov_pf_sysfs.h"
+#include "xe_sriov_printk.h"
+
+static int emit_choice(char *buf, int choice, const char * const *array, size_t size)
+{
+ int pos = 0;
+ int n;
+
+ for (n = 0; n < size; n++) {
+ pos += sysfs_emit_at(buf, pos, "%s%s%s%s",
+ n ? " " : "",
+ n == choice ? "[" : "",
+ array[n],
+ n == choice ? "]" : "");
+ }
+ pos += sysfs_emit_at(buf, pos, "\n");
+
+ return pos;
+}
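
For reference, with the sched_priority_names[] array defined further below in this file, and assuming the GUC_SCHED_PRIORITY_{LOW,NORMAL,HIGH} values map to 0, 1 and 2, the helper above renders the usual bracketed sysfs choice list:

	emit_choice(buf, GUC_SCHED_PRIORITY_NORMAL, sched_priority_names,
		    ARRAY_SIZE(sched_priority_names));
	/* buf now holds: "low [normal] high\n" */
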
+
+/*
+ * /sys/bus/pci/drivers/xe/BDF/
+ * :
+ * ├── sriov_admin/
+ * ├── ...
+ * ├── .bulk_profile
+ * │ ├── exec_quantum_ms
+ * │ ├── preempt_timeout_us
+ * │ └── sched_priority
+ * ├── pf/
+ * │ ├── ...
+ * │ ├── device -> ../../../BDF
+ * │ └── profile
+ * │ ├── exec_quantum_ms
+ * │ ├── preempt_timeout_us
+ * │ └── sched_priority
+ * ├── vf1/
+ * │ ├── ...
+ * │ ├── device -> ../../../BDF.1
+ * │ ├── stop
+ * │ └── profile
+ * │ ├── exec_quantum_ms
+ * │ ├── preempt_timeout_us
+ * │ └── sched_priority
+ * ├── vf2/
+ * :
+ * └── vfN/
+ */
+
+struct xe_sriov_kobj {
+ struct kobject base;
+ struct xe_device *xe;
+ unsigned int vfid;
+};
+#define to_xe_sriov_kobj(p) container_of_const((p), struct xe_sriov_kobj, base)
+
+struct xe_sriov_dev_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct xe_device *xe, char *buf);
+ ssize_t (*store)(struct xe_device *xe, const char *buf, size_t count);
+};
+#define to_xe_sriov_dev_attr(p) container_of_const((p), struct xe_sriov_dev_attr, attr)
+
+#define XE_SRIOV_DEV_ATTR(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+ __ATTR(NAME, 0644, xe_sriov_dev_attr_##NAME##_show, xe_sriov_dev_attr_##NAME##_store)
+
+#define XE_SRIOV_DEV_ATTR_RO(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+ __ATTR(NAME, 0444, xe_sriov_dev_attr_##NAME##_show, NULL)
+
+#define XE_SRIOV_DEV_ATTR_WO(NAME) \
+struct xe_sriov_dev_attr xe_sriov_dev_attr_##NAME = \
+ __ATTR(NAME, 0200, NULL, xe_sriov_dev_attr_##NAME##_store)
+
+struct xe_sriov_vf_attr {
+ struct attribute attr;
+ ssize_t (*show)(struct xe_device *xe, unsigned int vfid, char *buf);
+ ssize_t (*store)(struct xe_device *xe, unsigned int vfid, const char *buf, size_t count);
+};
+#define to_xe_sriov_vf_attr(p) container_of_const((p), struct xe_sriov_vf_attr, attr)
+
+#define XE_SRIOV_VF_ATTR(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+ __ATTR(NAME, 0644, xe_sriov_vf_attr_##NAME##_show, xe_sriov_vf_attr_##NAME##_store)
+
+#define XE_SRIOV_VF_ATTR_RO(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+ __ATTR(NAME, 0444, xe_sriov_vf_attr_##NAME##_show, NULL)
+
+#define XE_SRIOV_VF_ATTR_WO(NAME) \
+struct xe_sriov_vf_attr xe_sriov_vf_attr_##NAME = \
+ __ATTR(NAME, 0200, NULL, xe_sriov_vf_attr_##NAME##_store)
+
+/* device level attributes go here */
+
+#define DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(NAME, ITEM, TYPE) \
+ \
+static ssize_t xe_sriov_dev_attr_##NAME##_store(struct xe_device *xe, \
+ const char *buf, size_t count) \
+{ \
+ TYPE value; \
+ int err; \
+ \
+ err = kstrto##TYPE(buf, 0, &value); \
+ if (err) \
+ return err; \
+ \
+ err = xe_sriov_pf_provision_bulk_apply_##ITEM(xe, value); \
+ return err ?: count; \
+} \
+ \
+static XE_SRIOV_DEV_ATTR_WO(NAME)
+
+DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(exec_quantum_ms, eq, u32);
+DEFINE_SIMPLE_BULK_PROVISIONING_SRIOV_DEV_ATTR_WO(preempt_timeout_us, pt, u32);
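
For readability, the exec_quantum_ms instantiation above expands to roughly the following (token pasting already resolved; this is a transcription of the macro, not additional patch content):

	static ssize_t xe_sriov_dev_attr_exec_quantum_ms_store(struct xe_device *xe,
							       const char *buf, size_t count)
	{
		u32 value;
		int err;

		err = kstrtou32(buf, 0, &value);
		if (err)
			return err;

		/* bulk-apply the execution quantum via the provisioning layer */
		err = xe_sriov_pf_provision_bulk_apply_eq(xe, value);
		return err ?: count;
	}

	static XE_SRIOV_DEV_ATTR_WO(exec_quantum_ms);
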
+
+static const char * const sched_priority_names[] = {
+ [GUC_SCHED_PRIORITY_LOW] = "low",
+ [GUC_SCHED_PRIORITY_NORMAL] = "normal",
+ [GUC_SCHED_PRIORITY_HIGH] = "high",
+};
+
+static bool sched_priority_change_allowed(unsigned int vfid)
+{
+ /* As of today, GuC FW only allows the PF priority to be changed individually. */
+ return vfid == PFID;
+}
+
+static bool sched_priority_high_allowed(unsigned int vfid)
+{
+ /* As of today, GuC FW allows selecting the 'high' priority only for the PF. */
+ return vfid == PFID;
+}
+
+static bool sched_priority_bulk_high_allowed(struct xe_device *xe)
+{
+ /* all VFs are equal - it's sufficient to check VF1 only */
+ return sched_priority_high_allowed(VFID(1));
+}
+
+static ssize_t xe_sriov_dev_attr_sched_priority_store(struct xe_device *xe,
+ const char *buf, size_t count)
+{
+ size_t num_priorities = ARRAY_SIZE(sched_priority_names);
+ int match;
+ int err;
+
+ if (!sched_priority_bulk_high_allowed(xe))
+ num_priorities--;
+
+ match = __sysfs_match_string(sched_priority_names, num_priorities, buf);
+ if (match < 0)
+ return -EINVAL;
+
+ err = xe_sriov_pf_provision_bulk_apply_priority(xe, match);
+ return err ?: count;
+}
+
+static XE_SRIOV_DEV_ATTR_WO(sched_priority);
+
+static struct attribute *bulk_profile_dev_attrs[] = {
+ &xe_sriov_dev_attr_exec_quantum_ms.attr,
+ &xe_sriov_dev_attr_preempt_timeout_us.attr,
+ &xe_sriov_dev_attr_sched_priority.attr,
+ NULL
+};
+
+static const struct attribute_group bulk_profile_dev_attr_group = {
+ .name = ".bulk_profile",
+ .attrs = bulk_profile_dev_attrs,
+};
+
+static const struct attribute_group *xe_sriov_dev_attr_groups[] = {
+ &bulk_profile_dev_attr_group,
+ NULL
+};
+
+/* and VF-level attributes go here */
+
+#define DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(NAME, ITEM, TYPE, FORMAT) \
+static ssize_t xe_sriov_vf_attr_##NAME##_show(struct xe_device *xe, unsigned int vfid, \
+ char *buf) \
+{ \
+ TYPE value = 0; \
+ int err; \
+ \
+ err = xe_sriov_pf_provision_query_vf_##ITEM(xe, vfid, &value); \
+ if (err) \
+ return err; \
+ \
+ return sysfs_emit(buf, FORMAT, value); \
+} \
+ \
+static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \
+ const char *buf, size_t count) \
+{ \
+ TYPE value; \
+ int err; \
+ \
+ err = kstrto##TYPE(buf, 0, &value); \
+ if (err) \
+ return err; \
+ \
+ err = xe_sriov_pf_provision_apply_vf_##ITEM(xe, vfid, value); \
+ return err ?: count; \
+} \
+ \
+static XE_SRIOV_VF_ATTR(NAME)
+
+DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(exec_quantum_ms, eq, u32, "%u\n");
+DEFINE_SIMPLE_PROVISIONING_SRIOV_VF_ATTR(preempt_timeout_us, pt, u32, "%u\n");
+
+static ssize_t xe_sriov_vf_attr_sched_priority_show(struct xe_device *xe, unsigned int vfid,
+ char *buf)
+{
+ size_t num_priorities = ARRAY_SIZE(sched_priority_names);
+ u32 priority;
+ int err;
+
+ err = xe_sriov_pf_provision_query_vf_priority(xe, vfid, &priority);
+ if (err)
+ return err;
+
+ if (!sched_priority_high_allowed(vfid))
+ num_priorities--;
+
+ xe_assert(xe, priority < num_priorities);
+ return emit_choice(buf, priority, sched_priority_names, num_priorities);
+}
+
+static ssize_t xe_sriov_vf_attr_sched_priority_store(struct xe_device *xe, unsigned int vfid,
+ const char *buf, size_t count)
+{
+ size_t num_priorities = ARRAY_SIZE(sched_priority_names);
+ int match;
+ int err;
+
+ if (!sched_priority_change_allowed(vfid))
+ return -EOPNOTSUPP;
+
+ if (!sched_priority_high_allowed(vfid))
+ num_priorities--;
+
+ match = __sysfs_match_string(sched_priority_names, num_priorities, buf);
+ if (match < 0)
+ return -EINVAL;
+
+ err = xe_sriov_pf_provision_apply_vf_priority(xe, vfid, match);
+ return err ?: count;
+}
+
+static XE_SRIOV_VF_ATTR(sched_priority);
+
+static struct attribute *profile_vf_attrs[] = {
+ &xe_sriov_vf_attr_exec_quantum_ms.attr,
+ &xe_sriov_vf_attr_preempt_timeout_us.attr,
+ &xe_sriov_vf_attr_sched_priority.attr,
+ NULL
+};
+
+static umode_t profile_vf_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+
+ if (attr == &xe_sriov_vf_attr_sched_priority.attr &&
+ !sched_priority_change_allowed(vkobj->vfid))
+ return attr->mode & 0444;
+
+ return attr->mode;
+}
+
+static const struct attribute_group profile_vf_attr_group = {
+ .name = "profile",
+ .attrs = profile_vf_attrs,
+ .is_visible = profile_vf_attr_is_visible,
+};
+
+#define DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(NAME) \
+ \
+static ssize_t xe_sriov_vf_attr_##NAME##_store(struct xe_device *xe, unsigned int vfid, \
+ const char *buf, size_t count) \
+{ \
+ bool yes; \
+ int err; \
+ \
+ if (!vfid) \
+ return -EPERM; \
+ \
+ err = kstrtobool(buf, &yes); \
+ if (err) \
+ return err; \
+ if (!yes) \
+ return count; \
+ \
+ err = xe_sriov_pf_control_##NAME##_vf(xe, vfid); \
+ return err ?: count; \
+} \
+ \
+static XE_SRIOV_VF_ATTR_WO(NAME)
+
+DEFINE_SIMPLE_CONTROL_SRIOV_VF_ATTR(stop);
+
+static struct attribute *control_vf_attrs[] = {
+ &xe_sriov_vf_attr_stop.attr,
+ NULL
+};
+
+static umode_t control_vf_attr_is_visible(struct kobject *kobj,
+ struct attribute *attr, int index)
+{
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+
+ if (vkobj->vfid == PFID)
+ return 0;
+
+ return attr->mode;
+}
+
+static const struct attribute_group control_vf_attr_group = {
+ .attrs = control_vf_attrs,
+ .is_visible = control_vf_attr_is_visible,
+};
+
+static const struct attribute_group *xe_sriov_vf_attr_groups[] = {
+ &profile_vf_attr_group,
+ &control_vf_attr_group,
+ NULL
+};
+
+/* no user serviceable parts below */
+
+static struct kobject *create_xe_sriov_kobj(struct xe_device *xe, unsigned int vfid)
+{
+ struct xe_sriov_kobj *vkobj;
+
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ vkobj = kzalloc(sizeof(*vkobj), GFP_KERNEL);
+ if (!vkobj)
+ return NULL;
+
+ vkobj->xe = xe;
+ vkobj->vfid = vfid;
+ return &vkobj->base;
+}
+
+static void release_xe_sriov_kobj(struct kobject *kobj)
+{
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+
+ kfree(vkobj);
+}
+
+static ssize_t xe_sriov_dev_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+
+ if (!vattr->show)
+ return -EPERM;
+
+ return vattr->show(xe, buf);
+}
+
+static ssize_t xe_sriov_dev_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xe_sriov_dev_attr *vattr = to_xe_sriov_dev_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+ ssize_t ret;
+
+ if (!vattr->store)
+ return -EPERM;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, buf, count);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static ssize_t xe_sriov_vf_attr_show(struct kobject *kobj, struct attribute *attr, char *buf)
+{
+ struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+ unsigned int vfid = vkobj->vfid;
+
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ if (!vattr->show)
+ return -EPERM;
+
+ return vattr->show(xe, vfid, buf);
+}
+
+static ssize_t xe_sriov_vf_attr_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
+{
+ struct xe_sriov_vf_attr *vattr = to_xe_sriov_vf_attr(attr);
+ struct xe_sriov_kobj *vkobj = to_xe_sriov_kobj(kobj);
+ struct xe_device *xe = vkobj->xe;
+ unsigned int vfid = vkobj->vfid;
+ ssize_t ret;
+
+ xe_sriov_pf_assert_vfid(xe, vfid);
+
+ if (!vattr->store)
+ return -EPERM;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_sriov_pf_wait_ready(xe) ?: vattr->store(xe, vfid, buf, count);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static const struct sysfs_ops xe_sriov_dev_sysfs_ops = {
+ .show = xe_sriov_dev_attr_show,
+ .store = xe_sriov_dev_attr_store,
+};
+
+static const struct sysfs_ops xe_sriov_vf_sysfs_ops = {
+ .show = xe_sriov_vf_attr_show,
+ .store = xe_sriov_vf_attr_store,
+};
+
+static const struct kobj_type xe_sriov_dev_ktype = {
+ .release = release_xe_sriov_kobj,
+ .sysfs_ops = &xe_sriov_dev_sysfs_ops,
+ .default_groups = xe_sriov_dev_attr_groups,
+};
+
+static const struct kobj_type xe_sriov_vf_ktype = {
+ .release = release_xe_sriov_kobj,
+ .sysfs_ops = &xe_sriov_vf_sysfs_ops,
+ .default_groups = xe_sriov_vf_attr_groups,
+};
+
+static int pf_sysfs_error(struct xe_device *xe, int err, const char *what)
+{
+ if (IS_ENABLED(CONFIG_DRM_XE_DEBUG))
+ xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err));
+ return err;
+}
+
+static void pf_sysfs_note(struct xe_device *xe, int err, const char *what)
+{
+ xe_sriov_dbg(xe, "Failed to setup sysfs %s (%pe)\n", what, ERR_PTR(err));
+}
+
+static void action_put_kobject(void *arg)
+{
+ struct kobject *kobj = arg;
+
+ kobject_put(kobj);
+}
+
+static int pf_setup_root(struct xe_device *xe)
+{
+ struct kobject *parent = &xe->drm.dev->kobj;
+ struct kobject *root;
+ int err;
+
+ root = create_xe_sriov_kobj(xe, PFID);
+ if (!root)
+ return pf_sysfs_error(xe, -ENOMEM, "root obj");
+
+ err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, root);
+ if (err)
+ return pf_sysfs_error(xe, err, "root action");
+
+ err = kobject_init_and_add(root, &xe_sriov_dev_ktype, parent, "sriov_admin");
+ if (err)
+ return pf_sysfs_error(xe, err, "root init");
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, !xe->sriov.pf.sysfs.root);
+ xe->sriov.pf.sysfs.root = root;
+ return 0;
+}
+
+static int pf_setup_tree(struct xe_device *xe)
+{
+ unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ struct kobject *root, *kobj;
+ unsigned int n;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ root = xe->sriov.pf.sysfs.root;
+
+ for (n = 0; n <= totalvfs; n++) {
+ kobj = create_xe_sriov_kobj(xe, VFID(n));
+ if (!kobj)
+ return pf_sysfs_error(xe, -ENOMEM, "tree obj");
+
+ err = devm_add_action_or_reset(xe->drm.dev, action_put_kobject, kobj);
+ if (err)
+ return pf_sysfs_error(xe, err, "tree action");
+
+ if (n)
+ err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype,
+ root, "vf%u", n);
+ else
+ err = kobject_init_and_add(kobj, &xe_sriov_vf_ktype,
+ root, "pf");
+ if (err)
+ return pf_sysfs_error(xe, err, "tree init");
+
+ xe_assert(xe, !xe->sriov.pf.vfs[n].kobj);
+ xe->sriov.pf.vfs[n].kobj = kobj;
+ }
+
+ return 0;
+}
+
+static void action_rm_device_link(void *arg)
+{
+ struct kobject *kobj = arg;
+
+ sysfs_remove_link(kobj, "device");
+}
+
+static int pf_link_pf_device(struct xe_device *xe)
+{
+ struct kobject *kobj = xe->sriov.pf.vfs[PFID].kobj;
+ int err;
+
+ err = sysfs_create_link(kobj, &xe->drm.dev->kobj, "device");
+ if (err)
+ return pf_sysfs_error(xe, err, "PF device link");
+
+ err = devm_add_action_or_reset(xe->drm.dev, action_rm_device_link, kobj);
+ if (err)
+ return pf_sysfs_error(xe, err, "PF unlink action");
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_sysfs_init() - Setup PF's SR-IOV sysfs tree.
+ * @xe: the PF &xe_device to setup sysfs
+ *
+ * This function will create additional nodes that will represent the PF and VF
+ * devices, each populated with Xe-specific SR-IOV attributes.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_pf_sysfs_init(struct xe_device *xe)
+{
+ int err;
+
+ err = pf_setup_root(xe);
+ if (err)
+ return err;
+
+ err = pf_setup_tree(xe);
+ if (err)
+ return err;
+
+ err = pf_link_pf_device(xe);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+/**
+ * xe_sriov_pf_sysfs_link_vfs() - Add VF links to the SR-IOV sysfs tree.
+ * @xe: the &xe_device where to update sysfs
+ * @num_vfs: number of enabled VFs to link
+ *
+ * This function is specific to the PF driver.
+ *
+ * This function will add symbolic links between the VF entries in the SR-IOV
+ * sysfs tree maintained by the PF and the enabled VF PCI devices.
+ *
+ * xe_sriov_pf_sysfs_unlink_vfs() shall be used to remove those links.
+ */
+void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ unsigned int totalvfs = xe_sriov_pf_get_totalvfs(xe);
+ struct pci_dev *pf_pdev = to_pci_dev(xe->drm.dev);
+ struct pci_dev *vf_pdev = NULL;
+ unsigned int n;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, num_vfs <= totalvfs);
+
+ for (n = 1; n <= num_vfs; n++) {
+ vf_pdev = xe_pci_sriov_get_vf_pdev(pf_pdev, VFID(n));
+ if (!vf_pdev)
+ return pf_sysfs_note(xe, -ENOENT, "VF link");
+
+ err = sysfs_create_link(xe->sriov.pf.vfs[VFID(n)].kobj,
+ &vf_pdev->dev.kobj, "device");
+
+ /* must balance xe_pci_sriov_get_vf_pdev() */
+ pci_dev_put(vf_pdev);
+
+ if (err)
+ return pf_sysfs_note(xe, err, "VF link");
+ }
+}
+
+/**
+ * xe_sriov_pf_sysfs_unlink_vfs() - Remove VF links from the SR-IOV sysfs tree.
+ * @xe: the &xe_device where to update sysfs
+ * @num_vfs: number of VFs to unlink
+ *
+ * This function shall be called only on the PF.
+ * This function will remove the "device" links added by xe_sriov_pf_sysfs_link_vfs().
+ */
+void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs)
+{
+ unsigned int n;
+
+ xe_assert(xe, IS_SRIOV_PF(xe));
+ xe_assert(xe, num_vfs <= xe_sriov_pf_get_totalvfs(xe));
+
+ for (n = 1; n <= num_vfs; n++)
+ sysfs_remove_link(xe->sriov.pf.vfs[VFID(n)].kobj, "device");
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h
new file mode 100644
index 000000000000..ae92ed1766e7
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_sysfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_SYSFS_H_
+#define _XE_SRIOV_PF_SYSFS_H_
+
+struct xe_device;
+
+int xe_sriov_pf_sysfs_init(struct xe_device *xe);
+
+void xe_sriov_pf_sysfs_link_vfs(struct xe_device *xe, unsigned int num_vfs);
+void xe_sriov_pf_sysfs_unlink_vfs(struct xe_device *xe, unsigned int num_vfs);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_pf_types.h b/drivers/gpu/drm/xe/xe_sriov_pf_types.h
new file mode 100644
index 000000000000..b0253e1ae5da
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_pf_types.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_PF_TYPES_H_
+#define _XE_SRIOV_PF_TYPES_H_
+
+#include <linux/mutex.h>
+#include <linux/types.h>
+
+#include "xe_guard.h"
+#include "xe_sriov_pf_migration_types.h"
+#include "xe_sriov_pf_provision_types.h"
+#include "xe_sriov_pf_service_types.h"
+
+struct kobject;
+
+/**
+ * struct xe_sriov_metadata - per-VF device level metadata
+ */
+struct xe_sriov_metadata {
+ /** @kobj: kobject representing VF in PF's SR-IOV sysfs tree. */
+ struct kobject *kobj;
+
+ /** @version: negotiated VF/PF ABI version */
+ struct xe_sriov_pf_service_version version;
+ /** @migration: migration state */
+ struct xe_sriov_migration_state migration;
+};
+
+/**
+ * struct xe_device_pf - Xe PF related data
+ *
+ * The data in this structure is valid only if the driver is running in the
+ * @XE_SRIOV_MODE_PF mode.
+ */
+struct xe_device_pf {
+ /** @device_total_vfs: Maximum number of VFs supported by the device. */
+ u16 device_total_vfs;
+
+ /** @driver_max_vfs: Maximum number of VFs supported by the driver. */
+ u16 driver_max_vfs;
+
+ /** @guard_vfs_enabling: guards VFs enabling */
+ struct xe_guard guard_vfs_enabling;
+
+ /** @master_lock: protects all VFs configurations across GTs */
+ struct mutex master_lock;
+
+ /** @provision: device level provisioning data. */
+ struct xe_sriov_pf_provision provision;
+
+ /** @migration: device level migration data. */
+ struct xe_sriov_pf_migration migration;
+
+ /** @service: device level service data. */
+ struct xe_sriov_pf_service service;
+
+ /** @sysfs: device level sysfs data. */
+ struct {
+ /** @sysfs.root: the root kobject for all SR-IOV entries in sysfs. */
+ struct kobject *root;
+ } sysfs;
+
+ /** @vfs: metadata for all VFs. */
+ struct xe_sriov_metadata *vfs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_printk.h b/drivers/gpu/drm/xe/xe_sriov_printk.h
index 117e1d541692..4c6b5c3d2190 100644
--- a/drivers/gpu/drm/xe/xe_sriov_printk.h
+++ b/drivers/gpu/drm/xe/xe_sriov_printk.h
@@ -1,22 +1,22 @@
/* SPDX-License-Identifier: MIT */
/*
- * Copyright © 2023 Intel Corporation
+ * Copyright © 2023-2025 Intel Corporation
*/
#ifndef _XE_SRIOV_PRINTK_H_
#define _XE_SRIOV_PRINTK_H_
-#include <drm/drm_print.h>
-
-#include "xe_device_types.h"
-#include "xe_sriov_types.h"
+#include "xe_printk.h"
#define xe_sriov_printk_prefix(xe) \
((xe)->sriov.__mode == XE_SRIOV_MODE_PF ? "PF: " : \
(xe)->sriov.__mode == XE_SRIOV_MODE_VF ? "VF: " : "")
+#define __XE_SRIOV_PRINTK_FMT(_xe, _fmt, _args...) \
+ "%s" _fmt, xe_sriov_printk_prefix(_xe), ##_args
+
#define xe_sriov_printk(xe, _level, fmt, ...) \
- drm_##_level(&(xe)->drm, "%s" fmt, xe_sriov_printk_prefix(xe), ##__VA_ARGS__)
+ xe_##_level((xe), __XE_SRIOV_PRINTK_FMT((xe), fmt, ##__VA_ARGS__))
#define xe_sriov_err(xe, fmt, ...) \
xe_sriov_printk((xe), err, fmt, ##__VA_ARGS__)
diff --git a/drivers/gpu/drm/xe/xe_sriov_types.h b/drivers/gpu/drm/xe/xe_sriov_types.h
index ca94382a721e..1a138108d139 100644
--- a/drivers/gpu/drm/xe/xe_sriov_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_types.h
@@ -7,9 +7,6 @@
#define _XE_SRIOV_TYPES_H_
#include <linux/build_bug.h>
-#include <linux/mutex.h>
-#include <linux/types.h>
-#include <linux/workqueue_types.h>
/**
* VFID - Virtual Function Identifier
@@ -40,37 +37,4 @@ enum xe_sriov_mode {
};
static_assert(XE_SRIOV_MODE_NONE);
-/**
- * struct xe_device_pf - Xe PF related data
- *
- * The data in this structure is valid only if driver is running in the
- * @XE_SRIOV_MODE_PF mode.
- */
-struct xe_device_pf {
- /** @device_total_vfs: Maximum number of VFs supported by the device. */
- u16 device_total_vfs;
-
- /** @driver_max_vfs: Maximum number of VFs supported by the driver. */
- u16 driver_max_vfs;
-
- /** @master_lock: protects all VFs configurations across GTs */
- struct mutex master_lock;
-};
-
-/**
- * struct xe_device_vf - Xe Virtual Function related data
- *
- * The data in this structure is valid only if driver is running in the
- * @XE_SRIOV_MODE_VF mode.
- */
-struct xe_device_vf {
- /** @migration: VF Migration state data */
- struct {
- /** @migration.worker: VF migration recovery worker */
- struct work_struct worker;
- /** @migration.gt_flags: Per-GT request flags for VF migration recovery */
- unsigned long gt_flags;
- } migration;
-};
-
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.c b/drivers/gpu/drm/xe/xe_sriov_vf.c
index c1275e64aa9c..284ce37ca92d 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.c
@@ -3,16 +3,15 @@
* Copyright © 2023-2024 Intel Corporation
*/
+#include <drm/drm_debugfs.h>
#include <drm/drm_managed.h>
-#include "xe_assert.h"
-#include "xe_device.h"
-#include "xe_gt_sriov_printk.h"
+#include "xe_gt.h"
#include "xe_gt_sriov_vf.h"
-#include "xe_pm.h"
-#include "xe_sriov.h"
+#include "xe_guc.h"
#include "xe_sriov_printk.h"
#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
/**
* DOC: VF restore procedure in PF KMD and VF KMD
@@ -121,143 +120,92 @@
* | | |
*/
-static void migration_worker_func(struct work_struct *w);
-
/**
- * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
- * @xe: the &xe_device to initialize
+ * xe_sriov_vf_migration_supported - Report whether SR-IOV VF migration is
+ * supported or not.
+ * @xe: the &xe_device to check
+ *
+ * Returns: true if VF migration is supported, false otherwise.
*/
-void xe_sriov_vf_init_early(struct xe_device *xe)
+bool xe_sriov_vf_migration_supported(struct xe_device *xe)
{
- INIT_WORK(&xe->sriov.vf.migration.worker, migration_worker_func);
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return !xe->sriov.vf.migration.disabled;
}
/**
- * vf_post_migration_requery_guc - Re-query GuC for current VF provisioning.
- * @xe: the &xe_device struct instance
- *
- * After migration, we need to re-query all VF configuration to make sure
- * they match previous provisioning. Note that most of VF provisioning
- * shall be the same, except GGTT range, since GGTT is not virtualized per-VF.
- *
- * Returns: 0 if the operation completed successfully, or a negative error
- * code otherwise.
+ * xe_sriov_vf_migration_disable - Turn off VF migration, logging the given message.
+ * @xe: the &xe_device instance.
+ * @fmt: format string for the log message, to be combined with the following variable arguments.
*/
-static int vf_post_migration_requery_guc(struct xe_device *xe)
+void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...)
{
- struct xe_gt *gt;
- unsigned int id;
- int err, ret = 0;
+ struct va_format vaf;
+ va_list va_args;
- for_each_gt(gt, xe, id) {
- err = xe_gt_sriov_vf_query_config(gt);
- ret = ret ?: err;
- }
+ xe_assert(xe, IS_SRIOV_VF(xe));
- return ret;
-}
+ va_start(va_args, fmt);
+ vaf.fmt = fmt;
+ vaf.va = &va_args;
+ xe_sriov_notice(xe, "migration disabled: %pV\n", &vaf);
+ va_end(va_args);
-/*
- * vf_post_migration_imminent - Check if post-restore recovery is coming.
- * @xe: the &xe_device struct instance
- *
- * Return: True if migration recovery worker will soon be running. Any worker currently
- * executing does not affect the result.
- */
-static bool vf_post_migration_imminent(struct xe_device *xe)
-{
- return xe->sriov.vf.migration.gt_flags != 0 ||
- work_pending(&xe->sriov.vf.migration.worker);
+ xe->sriov.vf.migration.disabled = true;
}
-/*
- * Notify all GuCs about resource fixups apply finished.
- */
-static void vf_post_migration_notify_resfix_done(struct xe_device *xe)
+static void vf_migration_init_early(struct xe_device *xe)
{
- struct xe_gt *gt;
- unsigned int id;
+ if (!xe_device_has_memirq(xe))
+ return xe_sriov_vf_migration_disable(xe, "requires memory-based IRQ support");
- for_each_gt(gt, xe, id) {
- if (vf_post_migration_imminent(xe))
- goto skip;
- xe_gt_sriov_vf_notify_resfix_done(gt);
- }
- return;
-
-skip:
- drm_dbg(&xe->drm, "another recovery imminent, skipping notifications\n");
}
-static void vf_post_migration_recovery(struct xe_device *xe)
+/**
+ * xe_sriov_vf_init_early - Initialize SR-IOV VF specific data.
+ * @xe: the &xe_device to initialize
+ */
+void xe_sriov_vf_init_early(struct xe_device *xe)
{
- int err;
-
- drm_dbg(&xe->drm, "migration recovery in progress\n");
- xe_pm_runtime_get(xe);
- err = vf_post_migration_requery_guc(xe);
- if (vf_post_migration_imminent(xe))
- goto defer;
- if (unlikely(err))
- goto fail;
-
- /* FIXME: add the recovery steps */
- vf_post_migration_notify_resfix_done(xe);
- xe_pm_runtime_put(xe);
- drm_notice(&xe->drm, "migration recovery ended\n");
- return;
-defer:
- xe_pm_runtime_put(xe);
- drm_dbg(&xe->drm, "migration recovery deferred\n");
- return;
-fail:
- xe_pm_runtime_put(xe);
- drm_err(&xe->drm, "migration recovery failed (%pe)\n", ERR_PTR(err));
- xe_device_declare_wedged(xe);
+ vf_migration_init_early(xe);
}
-static void migration_worker_func(struct work_struct *w)
+/**
+ * xe_sriov_vf_init_late() - SR-IOV VF late initialization functions.
+ * @xe: the &xe_device to initialize
+ *
+ * This function initializes code for CCS migration.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_sriov_vf_init_late(struct xe_device *xe)
{
- struct xe_device *xe = container_of(w, struct xe_device,
- sriov.vf.migration.worker);
-
- vf_post_migration_recovery(xe);
+ return xe_sriov_vf_ccs_init(xe);
}
-static bool vf_ready_to_recovery_on_all_gts(struct xe_device *xe)
+static int sa_info_vf_ccs(struct seq_file *m, void *data)
{
- struct xe_gt *gt;
- unsigned int id;
+ struct drm_info_node *node = m->private;
+ struct xe_device *xe = to_xe_device(node->minor->dev);
+ struct drm_printer p = drm_seq_file_printer(m);
- for_each_gt(gt, xe, id) {
- if (!test_bit(id, &xe->sriov.vf.migration.gt_flags)) {
- xe_gt_sriov_dbg_verbose(gt, "still not ready to recover\n");
- return false;
- }
- }
- return true;
+ xe_sriov_vf_ccs_print(xe, &p);
+ return 0;
}
+static const struct drm_info_list debugfs_list[] = {
+ { .name = "sa_info_vf_ccs", .show = sa_info_vf_ccs },
+};
+
/**
- * xe_sriov_vf_start_migration_recovery - Start VF migration recovery.
- * @xe: the &xe_device to start recovery on
+ * xe_sriov_vf_debugfs_register - Register VF debugfs attributes.
+ * @xe: the &xe_device
+ * @root: the root &dentry
*
- * This function shall be called only by VF.
+ * Prepare debugfs attributes exposed by the VF.
*/
-void xe_sriov_vf_start_migration_recovery(struct xe_device *xe)
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root)
{
- bool started;
-
- xe_assert(xe, IS_SRIOV_VF(xe));
-
- if (!vf_ready_to_recovery_on_all_gts(xe))
- return;
-
- WRITE_ONCE(xe->sriov.vf.migration.gt_flags, 0);
- /* Ensure other threads see that no flags are set now. */
- smp_mb();
-
- started = queue_work(xe->sriov.wq, &xe->sriov.vf.migration.worker);
- drm_info(&xe->drm, "VF migration recovery %s\n", started ?
- "scheduled" : "already in progress");
+ drm_debugfs_create_files(debugfs_list, ARRAY_SIZE(debugfs_list),
+ root, xe->drm.primary);
}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf.h b/drivers/gpu/drm/xe/xe_sriov_vf.h
index 7b8622cff2b7..e967d4166a43 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf.h
@@ -6,9 +6,15 @@
#ifndef _XE_SRIOV_VF_H_
#define _XE_SRIOV_VF_H_
+#include <linux/types.h>
+
+struct dentry;
struct xe_device;
void xe_sriov_vf_init_early(struct xe_device *xe);
-void xe_sriov_vf_start_migration_recovery(struct xe_device *xe);
+int xe_sriov_vf_init_late(struct xe_device *xe);
+bool xe_sriov_vf_migration_supported(struct xe_device *xe);
+void xe_sriov_vf_migration_disable(struct xe_device *xe, const char *fmt, ...);
+void xe_sriov_vf_debugfs_register(struct xe_device *xe, struct dentry *root);
#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
new file mode 100644
index 000000000000..797a4b866226
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "instructions/xe_mi_commands.h"
+#include "instructions/xe_gpu_commands.h"
+#include "xe_bb.h"
+#include "xe_bo.h"
+#include "xe_device.h"
+#include "xe_exec_queue.h"
+#include "xe_exec_queue_types.h"
+#include "xe_gt_sriov_vf.h"
+#include "xe_guc.h"
+#include "xe_guc_submit.h"
+#include "xe_lrc.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_sa.h"
+#include "xe_sriov_printk.h"
+#include "xe_sriov_vf.h"
+#include "xe_sriov_vf_ccs.h"
+#include "xe_sriov_vf_ccs_types.h"
+
+/**
+ * DOC: VF save/restore of compression Meta Data
+ *
+ * VF KMD registers two special contexts/LRCAs.
+ *
+ * Save Context/LRCA: contains the commands and page tables needed to trigger a
+ * save of the metadata / compression control surface (aka CCS) into regular system memory in the VM.
+ *
+ * Restore Context/LRCA: contains the commands and page tables needed to trigger a
+ * restore of the metadata / compression control surface (aka CCS) from regular system memory
+ * in the VM back into the corresponding CCS pool.
+ *
+ * The diagram below explains the steps needed for VF save/restore of the compression metadata::
+ *
+ * CCS Save CCS Restore VF KMD Guc BCS
+ * LRCA LRCA
+ * | | | | |
+ * | | | | |
+ * | Create Save LRCA | | |
+ * [ ]<----------------------------- [ ] | |
+ * | | | | |
+ * | | | | |
+ * | | | Register save LRCA | |
+ * | | | with Guc | |
+ * | | [ ]--------------------------->[ ] |
+ * | | | | |
+ * | | Create restore LRCA | | |
+ * | [ ]<------------------[ ] | |
+ * | | | | |
+ * | | | Register restore LRCA | |
+ * | | | with Guc | |
+ * | | [ ]--------------------------->[ ] |
+ * | | | | |
+ * | | | | |
+ * | | [ ]------------------------- | |
+ * | | [ ] Allocate main memory. | | |
+ * | | [ ] Allocate CCS memory. | | |
+ * | | [ ] Update Main memory & | | |
+ * [ ]<------------------------------[ ] CCS pages PPGTT + BB | | |
+ * | [ ]<------------------[ ] cmds to save & restore.| | |
+ * | | [ ]<------------------------ | |
+ * | | | | |
+ * | | | | |
+ * | | | | |
+ * : : : : :
+ * ---------------------------- VF Paused -------------------------------------
+ * | | | | |
+ * | | | | |
+ * | | | |Schedule |
+ * | | | |CCS Save |
+ * | | | | LRCA |
+ * | | | [ ]------>[ ]
+ * | | | | |
+ * | | | | |
+ * | | | |CCS save |
+ * | | | |completed|
+ * | | | [ ]<------[ ]
+ * | | | | |
+ * : : : : :
+ * ---------------------------- VM Migrated -----------------------------------
+ * | | | | |
+ * | | | | |
+ * : : : : :
+ * ---------------------------- VF Resumed ------------------------------------
+ * | | | | |
+ * | | | | |
+ * | | [ ]-------------- | |
+ * | | [ ] Fix up GGTT | | |
+ * | | [ ]<------------- | |
+ * | | | | |
+ * | | | | |
+ * | | | Notify VF_RESFIX_DONE | |
+ * | | [ ]--------------------------->[ ] |
+ * | | | | |
+ * | | | |Schedule |
+ * | | | |CCS |
+ * | | | |Restore |
+ * | | | |LRCA |
+ * | | | [ ]------>[ ]
+ * | | | | |
+ * | | | | |
+ * | | | |CCS |
+ * | | | |restore |
+ * | | | |completed|
+ * | | | [ ]<------[ ]
+ * | | | | |
+ * | | | | |
+ * | | | VF_RESFIX_DONE complete | |
+ * | | | notification | |
+ * | | [ ]<---------------------------[ ] |
+ * | | | | |
+ * | | | | |
+ * : : : : :
+ * ------------------------- Continue VM restore ------------------------------
+ */
+
+static u64 get_ccs_bb_pool_size(struct xe_device *xe)
+{
+ u64 sys_mem_size, ccs_mem_size, ptes, bb_pool_size;
+ struct sysinfo si;
+
+ si_meminfo(&si);
+ sys_mem_size = si.totalram * si.mem_unit;
+ ccs_mem_size = div64_u64(sys_mem_size, NUM_BYTES_PER_CCS_BYTE(xe));
+ ptes = DIV_ROUND_UP_ULL(sys_mem_size + ccs_mem_size, XE_PAGE_SIZE);
+
+ /*
+ * We need the BB size below to hold the PTE mappings and some DWs for the
+ * copy command. In reality, we need space for many copy commands, so
+ * allocate double the calculated size, which is enough to hold the GPU
+ * instructions for the whole region.
+ */
+ bb_pool_size = ptes * sizeof(u32);
+
+ return round_up(bb_pool_size * 2, SZ_1M);
+}
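
To put the sizing above in perspective: assuming 64 GiB of system RAM and a 1:256 CCS ratio (NUM_BYTES_PER_CCS_BYTE() is platform dependent, so this ratio is only an assumption for the example):

	ccs_mem_size = 64 GiB / 256                  = 256 MiB
	ptes         = (64 GiB + 256 MiB) / 4 KiB    ≈ 16.8M entries
	bb_pool_size = ptes * sizeof(u32)            ≈ 64.3 MiB
	return value = round_up(2 * 64.3 MiB, 1 MiB) = 129 MiB
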
+
+static int alloc_bb_pool(struct xe_tile *tile, struct xe_sriov_vf_ccs_ctx *ctx)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_sa_manager *sa_manager;
+ u64 bb_pool_size;
+ int offset, err;
+
+ bb_pool_size = get_ccs_bb_pool_size(xe);
+ xe_sriov_info(xe, "Allocating %s CCS BB pool size = %lldMB\n",
+ ctx->ctx_id ? "Restore" : "Save", bb_pool_size / SZ_1M);
+
+ sa_manager = xe_sa_bo_manager_init(tile, bb_pool_size, SZ_16);
+
+ if (IS_ERR(sa_manager)) {
+ xe_sriov_err(xe, "Suballocator init failed with error: %pe\n",
+ sa_manager);
+ err = PTR_ERR(sa_manager);
+ return err;
+ }
+
+ offset = 0;
+ xe_map_memset(xe, &sa_manager->bo->vmap, offset, MI_NOOP,
+ bb_pool_size);
+
+ offset = bb_pool_size - sizeof(u32);
+ xe_map_wr(xe, &sa_manager->bo->vmap, offset, u32, MI_BATCH_BUFFER_END);
+
+ ctx->mem.ccs_bb_pool = sa_manager;
+
+ return 0;
+}
+
+static void ccs_rw_update_ring(struct xe_sriov_vf_ccs_ctx *ctx)
+{
+ u64 addr = xe_sa_manager_gpu_addr(ctx->mem.ccs_bb_pool);
+ struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
+ u32 dw[10], i = 0;
+
+ /*
+ * XXX: Save/restore fixes — for some reason, the GuC only accepts the
+ * save/restore context if the LRC head pointer is zero. This is evident
+ * from repeated VF migrations failing when the LRC head pointer is
+ * non-zero.
+ */
+ lrc->ring.tail = 0;
+ xe_lrc_set_ring_head(lrc, 0);
+
+ dw[i++] = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ dw[i++] = MI_BATCH_BUFFER_START | XE_INSTR_NUM_DW(3);
+ dw[i++] = lower_32_bits(addr);
+ dw[i++] = upper_32_bits(addr);
+ dw[i++] = MI_NOOP;
+ dw[i++] = MI_NOOP;
+
+ xe_lrc_write_ring(lrc, dw, i * sizeof(u32));
+ xe_lrc_set_ring_tail(lrc, lrc->ring.tail);
+}
+
+/**
+ * xe_sriov_vf_ccs_rebase - Rebase GGTT addresses for CCS save / restore
+ * @xe: the &xe_device.
+ */
+void xe_sriov_vf_ccs_rebase(struct xe_device *xe)
+{
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
+ if (!IS_VF_CCS_READY(xe))
+ return;
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ struct xe_sriov_vf_ccs_ctx *ctx =
+ &xe->sriov.vf.ccs.contexts[ctx_id];
+
+ ccs_rw_update_ring(ctx);
+ }
+}
+
+static int register_save_restore_context(struct xe_sriov_vf_ccs_ctx *ctx)
+{
+ int ctx_type;
+
+ switch (ctx->ctx_id) {
+ case XE_SRIOV_VF_CCS_READ_CTX:
+ ctx_type = GUC_CONTEXT_COMPRESSION_SAVE;
+ break;
+ case XE_SRIOV_VF_CCS_WRITE_CTX:
+ ctx_type = GUC_CONTEXT_COMPRESSION_RESTORE;
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ xe_guc_register_vf_exec_queue(ctx->mig_q, ctx_type);
+ return 0;
+}
+
+/**
+ * xe_sriov_vf_ccs_register_context - Register read/write contexts with GuC.
+ * @xe: the &xe_device to register contexts on.
+ *
+ * This function registers the read and write contexts with GuC. Re-registration
+ * is needed whenever resuming from runtime suspend.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_sriov_vf_ccs_register_context(struct xe_device *xe)
+{
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ int err;
+
+ xe_assert(xe, IS_VF_CCS_READY(xe));
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
+ err = register_save_restore_context(ctx);
+ if (err)
+ return err;
+ }
+
+ return err;
+}
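
The kernel-doc above notes that re-registration is needed after runtime resume; a hypothetical resume hook (the actual call site is expected in the VF runtime-PM path, outside this hunk) would reduce to:

	if (IS_VF_CCS_READY(xe)) {
		err = xe_sriov_vf_ccs_register_context(xe);
		if (err)
			return err;
	}
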
+
+/*
+ * Whether GuC requires CCS copy BBs for VF migration.
+ * @xe: the &xe_device instance.
+ *
+ * Only selected platforms require VF KMD to maintain CCS copy BBs and linked LRCAs.
+ *
+ * Return: true if VF driver must participate in the CCS migration, false otherwise.
+ */
+static bool vf_migration_ccs_bb_needed(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ return !IS_DGFX(xe) && xe_device_has_flat_ccs(xe);
+}
+
+/*
+ * Check whether VF migration must be disabled due to missing CCS BB support in the GuC FW.
+ * @xe: the &xe_device instance.
+ *
+ * Performs a late disable of the VF migration feature in case the GuC FW cannot support it.
+ *
+ * Returns: true if VF migration with CCS BBs is supported, false otherwise.
+ */
+static bool vf_migration_ccs_bb_support_check(struct xe_device *xe)
+{
+ struct xe_gt *gt = xe_root_mmio_gt(xe);
+ struct xe_uc_fw_version guc_version;
+
+ xe_gt_sriov_vf_guc_versions(gt, NULL, &guc_version);
+ if (MAKE_GUC_VER_STRUCT(guc_version) < MAKE_GUC_VER(1, 23, 0)) {
+ xe_sriov_vf_migration_disable(xe,
+ "CCS migration requires GuC ABI >= 1.23 but only %u.%u found",
+ guc_version.major, guc_version.minor);
+ return false;
+ }
+
+ return true;
+}
+
+static void xe_sriov_vf_ccs_fini(void *arg)
+{
+ struct xe_sriov_vf_ccs_ctx *ctx = arg;
+ struct xe_lrc *lrc = xe_exec_queue_lrc(ctx->mig_q);
+
+ /*
+ * Make TAIL = HEAD in the ring so that no issues are seen if GuC
+ * submits this context to HW on VF pause after the device is unbound.
+ */
+ xe_lrc_set_ring_tail(lrc, xe_lrc_ring_head(lrc));
+ xe_exec_queue_put(ctx->mig_q);
+}
+
+/**
+ * xe_sriov_vf_ccs_init - Setup LRCA for save & restore.
+ * @xe: the &xe_device to initialize
+ *
+ * This function shall be called only by the VF. It initializes the LRCAs and
+ * suballocators needed for CCS save & restore.
+ *
+ * Return: 0 on success. Negative error code on failure.
+ */
+int xe_sriov_vf_ccs_init(struct xe_device *xe)
+{
+ struct xe_tile *tile = xe_device_get_root_tile(xe);
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_exec_queue *q;
+ u32 flags;
+ int err;
+
+ xe_assert(xe, IS_SRIOV_VF(xe));
+
+ if (!xe_sriov_vf_migration_supported(xe) ||
+ !vf_migration_ccs_bb_needed(xe) ||
+ !vf_migration_ccs_bb_support_check(xe))
+ return 0;
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
+ ctx->ctx_id = ctx_id;
+
+ flags = EXEC_QUEUE_FLAG_KERNEL |
+ EXEC_QUEUE_FLAG_PERMANENT |
+ EXEC_QUEUE_FLAG_MIGRATE;
+ q = xe_exec_queue_create_bind(xe, tile, flags, 0);
+ if (IS_ERR(q)) {
+ err = PTR_ERR(q);
+ goto err_ret;
+ }
+ ctx->mig_q = q;
+
+ err = alloc_bb_pool(tile, ctx);
+ if (err)
+ goto err_free_queue;
+
+ ccs_rw_update_ring(ctx);
+
+ err = register_save_restore_context(ctx);
+ if (err)
+ goto err_free_queue;
+
+ err = devm_add_action_or_reset(xe->drm.dev,
+ xe_sriov_vf_ccs_fini,
+ ctx);
+ if (err)
+ goto err_ret;
+ }
+
+ xe->sriov.vf.ccs.initialized = 1;
+
+ return 0;
+
+err_free_queue:
+ xe_exec_queue_put(q);
+
+err_ret:
+ return err;
+}
+
+/**
+ * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
+ * @bo: the &buffer object to which batch buffer commands will be added.
+ *
+ * This function shall be called only by the VF. It inserts the PTEs and copy
+ * command instructions into the BO by calling the xe_migrate_ccs_rw_copy()
+ * helper.
+ *
+ * Returns: 0 if successful, negative error code on failure.
+ */
+int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+ struct xe_sriov_vf_ccs_ctx *ctx;
+ struct xe_tile *tile;
+ struct xe_bb *bb;
+ int err = 0;
+
+ xe_assert(xe, IS_VF_CCS_READY(xe));
+
+ tile = xe_device_get_root_tile(xe);
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ bb = bo->bb_ccs[ctx_id];
+ /* bb should be NULL here. Assert if not NULL */
+ xe_assert(xe, !bb);
+
+ ctx = &xe->sriov.vf.ccs.contexts[ctx_id];
+ err = xe_migrate_ccs_rw_copy(tile, ctx->mig_q, bo, ctx_id);
+ }
+ return err;
+}
+
+/**
+ * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from the BO.
+ * @bo: the &buffer object from which batch buffer commands will be removed.
+ *
+ * This function shall be called only by the VF. It removes the PTEs and copy
+ * command instructions from the BO, overwriting the BB with MI_NOOP before
+ * freeing it.
+ *
+ * Returns: 0 if successful.
+ */
+int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+ struct xe_bb *bb;
+
+ xe_assert(xe, IS_VF_CCS_READY(xe));
+
+ if (!xe_bo_has_valid_ccs_bb(bo))
+ return 0;
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ bb = bo->bb_ccs[ctx_id];
+ if (!bb)
+ continue;
+
+ memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
+ xe_bb_free(bb, NULL);
+ bo->bb_ccs[ctx_id] = NULL;
+ }
+ return 0;
+}
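
The attach/detach pair above is expected to bracket the lifetime of a system-memory BO on the VF; a hypothetical caller in the BO create/destroy paths (not shown in this patch) would look roughly like:

	/* once the BO backing store is populated */
	if (IS_VF_CCS_READY(xe))
		err = xe_sriov_vf_ccs_attach_bo(bo);
	...
	/* before the backing store is released */
	if (IS_VF_CCS_READY(xe))
		xe_sriov_vf_ccs_detach_bo(bo);
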
+
+/**
+ * xe_sriov_vf_ccs_print - Print VF CCS details.
+ * @xe: the &xe_device
+ * @p: the &drm_printer
+ *
+ * This function is for VF use only.
+ */
+void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p)
+{
+ struct xe_sa_manager *bb_pool;
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
+ if (!IS_VF_CCS_READY(xe))
+ return;
+
+ xe_pm_runtime_get(xe);
+
+ for_each_ccs_rw_ctx(ctx_id) {
+ bb_pool = xe->sriov.vf.ccs.contexts[ctx_id].mem.ccs_bb_pool;
+ if (!bb_pool)
+ break;
+
+ drm_printf(p, "ccs %s bb suballoc info\n", ctx_id ? "write" : "read");
+ drm_printf(p, "-------------------------\n");
+ drm_suballoc_dump_debug_info(&bb_pool->base, p, xe_sa_manager_gpu_addr(bb_pool));
+ drm_puts(p, "\n");
+ }
+
+ xe_pm_runtime_put(xe);
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
new file mode 100644
index 000000000000..f8ca6efce9ee
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_CCS_H_
+#define _XE_SRIOV_VF_CCS_H_
+
+#include "xe_device_types.h"
+#include "xe_sriov.h"
+#include "xe_sriov_vf_ccs_types.h"
+
+struct drm_printer;
+struct xe_device;
+struct xe_bo;
+
+int xe_sriov_vf_ccs_init(struct xe_device *xe);
+int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo);
+int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);
+int xe_sriov_vf_ccs_register_context(struct xe_device *xe);
+void xe_sriov_vf_ccs_rebase(struct xe_device *xe);
+void xe_sriov_vf_ccs_print(struct xe_device *xe, struct drm_printer *p);
+
+static inline bool xe_sriov_vf_ccs_ready(struct xe_device *xe)
+{
+ xe_assert(xe, IS_SRIOV_VF(xe));
+ return xe->sriov.vf.ccs.initialized;
+}
+
+#define IS_VF_CCS_READY(xe) ({ \
+ struct xe_device *xe__ = (xe); \
+ IS_SRIOV_VF(xe__) && xe_sriov_vf_ccs_ready(xe__); \
+ })
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
new file mode 100644
index 000000000000..22c499943d2a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_CCS_TYPES_H_
+#define _XE_SRIOV_VF_CCS_TYPES_H_
+
+#include <linux/types.h>
+
+#define for_each_ccs_rw_ctx(id__) \
+ for ((id__) = 0; (id__) < XE_SRIOV_VF_CCS_CTX_COUNT; (id__)++)
+
+enum xe_sriov_vf_ccs_rw_ctxs {
+ XE_SRIOV_VF_CCS_READ_CTX,
+ XE_SRIOV_VF_CCS_WRITE_CTX,
+ XE_SRIOV_VF_CCS_CTX_COUNT
+};
+
+struct xe_migrate;
+struct xe_sa_manager;
+
+/**
+ * struct xe_sriov_vf_ccs_ctx - VF CCS migration context data.
+ */
+struct xe_sriov_vf_ccs_ctx {
+ /** @ctx_id: identifier of this CCS context (read or write) */
+ enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+
+ /** @mig_q: exec queue used for migration */
+ struct xe_exec_queue *mig_q;
+
+ /** @mem: memory data */
+ struct {
+ /** @mem.ccs_bb_pool: Pool from which batch buffers are allocated. */
+ struct xe_sa_manager *ccs_bb_pool;
+ } mem;
+};
+
+/**
+ * struct xe_sriov_vf_ccs - The VF CCS migration support data.
+ */
+struct xe_sriov_vf_ccs {
+ /** @contexts: CCS read and write contexts for VF. */
+ struct xe_sriov_vf_ccs_ctx contexts[XE_SRIOV_VF_CCS_CTX_COUNT];
+
+ /** @initialized: set once VF CCS initialization has completed. */
+ bool initialized;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
new file mode 100644
index 000000000000..d5f72d667817
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_types.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023-2025 Intel Corporation
+ */
+
+#ifndef _XE_SRIOV_VF_TYPES_H_
+#define _XE_SRIOV_VF_TYPES_H_
+
+#include <linux/types.h>
+#include <linux/workqueue_types.h>
+
+#include "xe_sriov_vf_ccs_types.h"
+
+/**
+ * struct xe_sriov_vf_relay_version - PF ABI version details.
+ */
+struct xe_sriov_vf_relay_version {
+ /** @major: major version. */
+ u16 major;
+ /** @minor: minor version. */
+ u16 minor;
+};
+
+/**
+ * struct xe_device_vf - Xe Virtual Function related data
+ *
+ * The data in this structure is valid only if the driver is running in the
+ * @XE_SRIOV_MODE_VF mode.
+ */
+struct xe_device_vf {
+ /** @pf_version: negotiated VF/PF ABI version. */
+ struct xe_sriov_vf_relay_version pf_version;
+
+ /** @migration: VF Migration state data */
+ struct {
+ /**
+ * @migration.disabled: flag indicating if migration support
+ * was turned off due to missing prerequisites
+ */
+ bool disabled;
+ } migration;
+
+ /** @ccs: VF CCS state data */
+ struct xe_sriov_vf_ccs ccs;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vfio.c b/drivers/gpu/drm/xe/xe_sriov_vfio.c
new file mode 100644
index 000000000000..3da81af97b8b
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_sriov_vfio.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <drm/intel/xe_sriov_vfio.h>
+#include <linux/cleanup.h>
+
+#include "xe_pci.h"
+#include "xe_pm.h"
+#include "xe_sriov_pf_control.h"
+#include "xe_sriov_pf_helpers.h"
+#include "xe_sriov_pf_migration.h"
+
+struct xe_device *xe_sriov_vfio_get_pf(struct pci_dev *pdev)
+{
+ return xe_pci_to_pf_device(pdev);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_get_pf, "xe-vfio-pci");
+
+bool xe_sriov_vfio_migration_supported(struct xe_device *xe)
+{
+ if (!IS_SRIOV_PF(xe))
+ return false;
+
+ return xe_sriov_pf_migration_supported(xe);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_migration_supported, "xe-vfio-pci");
+
+#define DEFINE_XE_SRIOV_VFIO_FUNCTION(_type, _func, _impl) \
+_type xe_sriov_vfio_##_func(struct xe_device *xe, unsigned int vfid) \
+{ \
+ if (!IS_SRIOV_PF(xe)) \
+ return -EPERM; \
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe)) \
+ return -EINVAL; \
+ \
+ guard(xe_pm_runtime_noresume)(xe); \
+ \
+ return xe_sriov_pf_##_impl(xe, vfid); \
+} \
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_##_func, "xe-vfio-pci")
+
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, wait_flr_done, control_wait_flr);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, suspend_device, control_pause_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_device, control_resume_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, stop_copy_enter, control_trigger_save_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, stop_copy_exit, control_finish_save_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_data_enter, control_trigger_restore_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, resume_data_exit, control_finish_restore_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(int, error, control_stop_vf);
+DEFINE_XE_SRIOV_VFIO_FUNCTION(ssize_t, stop_copy_size, migration_size);
+
+ssize_t xe_sriov_vfio_data_read(struct xe_device *xe, unsigned int vfid,
+ char __user *buf, size_t len)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_migration_read(xe, vfid, buf, len);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_read, "xe-vfio-pci");
+
+ssize_t xe_sriov_vfio_data_write(struct xe_device *xe, unsigned int vfid,
+ const char __user *buf, size_t len)
+{
+ if (!IS_SRIOV_PF(xe))
+ return -EPERM;
+ if (vfid == PFID || vfid > xe_sriov_pf_num_vfs(xe))
+ return -EINVAL;
+
+ guard(xe_pm_runtime_noresume)(xe);
+
+ return xe_sriov_pf_migration_write(xe, vfid, buf, len);
+}
+EXPORT_SYMBOL_FOR_MODULES(xe_sriov_vfio_data_write, "xe-vfio-pci");
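
For context, a hypothetical xe-vfio-pci consumer would drive these exports from the VFIO migration state machine; the sequence below only sketches the save direction and is not a definitive description of the variant driver:

	struct xe_device *xe = xe_sriov_vfio_get_pf(pf_pdev);
	int err;

	if (!xe_sriov_vfio_migration_supported(xe))
		return -EOPNOTSUPP;

	err = xe_sriov_vfio_suspend_device(xe, vfid);		/* RUNNING -> STOP */
	if (!err)
		err = xe_sriov_vfio_stop_copy_enter(xe, vfid);	/* STOP -> STOP_COPY */
	/* then stream the device state out via xe_sriov_vfio_data_read() */
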
diff --git a/drivers/gpu/drm/xe/xe_step.c b/drivers/gpu/drm/xe/xe_step.c
index c77b5c317fa0..10e88f2c9615 100644
--- a/drivers/gpu/drm/xe/xe_step.c
+++ b/drivers/gpu/drm/xe/xe_step.c
@@ -5,6 +5,7 @@
#include "xe_step.h"
+#include <kunit/visibility.h>
#include <linux/bitfield.h>
#include "xe_device.h"
@@ -255,3 +256,4 @@ const char *xe_step_name(enum xe_step step)
return "**";
}
}
+EXPORT_SYMBOL_IF_KUNIT(xe_step_name);
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.c b/drivers/gpu/drm/xe/xe_survivability_mode.c
index 1f710b3fc599..1662bfddd4bc 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.c
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.c
@@ -14,6 +14,7 @@
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_heci_gsc.h"
+#include "xe_i2c.h"
#include "xe_mmio.h"
#include "xe_pcode_api.h"
#include "xe_vsec.h"
@@ -21,15 +22,18 @@
#define MAX_SCRATCH_MMIO 8
/**
- * DOC: Xe Boot Survivability
+ * DOC: Survivability Mode
*
- * Boot Survivability is a software based workflow for recovering a system in a failed boot state
+ * Survivability Mode is a software-based workflow for recovering a system in a failed boot state.
* Here system recoverability is concerned with recovering the firmware responsible for boot.
*
- * This is implemented by loading the driver with bare minimum (no drm card) to allow the firmware
- * to be flashed through mei and collect telemetry. The driver's probe flow is modified
- * such that it enters survivability mode when pcode initialization is incomplete and boot status
- * denotes a failure.
+ * Boot Survivability
+ * ===================
+ *
+ * Boot Survivability is implemented by loading the driver with the bare minimum (no DRM card) to
+ * allow the firmware to be flashed through the mei driver and to collect telemetry. The driver's
+ * probe flow is modified such that it enters survivability mode when pcode initialization is
+ * incomplete and the boot status denotes a failure.
*
* Survivability mode can also be entered manually using the survivability mode attribute available
* through configfs which is beneficial in several usecases. It can be used to address scenarios
@@ -40,12 +44,14 @@
*
* # echo 1 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode
*
+ * It is the responsibility of the user to clear the mode once the firmware flash is complete.
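+ *
+ * A minimal sketch, assuming the mode is cleared by writing 0 back to the same configfs
+ * attribute::
+ *
+ * # echo 0 > /sys/kernel/config/xe/0000:03:00.0/survivability_mode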
+ *
* Refer :ref:`xe_configfs` for more details on how to use configfs
*
* Survivability mode is indicated by the below admin-only readable sysfs which provides additional
* debug information::
*
- * /sys/bus/pci/devices/<device>/surivability_mode
+ * /sys/bus/pci/devices/<device>/survivability_mode
*
* Capability Information:
* Provides boot status
@@ -55,6 +61,22 @@
* Provides history of previous failures
* Auxiliary Information
* Certain failures may have information in addition to postcode information
+ *
+ * Runtime Survivability
+ * =====================
+ *
+ * Certain runtime firmware errors can cause the device to enter a wedged state
+ * (:ref:`xe-device-wedging`) requiring a firmware flash to restore normal operation.
+ * Runtime Survivability Mode signals that a firmware flash is necessary to recover the device and
+ * is indicated by the presence of the survivability mode sysfs::
+ *
+ * /sys/bus/pci/devices/<device>/survivability_mode
+ *
+ * Survivability mode sysfs provides information about the type of survivability mode.
+ *
+ * When such errors occur, userspace is notified with the drm device wedged uevent and the runtime
+ * survivability mode sysfs. The user can then initiate a firmware flash using userspace tools like
+ * fwupd to restore the device to normal operation.
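+ *
+ * A minimal sketch of how userspace might observe this, using a generic udev monitor and the
+ * sysfs shown above (the device address is illustrative)::
+ *
+ * # udevadm monitor --kernel --property --subsystem-match=drm
+ * # cat /sys/bus/pci/devices/0000:03:00.0/survivability_mode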
*/
static u32 aux_history_offset(u32 reg_value)
@@ -120,6 +142,14 @@ static void log_survivability_info(struct pci_dev *pdev)
}
}
+static int check_boot_failure(struct xe_device *xe)
+{
+ struct xe_survivability *survivability = &xe->survivability;
+
+ return survivability->boot_status == NON_CRITICAL_FAILURE ||
+ survivability->boot_status == CRITICAL_FAILURE;
+}
+
static ssize_t survivability_mode_show(struct device *dev,
struct device_attribute *attr, char *buff)
{
@@ -129,6 +159,12 @@ static ssize_t survivability_mode_show(struct device *dev,
struct xe_survivability_info *info = survivability->info;
int index = 0, count = 0;
+ count += sysfs_emit_at(buff, count, "Survivability mode type: %s\n",
+ survivability->type ? "Runtime" : "Boot");
+
+ if (!check_boot_failure(xe))
+ return count;
+
for (index = 0; index < MAX_SCRATCH_MMIO; index++) {
if (info[index].reg)
count += sysfs_emit_at(buff, count, "%s: 0x%x - 0x%x\n", info[index].name,
@@ -146,16 +182,14 @@ static void xe_survivability_mode_fini(void *arg)
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
struct device *dev = &pdev->dev;
- xe_configfs_clear_survivability_mode(pdev);
sysfs_remove_file(&dev->kobj, &dev_attr_survivability_mode.attr);
}
-static int enable_survivability_mode(struct pci_dev *pdev)
+static int create_survivability_sysfs(struct pci_dev *pdev)
{
struct device *dev = &pdev->dev;
struct xe_device *xe = pdev_to_xe_device(pdev);
- struct xe_survivability *survivability = &xe->survivability;
- int ret = 0;
+ int ret;
/* create survivability mode sysfs */
ret = sysfs_create_file(&dev->kobj, &dev_attr_survivability_mode.attr);
@@ -169,35 +203,72 @@ static int enable_survivability_mode(struct pci_dev *pdev)
if (ret)
return ret;
+ return 0;
+}
+
+static int enable_boot_survivability_mode(struct pci_dev *pdev)
+{
+ struct device *dev = &pdev->dev;
+ struct xe_device *xe = pdev_to_xe_device(pdev);
+ struct xe_survivability *survivability = &xe->survivability;
+ int ret = 0;
+
+ ret = create_survivability_sysfs(pdev);
+ if (ret)
+ return ret;
+
/* Make sure xe_heci_gsc_init() knows about survivability mode */
survivability->mode = true;
ret = xe_heci_gsc_init(xe);
- if (ret) {
- /*
- * But if it fails, device can't enter survivability
- * so move it back for correct error handling
- */
- survivability->mode = false;
- return ret;
- }
+ if (ret)
+ goto err;
xe_vsec_init(xe);
+ ret = xe_i2c_probe(xe);
+ if (ret)
+ goto err;
+
dev_err(dev, "In Survivability Mode\n");
return 0;
+
+err:
+ survivability->mode = false;
+ return ret;
+}
+
+static int init_survivability_mode(struct xe_device *xe)
+{
+ struct xe_survivability *survivability = &xe->survivability;
+ struct xe_survivability_info *info;
+
+ survivability->size = MAX_SCRATCH_MMIO;
+
+ info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
+ GFP_KERNEL);
+ if (!info)
+ return -ENOMEM;
+
+ survivability->info = info;
+
+ populate_survivability_info(xe);
+
+ return 0;
}
/**
- * xe_survivability_mode_is_enabled - check if survivability mode is enabled
+ * xe_survivability_mode_is_boot_enabled - check if boot survivability mode is enabled
* @xe: xe device instance
*
- * Returns true if in survivability mode, false otherwise
+ * Returns true if in boot survivability mode, else false
*/
-bool xe_survivability_mode_is_enabled(struct xe_device *xe)
+bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe)
{
- return xe->survivability.mode;
+ struct xe_survivability *survivability = &xe->survivability;
+
+ return survivability->mode && survivability->type == XE_SURVIVABILITY_TYPE_BOOT;
}
/**
@@ -218,19 +289,10 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
u32 data;
bool survivability_mode;
- if (!IS_DGFX(xe) || IS_SRIOV_VF(xe))
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE)
return false;
survivability_mode = xe_configfs_get_survivability_mode(pdev);
-
- if (xe->info.platform < XE_BATTLEMAGE) {
- if (survivability_mode) {
- dev_err(&pdev->dev, "Survivability Mode is not supported on this card\n");
- xe_configfs_clear_survivability_mode(pdev);
- }
- return false;
- }
-
/* Enable survivability mode if set via configfs */
if (survivability_mode)
return true;
@@ -238,44 +300,78 @@ bool xe_survivability_mode_is_requested(struct xe_device *xe)
data = xe_mmio_read32(mmio, PCODE_SCRATCH(0));
survivability->boot_status = REG_FIELD_GET(BOOT_STATUS, data);
- return survivability->boot_status == NON_CRITICAL_FAILURE ||
- survivability->boot_status == CRITICAL_FAILURE;
+ return check_boot_failure(xe);
}
/**
- * xe_survivability_mode_enable - Initialize and enable the survivability mode
+ * xe_survivability_mode_runtime_enable - Initialize and enable runtime survivability mode
* @xe: xe device instance
*
- * Initialize survivability information and enable survivability mode
+ * Initialize survivability information and enable runtime survivability mode.
+ * Runtime survivability mode is enabled when certain errors cause the device to be
+ * in a non-recoverable state. The device is declared wedged with the appropriate
+ * recovery method and the survivability mode sysfs is exposed to userspace.
*
- * Return: 0 if survivability mode is enabled or not requested; negative error
- * code otherwise.
+ * Return: 0 if runtime survivability mode is enabled, negative error code otherwise.
*/
-int xe_survivability_mode_enable(struct xe_device *xe)
+int xe_survivability_mode_runtime_enable(struct xe_device *xe)
{
struct xe_survivability *survivability = &xe->survivability;
- struct xe_survivability_info *info;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int ret;
- if (!xe_survivability_mode_is_requested(xe))
- return 0;
+ if (!IS_DGFX(xe) || IS_SRIOV_VF(xe) || xe->info.platform < XE_BATTLEMAGE) {
+ dev_err(&pdev->dev, "Runtime Survivability Mode not supported\n");
+ return -EINVAL;
+ }
- survivability->size = MAX_SCRATCH_MMIO;
+ ret = init_survivability_mode(xe);
+ if (ret)
+ return ret;
- info = devm_kcalloc(xe->drm.dev, survivability->size, sizeof(*info),
- GFP_KERNEL);
- if (!info)
- return -ENOMEM;
+ ret = create_survivability_sysfs(pdev);
+ if (ret)
+ dev_err(&pdev->dev, "Failed to create survivability mode sysfs\n");
- survivability->info = info;
+ survivability->type = XE_SURVIVABILITY_TYPE_RUNTIME;
+ dev_err(&pdev->dev, "Runtime Survivability mode enabled\n");
- populate_survivability_info(xe);
+ xe_device_set_wedged_method(xe, DRM_WEDGE_RECOVERY_VENDOR);
+ xe_device_declare_wedged(xe);
+ dev_err(&pdev->dev, "Firmware flash required, Please refer to the userspace documentation for more details!\n");
+
+ return 0;
+}
+
+/**
+ * xe_survivability_mode_boot_enable - Initialize and enable boot survivability mode
+ * @xe: xe device instance
+ *
+ * Initialize survivability information and enable boot survivability mode
+ *
+ * Return: 0 if boot survivability mode is enabled or not requested, negative error
+ * code otherwise.
+ */
+int xe_survivability_mode_boot_enable(struct xe_device *xe)
+{
+ struct xe_survivability *survivability = &xe->survivability;
+ struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
+ int ret;
+
+ if (!xe_survivability_mode_is_requested(xe))
+ return 0;
+
+ ret = init_survivability_mode(xe);
+ if (ret)
+ return ret;
- /* Only log debug information and exit if it is a critical failure */
+ /* Log breadcrumbs but do not enter survivability mode for Critical boot errors */
if (survivability->boot_status == CRITICAL_FAILURE) {
log_survivability_info(pdev);
return -ENXIO;
}
- return enable_survivability_mode(pdev);
+ survivability->type = XE_SURVIVABILITY_TYPE_BOOT;
+
+ return enable_boot_survivability_mode(pdev);
}
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode.h b/drivers/gpu/drm/xe/xe_survivability_mode.h
index 02231c2bf008..1cc94226aa82 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode.h
@@ -10,8 +10,9 @@
struct xe_device;
-int xe_survivability_mode_enable(struct xe_device *xe);
-bool xe_survivability_mode_is_enabled(struct xe_device *xe);
+int xe_survivability_mode_boot_enable(struct xe_device *xe);
+int xe_survivability_mode_runtime_enable(struct xe_device *xe);
+bool xe_survivability_mode_is_boot_enabled(struct xe_device *xe);
bool xe_survivability_mode_is_requested(struct xe_device *xe);
#endif /* _XE_SURVIVABILITY_MODE_H_ */
diff --git a/drivers/gpu/drm/xe/xe_survivability_mode_types.h b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
index 19d433e253df..cd65a5d167c9 100644
--- a/drivers/gpu/drm/xe/xe_survivability_mode_types.h
+++ b/drivers/gpu/drm/xe/xe_survivability_mode_types.h
@@ -9,6 +9,11 @@
#include <linux/limits.h>
#include <linux/types.h>
+enum xe_survivability_type {
+ XE_SURVIVABILITY_TYPE_BOOT,
+ XE_SURVIVABILITY_TYPE_RUNTIME,
+};
+
struct xe_survivability_info {
char name[NAME_MAX];
u32 reg;
@@ -30,6 +35,9 @@ struct xe_survivability {
/** @mode: boolean to indicate survivability mode */
bool mode;
+
+ /** @type: survivability type */
+ enum xe_survivability_type type;
};
#endif /* _XE_SURVIVABILITY_MODE_TYPES_H_ */
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 6345896585de..f97e0af6a9b0 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -3,16 +3,21 @@
* Copyright © 2024 Intel Corporation
*/
+#include <drm/drm_drv.h>
+
#include "xe_bo.h"
+#include "xe_exec_queue_types.h"
#include "xe_gt_stats.h"
-#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_module.h"
+#include "xe_pm.h"
#include "xe_pt.h"
#include "xe_svm.h"
+#include "xe_tile.h"
#include "xe_ttm_vram_mgr.h"
#include "xe_vm.h"
#include "xe_vm_types.h"
+#include "xe_vram_types.h"
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
@@ -21,9 +26,9 @@ static bool xe_svm_range_in_vram(struct xe_svm_range *range)
* memory.
*/
- struct drm_gpusvm_range_flags flags = {
+ struct drm_gpusvm_pages_flags flags = {
/* Pairs with WRITE_ONCE in drm_gpusvm.c */
- .__flags = READ_ONCE(range->base.flags.__flags),
+ .__flags = READ_ONCE(range->base.pages.flags.__flags),
};
return flags.has_devmem_pages;
@@ -45,30 +50,15 @@ static struct xe_vm *range_to_vm(struct drm_gpusvm_range *r)
return gpusvm_to_vm(r->gpusvm);
}
-static unsigned long xe_svm_range_start(struct xe_svm_range *range)
-{
- return drm_gpusvm_range_start(&range->base);
-}
-
-static unsigned long xe_svm_range_end(struct xe_svm_range *range)
-{
- return drm_gpusvm_range_end(&range->base);
-}
-
-static unsigned long xe_svm_range_size(struct xe_svm_range *range)
-{
- return drm_gpusvm_range_size(&range->base);
-}
-
-#define range_debug(r__, operaton__) \
+#define range_debug(r__, operation__) \
vm_dbg(&range_to_vm(&(r__)->base)->xe->drm, \
"%s: asid=%u, gpusvm=%p, vram=%d,%d, seqno=%lu, " \
"start=0x%014lx, end=0x%014lx, size=%lu", \
- (operaton__), range_to_vm(&(r__)->base)->usm.asid, \
+ (operation__), range_to_vm(&(r__)->base)->usm.asid, \
(r__)->base.gpusvm, \
xe_svm_range_in_vram((r__)) ? 1 : 0, \
xe_svm_range_has_vram_binding((r__)) ? 1 : 0, \
- (r__)->base.notifier_seq, \
+ (r__)->base.pages.notifier_seq, \
xe_svm_range_start((r__)), xe_svm_range_end((r__)), \
xe_svm_range_size((r__)))
@@ -77,11 +67,6 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
range_debug(range, operation);
}
-static void *xe_svm_devm_owner(struct xe_device *xe)
-{
- return xe;
-}
-
static struct drm_gpusvm_range *
xe_svm_range_alloc(struct drm_gpusvm *gpusvm)
{
@@ -103,11 +88,6 @@ static void xe_svm_range_free(struct drm_gpusvm_range *range)
kfree(range);
}
-static struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
-{
- return container_of(r, struct xe_svm_range, base);
-}
-
static void
xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
const struct mmu_notifier_range *mmu_range)
@@ -124,8 +104,12 @@ xe_svm_garbage_collector_add_range(struct xe_vm *vm, struct xe_svm_range *range,
&vm->svm.garbage_collector.range_list);
spin_unlock(&vm->svm.garbage_collector.lock);
- queue_work(xe_device_get_root_tile(xe)->primary_gt->usm.pf_wq,
- &vm->svm.garbage_collector.work);
+ queue_work(xe->usm.pf_wq, &vm->svm.garbage_collector.work);
+}
+
+static void xe_svm_tlb_inval_count_stats_incr(struct xe_gt *gt)
+{
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_COUNT, 1);
}
static u8
@@ -144,7 +128,7 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
range_debug(range, "NOTIFIER");
/* Skip if already unmapped or if no binding exist */
- if (range->base.flags.unmapped || !range->tile_present)
+ if (range->base.pages.flags.unmapped || !range->tile_present)
return 0;
range_debug(range, "NOTIFIER - EXECUTE");
@@ -160,8 +144,19 @@ xe_svm_range_notifier_event_begin(struct xe_vm *vm, struct drm_gpusvm_range *r,
*/
for_each_tile(tile, xe, id)
if (xe_pt_zap_ptes_range(tile, vm, range)) {
- tile_mask |= BIT(id);
- range->tile_invalidated |= BIT(id);
+ /*
+ * WRITE_ONCE pairs with READ_ONCE in
+ * xe_vm_has_valid_gpu_mapping()
+ */
+ WRITE_ONCE(range->tile_invalidated,
+ range->tile_invalidated | BIT(id));
+
+ if (!(tile_mask & BIT(id))) {
+ xe_svm_tlb_inval_count_stats_incr(tile->primary_gt);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_count_stats_incr(tile->media_gt);
+ tile_mask |= BIT(id);
+ }
}
return tile_mask;
@@ -181,20 +176,35 @@ xe_svm_range_notifier_event_end(struct xe_vm *vm, struct drm_gpusvm_range *r,
mmu_range);
}
+static s64 xe_svm_stats_ktime_us_delta(ktime_t start)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ?
+ ktime_us_delta(ktime_get(), start) : 0;
+}
+
+static void xe_svm_tlb_inval_us_stats_incr(struct xe_gt *gt, ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_TLB_INVAL_US, us_delta);
+}
+
+static ktime_t xe_svm_stats_ktime_get(void)
+{
+ return IS_ENABLED(CONFIG_DEBUG_FS) ? ktime_get() : 0;
+}
+
static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
struct drm_gpusvm_notifier *notifier,
const struct mmu_notifier_range *mmu_range)
{
struct xe_vm *vm = gpusvm_to_vm(gpusvm);
struct xe_device *xe = vm->xe;
- struct xe_tile *tile;
struct drm_gpusvm_range *r, *first;
- struct xe_gt_tlb_invalidation_fence
- fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+ struct xe_tile *tile;
+ ktime_t start = xe_svm_stats_ktime_get();
u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
- u8 tile_mask = 0;
- u8 id;
- u32 fence_id = 0;
+ u8 tile_mask = 0, id;
long err;
xe_svm_assert_in_notifier(vm);
@@ -240,47 +250,20 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
xe_device_wmb(xe);
- for_each_tile(tile, xe, id) {
- if (tile_mask & BIT(id)) {
- int err;
-
- xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
- &fence[fence_id], true);
-
- err = xe_gt_tlb_invalidation_range(tile->primary_gt,
- &fence[fence_id],
- adj_start,
- adj_end,
- vm->usm.asid);
- if (WARN_ON_ONCE(err < 0))
- goto wait;
- ++fence_id;
-
- if (!tile->media_gt)
- continue;
-
- xe_gt_tlb_invalidation_fence_init(tile->media_gt,
- &fence[fence_id], true);
-
- err = xe_gt_tlb_invalidation_range(tile->media_gt,
- &fence[fence_id],
- adj_start,
- adj_end,
- vm->usm.asid);
- if (WARN_ON_ONCE(err < 0))
- goto wait;
- ++fence_id;
- }
- }
-
-wait:
- for (id = 0; id < fence_id; ++id)
- xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+ err = xe_vm_range_tilemask_tlb_inval(vm, adj_start, adj_end, tile_mask);
+ WARN_ON_ONCE(err);
range_notifier_event_end:
r = first;
drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end)
xe_svm_range_notifier_event_end(vm, r, mmu_range);
+ for_each_tile(tile, xe, id) {
+ if (tile_mask & BIT(id)) {
+ xe_svm_tlb_inval_us_stats_incr(tile->primary_gt, start);
+ if (tile->media_gt)
+ xe_svm_tlb_inval_us_stats_incr(tile->media_gt, start);
+ }
+ }
}
static int __xe_svm_garbage_collector(struct xe_vm *vm,
@@ -302,24 +285,78 @@ static int __xe_svm_garbage_collector(struct xe_vm *vm,
return 0;
}
+static int xe_svm_range_set_default_attr(struct xe_vm *vm, u64 range_start, u64 range_end)
+{
+ struct xe_vma *vma;
+ struct xe_vma_mem_attr default_attr = {
+ .preferred_loc = {
+ .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
+ .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
+ },
+ .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
+ };
+ int err = 0;
+
+ vma = xe_vm_find_vma_by_addr(vm, range_start);
+ if (!vma)
+ return -EINVAL;
+
+ if (!(vma->gpuva.flags & XE_VMA_MADV_AUTORESET)) {
+ drm_dbg(&vm->xe->drm, "Skipping madvise reset for vma.\n");
+ return 0;
+ }
+
+ if (xe_vma_has_default_mem_attrs(vma))
+ return 0;
+
+ vm_dbg(&vm->xe->drm, "Existing VMA start=0x%016llx, vma_end=0x%016llx",
+ xe_vma_start(vma), xe_vma_end(vma));
+
+ if (xe_vma_start(vma) == range_start && xe_vma_end(vma) == range_end) {
+ default_attr.pat_index = vma->attr.default_pat_index;
+ default_attr.default_pat_index = vma->attr.default_pat_index;
+ vma->attr = default_attr;
+ } else {
+ vm_dbg(&vm->xe->drm, "Split VMA start=0x%016llx, vma_end=0x%016llx",
+ range_start, range_end);
+ err = xe_vm_alloc_cpu_addr_mirror_vma(vm, range_start, range_end - range_start);
+ if (err) {
+ drm_warn(&vm->xe->drm, "VMA SPLIT failed: %pe\n", ERR_PTR(err));
+ xe_vm_kill(vm, true);
+ return err;
+ }
+ }
+
+ /*
+ * On a call from xe_svm_handle_pagefault() the original VMA might have changed;
+ * signal this so the caller looks up the VMA again.
+ */
+ return -EAGAIN;
+}
+
static int xe_svm_garbage_collector(struct xe_vm *vm)
{
struct xe_svm_range *range;
- int err;
+ u64 range_start;
+ u64 range_end;
+ int err, ret = 0;
lockdep_assert_held_write(&vm->lock);
if (xe_vm_is_closed_or_banned(vm))
return -ENOENT;
- spin_lock(&vm->svm.garbage_collector.lock);
for (;;) {
+ spin_lock(&vm->svm.garbage_collector.lock);
range = list_first_entry_or_null(&vm->svm.garbage_collector.range_list,
typeof(*range),
garbage_collector_link);
if (!range)
break;
+ range_start = xe_svm_range_start(range);
+ range_end = xe_svm_range_end(range);
+
list_del(&range->garbage_collector_link);
spin_unlock(&vm->svm.garbage_collector.lock);
@@ -332,11 +369,17 @@ static int xe_svm_garbage_collector(struct xe_vm *vm)
return err;
}
- spin_lock(&vm->svm.garbage_collector.lock);
+ err = xe_svm_range_set_default_attr(vm, range_start, range_end);
+ if (err) {
+ if (err == -EAGAIN)
+ ret = -EAGAIN;
+ else
+ return err;
+ }
}
spin_unlock(&vm->svm.garbage_collector.lock);
- return 0;
+ return ret;
}
static void xe_svm_garbage_collector_work_func(struct work_struct *w)
@@ -349,28 +392,22 @@ static void xe_svm_garbage_collector_work_func(struct work_struct *w)
up_write(&vm->lock);
}
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
static struct xe_vram_region *page_to_vr(struct page *page)
{
return container_of(page_pgmap(page), struct xe_vram_region, pagemap);
}
-static struct xe_tile *vr_to_tile(struct xe_vram_region *vr)
-{
- return container_of(vr, struct xe_tile, mem.vram);
-}
-
static u64 xe_vram_region_page_to_dpa(struct xe_vram_region *vr,
struct page *page)
{
u64 dpa;
- struct xe_tile *tile = vr_to_tile(vr);
u64 pfn = page_to_pfn(page);
u64 offset;
- xe_tile_assert(tile, is_device_private_page(page));
- xe_tile_assert(tile, (pfn << PAGE_SHIFT) >= vr->hpa_base);
+ xe_assert(vr->xe, is_device_private_page(page));
+ xe_assert(vr->xe, (pfn << PAGE_SHIFT) >= vr->hpa_base);
offset = (pfn << PAGE_SHIFT) - vr->hpa_base;
dpa = vr->dpa_base + offset;
@@ -383,17 +420,75 @@ enum xe_svm_copy_dir {
XE_SVM_COPY_TO_SRAM,
};
-static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
- unsigned long npages, const enum xe_svm_copy_dir dir)
+static void xe_svm_copy_kb_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ int kb)
+{
+ if (dir == XE_SVM_COPY_TO_VRAM)
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_KB, kb);
+ else
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_KB, kb);
+}
+
+static void xe_svm_copy_us_stats_incr(struct xe_gt *gt,
+ const enum xe_svm_copy_dir dir,
+ unsigned long npages,
+ ktime_t start)
+{
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start);
+
+ if (dir == XE_SVM_COPY_TO_VRAM) {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_DEVICE_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_DEVICE_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_DEVICE_COPY_US,
+ us_delta);
+ } else {
+ switch (npages) {
+ case 1:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 16:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_CPU_COPY_US,
+ us_delta);
+ break;
+ case 512:
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_CPU_COPY_US,
+ us_delta);
+ break;
+ }
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_CPU_COPY_US,
+ us_delta);
+ }
+}
+
+static int xe_svm_copy(struct page **pages,
+ struct drm_pagemap_addr *pagemap_addr,
+ unsigned long npages, const enum xe_svm_copy_dir dir,
+ struct dma_fence *pre_migrate_fence)
{
struct xe_vram_region *vr = NULL;
- struct xe_tile *tile;
+ struct xe_gt *gt = NULL;
+ struct xe_device *xe;
struct dma_fence *fence = NULL;
unsigned long i;
#define XE_VRAM_ADDR_INVALID ~0x0ull
u64 vram_addr = XE_VRAM_ADDR_INVALID;
int err = 0, pos = 0;
bool sram = dir == XE_SVM_COPY_TO_SRAM;
+ ktime_t start = xe_svm_stats_ktime_get();
/*
* This flow is complex: it locates physically contiguous device pages,
@@ -415,12 +510,13 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
last = (i + 1) == npages;
/* No CPU page and no device pages queue'd to copy */
- if (!dma_addr[i] && vram_addr == XE_VRAM_ADDR_INVALID)
+ if (!pagemap_addr[i].addr && vram_addr == XE_VRAM_ADDR_INVALID)
continue;
if (!vr && spage) {
vr = page_to_vr(spage);
- tile = vr_to_tile(vr);
+ gt = xe_migrate_exec_queue(vr->migrate)->gt;
+ xe = vr->xe;
}
XE_WARN_ON(spage && page_to_vr(spage) != vr);
@@ -429,7 +525,7 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
* first device page, check if physical contiguous on subsequent
* device pages.
*/
- if (dma_addr[i] && spage) {
+ if (pagemap_addr[i].addr && spage) {
__vram_addr = xe_vram_region_page_to_dpa(vr, spage);
if (vram_addr == XE_VRAM_ADDR_INVALID) {
vram_addr = __vram_addr;
@@ -437,6 +533,14 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
}
match = vram_addr + PAGE_SIZE * (i - pos) == __vram_addr;
+ /* Expected with contiguous memory */
+ xe_assert(vr->xe, match);
+
+ if (pagemap_addr[i].order) {
+ i += NR_PAGES(pagemap_addr[i].order) - 1;
+ chunk = (i - pos) == (XE_MIGRATE_CHUNK_SIZE / PAGE_SIZE);
+ last = (i + 1) == npages;
+ }
}
/*
@@ -451,34 +555,41 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
int incr = (match && last) ? 1 : 0;
if (vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (i - pos + incr) *
+ (PAGE_SIZE / SZ_1K));
if (sram) {
- vm_dbg(&tile->xe->drm,
+ vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
- vram_addr, (u64)dma_addr[pos], i - pos + incr);
- __fence = xe_migrate_from_vram(tile->migrate,
+ vram_addr,
+ (u64)pagemap_addr[pos].addr, i - pos + incr);
+ __fence = xe_migrate_from_vram(vr->migrate,
i - pos + incr,
vram_addr,
- dma_addr + pos);
+ &pagemap_addr[pos],
+ pre_migrate_fence);
} else {
- vm_dbg(&tile->xe->drm,
+ vm_dbg(&xe->drm,
"COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%ld",
- (u64)dma_addr[pos], vram_addr, i - pos + incr);
- __fence = xe_migrate_to_vram(tile->migrate,
+ (u64)pagemap_addr[pos].addr, vram_addr,
+ i - pos + incr);
+ __fence = xe_migrate_to_vram(vr->migrate,
i - pos + incr,
- dma_addr + pos,
- vram_addr);
+ &pagemap_addr[pos],
+ vram_addr,
+ pre_migrate_fence);
}
if (IS_ERR(__fence)) {
err = PTR_ERR(__fence);
goto err_out;
}
-
+ pre_migrate_fence = NULL;
dma_fence_put(fence);
fence = __fence;
}
/* Setup physical address of next device page */
- if (dma_addr[i] && spage) {
+ if (pagemap_addr[i].addr && spage) {
vram_addr = __vram_addr;
pos = i;
} else {
@@ -487,26 +598,30 @@ static int xe_svm_copy(struct page **pages, dma_addr_t *dma_addr,
/* Extra mismatched device page, copy it */
if (!match && last && vram_addr != XE_VRAM_ADDR_INVALID) {
+ xe_svm_copy_kb_stats_incr(gt, dir,
+ (PAGE_SIZE / SZ_1K));
if (sram) {
- vm_dbg(&tile->xe->drm,
+ vm_dbg(&xe->drm,
"COPY TO SRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
- vram_addr, (u64)dma_addr[pos], 1);
- __fence = xe_migrate_from_vram(tile->migrate, 1,
+ vram_addr, (u64)pagemap_addr[pos].addr, 1);
+ __fence = xe_migrate_from_vram(vr->migrate, 1,
vram_addr,
- dma_addr + pos);
+ &pagemap_addr[pos],
+ pre_migrate_fence);
} else {
- vm_dbg(&tile->xe->drm,
+ vm_dbg(&xe->drm,
"COPY TO VRAM - 0x%016llx -> 0x%016llx, NPAGES=%d",
- (u64)dma_addr[pos], vram_addr, 1);
- __fence = xe_migrate_to_vram(tile->migrate, 1,
- dma_addr + pos,
- vram_addr);
+ (u64)pagemap_addr[pos].addr, vram_addr, 1);
+ __fence = xe_migrate_to_vram(vr->migrate, 1,
+ &pagemap_addr[pos],
+ vram_addr,
+ pre_migrate_fence);
}
if (IS_ERR(__fence)) {
err = PTR_ERR(__fence);
goto err_out;
}
-
+ pre_migrate_fence = NULL;
dma_fence_put(fence);
fence = __fence;
}
@@ -519,34 +634,53 @@ err_out:
dma_fence_wait(fence, false);
dma_fence_put(fence);
}
+ if (pre_migrate_fence)
+ dma_fence_wait(pre_migrate_fence, false);
+
+ /*
+ * XXX: We can't derive the GT here (or anywhere in this function), but
+ * compute always uses the primary GT, so accumulate stats on the likely
+ * GT of the fault.
+ */
+ if (gt)
+ xe_svm_copy_us_stats_incr(gt, dir, npages, start);
return err;
#undef XE_MIGRATE_CHUNK_SIZE
#undef XE_VRAM_ADDR_INVALID
}
-static int xe_svm_copy_to_devmem(struct page **pages, dma_addr_t *dma_addr,
- unsigned long npages)
+static int xe_svm_copy_to_devmem(struct page **pages,
+ struct drm_pagemap_addr *pagemap_addr,
+ unsigned long npages,
+ struct dma_fence *pre_migrate_fence)
{
- return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_VRAM);
+ return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_VRAM,
+ pre_migrate_fence);
}
-static int xe_svm_copy_to_ram(struct page **pages, dma_addr_t *dma_addr,
- unsigned long npages)
+static int xe_svm_copy_to_ram(struct page **pages,
+ struct drm_pagemap_addr *pagemap_addr,
+ unsigned long npages,
+ struct dma_fence *pre_migrate_fence)
{
- return xe_svm_copy(pages, dma_addr, npages, XE_SVM_COPY_TO_SRAM);
+ return xe_svm_copy(pages, pagemap_addr, npages, XE_SVM_COPY_TO_SRAM,
+ pre_migrate_fence);
}
-static struct xe_bo *to_xe_bo(struct drm_gpusvm_devmem *devmem_allocation)
+static struct xe_bo *to_xe_bo(struct drm_pagemap_devmem *devmem_allocation)
{
return container_of(devmem_allocation, struct xe_bo, devmem_allocation);
}
-static void xe_svm_devmem_release(struct drm_gpusvm_devmem *devmem_allocation)
+static void xe_svm_devmem_release(struct drm_pagemap_devmem *devmem_allocation)
{
struct xe_bo *bo = to_xe_bo(devmem_allocation);
+ struct xe_device *xe = xe_bo_device(bo);
+ dma_fence_put(devmem_allocation->pre_migrate_fence);
xe_bo_put_async(bo);
+ xe_pm_runtime_put(xe);
}
static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
@@ -554,12 +688,12 @@ static u64 block_offset_to_pfn(struct xe_vram_region *vr, u64 offset)
return PHYS_PFN(offset + vr->hpa_base);
}
-static struct drm_buddy *tile_to_buddy(struct xe_tile *tile)
+static struct drm_buddy *vram_to_buddy(struct xe_vram_region *vram)
{
- return &tile->mem.vram.ttm.mm;
+ return &vram->ttm.mm;
}
-static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocation,
+static int xe_svm_populate_devmem_pfn(struct drm_pagemap_devmem *devmem_allocation,
unsigned long npages, unsigned long *pfn)
{
struct xe_bo *bo = to_xe_bo(devmem_allocation);
@@ -570,8 +704,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio
list_for_each_entry(block, blocks, link) {
struct xe_vram_region *vr = block->private;
- struct xe_tile *tile = vr_to_tile(vr);
- struct drm_buddy *buddy = tile_to_buddy(tile);
+ struct drm_buddy *buddy = vram_to_buddy(vr);
u64 block_pfn = block_offset_to_pfn(vr, drm_buddy_block_offset(block));
int i;
@@ -582,7 +715,7 @@ static int xe_svm_populate_devmem_pfn(struct drm_gpusvm_devmem *devmem_allocatio
return 0;
}
-static const struct drm_gpusvm_devmem_ops gpusvm_devmem_ops = {
+static const struct drm_pagemap_devmem_ops dpagemap_devmem_ops = {
.devmem_release = xe_svm_devmem_release,
.populate_devmem_pfn = xe_svm_populate_devmem_pfn,
.copy_to_devmem = xe_svm_copy_to_devmem,
@@ -615,22 +748,25 @@ int xe_svm_init(struct xe_vm *vm)
{
int err;
- spin_lock_init(&vm->svm.garbage_collector.lock);
- INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
- INIT_WORK(&vm->svm.garbage_collector.work,
- xe_svm_garbage_collector_work_func);
-
- err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
- current->mm, xe_svm_devm_owner(vm->xe), 0,
- vm->size, xe_modparam.svm_notifier_size * SZ_1M,
- &gpusvm_ops, fault_chunk_sizes,
- ARRAY_SIZE(fault_chunk_sizes));
- if (err)
- return err;
-
- drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+ if (vm->flags & XE_VM_FLAG_FAULT_MODE) {
+ spin_lock_init(&vm->svm.garbage_collector.lock);
+ INIT_LIST_HEAD(&vm->svm.garbage_collector.range_list);
+ INIT_WORK(&vm->svm.garbage_collector.work,
+ xe_svm_garbage_collector_work_func);
+
+ err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM", &vm->xe->drm,
+ current->mm, 0, vm->size,
+ xe_modparam.svm_notifier_size * SZ_1M,
+ &gpusvm_ops, fault_chunk_sizes,
+ ARRAY_SIZE(fault_chunk_sizes));
+ drm_gpusvm_driver_set_lock(&vm->svm.gpusvm, &vm->lock);
+ } else {
+ err = drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)",
+ &vm->xe->drm, NULL, 0, 0, 0, NULL,
+ NULL, 0);
+ }
- return 0;
+ return err;
}
/**
@@ -662,84 +798,144 @@ static bool xe_svm_range_is_valid(struct xe_svm_range *range,
struct xe_tile *tile,
bool devmem_only)
{
- /*
- * Advisory only check whether the range currently has a valid mapping,
- * READ_ONCE pairs with WRITE_ONCE in xe_pt.c
- */
- return ((READ_ONCE(range->tile_present) &
- ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
- (!devmem_only || xe_svm_range_in_vram(range));
+ return (xe_vm_has_valid_gpu_mapping(tile, range->tile_present,
+ range->tile_invalidated) &&
+ (!devmem_only || xe_svm_range_in_vram(range)));
}
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
-static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
+/**
+ * xe_svm_range_migrate_to_smem() - Move range pages from VRAM to SMEM
+ * @vm: xe_vm pointer
+ * @range: Pointer to the SVM range structure
+ *
+ * The xe_svm_range_migrate_to_smem() function checks whether the range has
+ * pages in VRAM and migrates them to SMEM.
+ */
+void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
{
- return &tile->mem.vram;
+ if (xe_svm_range_in_vram(range))
+ drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
}
-static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
- struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
+/**
+ * xe_svm_range_validate() - Check if the SVM range is valid
+ * @vm: xe_vm pointer
+ * @range: Pointer to the SVM range structure
+ * @tile_mask: Mask representing the tiles to be checked
+ * @devmem_preferred: if true, the range needs to be in devmem
+ *
+ * The xe_svm_range_validate() function checks if a range is
+ * valid and located in the desired memory region.
+ *
+ * Return: true if the range is valid, false otherwise
+ */
+bool xe_svm_range_validate(struct xe_vm *vm,
+ struct xe_svm_range *range,
+ u8 tile_mask, bool devmem_preferred)
{
- struct mm_struct *mm = vm->svm.gpusvm.mm;
- struct xe_vram_region *vr = tile_to_vr(tile);
- struct drm_buddy_block *block;
- struct list_head *blocks;
- struct xe_bo *bo;
- ktime_t end = 0;
- int err;
+ bool ret;
- range_debug(range, "ALLOCATE VRAM");
+ xe_svm_notifier_lock(vm);
- if (!mmget_not_zero(mm))
- return -EFAULT;
- mmap_read_lock(mm);
+ ret = (range->tile_present & ~range->tile_invalidated & tile_mask) == tile_mask &&
+ (devmem_preferred == range->base.pages.flags.has_devmem_pages);
-retry:
- bo = xe_bo_create_locked(tile_to_xe(tile), NULL, NULL,
- xe_svm_range_size(range),
- ttm_bo_type_device,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_CPU_ADDR_MIRROR);
- if (IS_ERR(bo)) {
- err = PTR_ERR(bo);
- if (xe_vm_validate_should_retry(NULL, err, &end))
- goto retry;
- goto unlock;
- }
+ xe_svm_notifier_unlock(vm);
- drm_gpusvm_devmem_init(&bo->devmem_allocation,
- vm->xe->drm.dev, mm,
- &gpusvm_devmem_ops,
- &tile->mem.vram.dpagemap,
- xe_svm_range_size(range));
+ return ret;
+}
- blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
- list_for_each_entry(block, blocks, link)
- block->private = vr;
+/**
+ * xe_svm_find_vma_start - Find start of CPU VMA
+ * @vm: xe_vm pointer
+ * @start: start address
+ * @end: end address
+ * @vma: Pointer to struct xe_vma
+ *
+ * This function searches for a CPU VMA within the specified
+ * range [start, end] in the given VM. It adjusts the range based on the
+ * xe_vma start and end addresses. If no CPU VMA is found, it returns ULONG_MAX.
+ *
+ * Return: The starting address of the VMA within the range,
+ * or ULONG_MAX if no VMA is found
+ */
+u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 start, u64 end, struct xe_vma *vma)
+{
+ return drm_gpusvm_find_vma_start(&vm->svm.gpusvm,
+ max(start, xe_vma_start(vma)),
+ min(end, xe_vma_end(vma)));
+}
- xe_bo_get(bo);
- err = drm_gpusvm_migrate_to_devmem(&vm->svm.gpusvm, &range->base,
- &bo->devmem_allocation, ctx);
- if (err)
- xe_svm_devmem_release(&bo->devmem_allocation);
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+static int xe_drm_pagemap_populate_mm(struct drm_pagemap *dpagemap,
+ unsigned long start, unsigned long end,
+ struct mm_struct *mm,
+ unsigned long timeslice_ms)
+{
+ struct xe_vram_region *vr = container_of(dpagemap, typeof(*vr), dpagemap);
+ struct dma_fence *pre_migrate_fence = NULL;
+ struct xe_device *xe = vr->xe;
+ struct device *dev = xe->drm.dev;
+ struct drm_buddy_block *block;
+ struct xe_validation_ctx vctx;
+ struct list_head *blocks;
+ struct drm_exec exec;
+ struct xe_bo *bo;
+ int err = 0, idx;
- xe_bo_unlock(bo);
- xe_bo_put(bo);
+ if (!drm_dev_enter(&xe->drm, &idx))
+ return -ENODEV;
-unlock:
- mmap_read_unlock(mm);
- mmput(mm);
+ xe_pm_runtime_get(xe);
+
+ xe_validation_guard(&vctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
+ bo = xe_bo_create_locked(xe, NULL, NULL, end - start,
+ ttm_bo_type_device,
+ (IS_DGFX(xe) ? XE_BO_FLAG_VRAM(vr) : XE_BO_FLAG_SYSTEM) |
+ XE_BO_FLAG_CPU_ADDR_MIRROR, &exec);
+ drm_exec_retry_on_contention(&exec);
+ if (IS_ERR(bo)) {
+ err = PTR_ERR(bo);
+ xe_validation_retry_on_oom(&vctx, &err);
+ break;
+ }
+
+ /* Ensure that any clearing or async eviction will complete before migration. */
+ if (!dma_resv_test_signaled(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL)) {
+ err = dma_resv_get_singleton(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ &pre_migrate_fence);
+ if (err)
+ dma_resv_wait_timeout(bo->ttm.base.resv, DMA_RESV_USAGE_KERNEL,
+ false, MAX_SCHEDULE_TIMEOUT);
+ else if (pre_migrate_fence)
+ dma_fence_enable_sw_signaling(pre_migrate_fence);
+ }
+
+ drm_pagemap_devmem_init(&bo->devmem_allocation, dev, mm,
+ &dpagemap_devmem_ops, dpagemap, end - start,
+ pre_migrate_fence);
+
+ blocks = &to_xe_ttm_vram_mgr_resource(bo->ttm.resource)->blocks;
+ list_for_each_entry(block, blocks, link)
+ block->private = vr;
+
+ xe_bo_get(bo);
+
+ /* Ensure the device has a pm ref while there are device pages active. */
+ xe_pm_runtime_get_noresume(xe);
+ err = drm_pagemap_migrate_to_devmem(&bo->devmem_allocation, mm,
+ start, end, timeslice_ms,
+ xe_svm_devm_owner(xe));
+ if (err)
+ xe_svm_devmem_release(&bo->devmem_allocation);
+ xe_bo_unlock(bo);
+ xe_bo_put(bo);
+ }
+ xe_pm_runtime_put(xe);
+ drm_dev_exit(idx);
return err;
}
-#else
-static int xe_svm_alloc_vram(struct xe_vm *vm, struct xe_tile *tile,
- struct xe_svm_range *range,
- const struct drm_gpusvm_ctx *ctx)
-{
- return -EOPNOTSUPP;
-}
#endif
static bool supports_4K_migration(struct xe_device *xe)
@@ -750,21 +946,31 @@ static bool supports_4K_migration(struct xe_device *xe)
return true;
}
-static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
- struct xe_vma *vma)
+/**
+ * xe_svm_range_needs_migrate_to_vram() - SVM range needs migrate to VRAM or not
+ * @range: SVM range for which migration needs to be decided
+ * @vma: vma which has range
+ * @preferred_region_is_vram: preferred region for range is vram
+ *
+ * Return: True if the range needs migration and migration is supported, else false
+ */
+bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
+ bool preferred_region_is_vram)
{
struct xe_vm *vm = range_to_vm(&range->base);
u64 range_size = xe_svm_range_size(range);
- if (!range->base.flags.migrate_devmem)
+ if (!range->base.pages.flags.migrate_devmem || !preferred_region_is_vram)
return false;
+ xe_assert(vm->xe, IS_DGFX(vm->xe));
+
if (xe_svm_range_in_vram(range)) {
drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
return false;
}
- if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
+ if (range_size < SZ_64K && !supports_4K_migration(vm->xe)) {
drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
return false;
}
@@ -772,41 +978,78 @@ static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
return true;
}
-/**
- * xe_svm_handle_pagefault() - SVM handle page fault
- * @vm: The VM.
- * @vma: The CPU address mirror VMA.
- * @gt: The gt upon the fault occurred.
- * @fault_addr: The GPU fault address.
- * @atomic: The fault atomic access bit.
- *
- * Create GPU bindings for a SVM page fault. Optionally migrate to device
- * memory.
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
- struct xe_gt *gt, u64 fault_addr,
- bool atomic)
+#define DECL_SVM_RANGE_COUNT_STATS(elem, stat) \
+static void xe_svm_range_##elem##_count_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range) \
+{ \
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_COUNT, 1); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_COUNT, 1); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(valid_fault, VALID_PAGEFAULT)
+DECL_SVM_RANGE_COUNT_STATS(migrate, MIGRATE)
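+/*
+ * For example, DECL_SVM_RANGE_COUNT_STATS(fault, PAGEFAULT) above defines
+ * xe_svm_range_fault_count_stats_incr(), which bumps the
+ * XE_GT_STATS_ID_SVM_{4K,64K,2M}_PAGEFAULT_COUNT counter matching the range size.
+ */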
+
+#define DECL_SVM_RANGE_US_STATS(elem, stat) \
+static void xe_svm_range_##elem##_us_stats_incr(struct xe_gt *gt, \
+ struct xe_svm_range *range, \
+ ktime_t start) \
+{ \
+ s64 us_delta = xe_svm_stats_ktime_us_delta(start); \
+\
+ switch (xe_svm_range_size(range)) { \
+ case SZ_4K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_4K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_64K: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_64K_##stat##_US, \
+ us_delta); \
+ break; \
+ case SZ_2M: \
+ xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_2M_##stat##_US, \
+ us_delta); \
+ break; \
+ } \
+} \
+
+DECL_SVM_RANGE_US_STATS(migrate, MIGRATE)
+DECL_SVM_RANGE_US_STATS(get_pages, GET_PAGES)
+DECL_SVM_RANGE_US_STATS(bind, BIND)
+DECL_SVM_RANGE_US_STATS(fault, PAGEFAULT)
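+/*
+ * Likewise, DECL_SVM_RANGE_US_STATS(bind, BIND) above defines
+ * xe_svm_range_bind_us_stats_incr(), which adds the microseconds elapsed since
+ * @start to XE_GT_STATS_ID_SVM_{4K,64K,2M}_BIND_US, selected by the range size.
+ */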
+
+static int __xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_gt *gt, u64 fault_addr,
+ bool need_vram)
{
+ int devmem_possible = IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
struct drm_gpusvm_ctx ctx = {
.read_only = xe_vma_read_only(vma),
- .devmem_possible = IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
- .check_pages_threshold = IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
- .devmem_only = atomic && IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
- .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
- IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
+ .devmem_possible = devmem_possible,
+ .check_pages_threshold = devmem_possible ? SZ_64K : 0,
+ .devmem_only = need_vram && devmem_possible,
+ .timeslice_ms = need_vram && devmem_possible ?
+ vm->xe->atomic_svm_timeslice_ms : 0,
+ .device_private_page_owner = xe_svm_devm_owner(vm->xe),
};
- struct xe_svm_range *range;
- struct drm_gpusvm_range *r;
+ struct xe_validation_ctx vctx;
struct drm_exec exec;
+ struct xe_svm_range *range;
struct dma_fence *fence;
- int migrate_try_count = ctx.devmem_only ? 3 : 1;
+ struct drm_pagemap *dpagemap;
struct xe_tile *tile = gt_to_tile(gt);
- ktime_t end = 0;
+ int migrate_try_count = ctx.devmem_only ? 3 : 1;
+ ktime_t start = xe_svm_stats_ktime_get(), bind_start, get_pages_start;
int err;
lockdep_assert_held_write(&vm->lock);
@@ -820,31 +1063,58 @@ retry:
if (err)
return err;
- r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, fault_addr,
- xe_vma_start(vma), xe_vma_end(vma),
- &ctx);
- if (IS_ERR(r))
- return PTR_ERR(r);
+ dpagemap = xe_vma_resolve_pagemap(vma, tile);
+ if (!dpagemap && !ctx.devmem_only)
+ ctx.device_private_page_owner = NULL;
+ range = xe_svm_range_find_or_insert(vm, fault_addr, vma, &ctx);
- if (ctx.devmem_only && !r->flags.migrate_devmem)
- return -EACCES;
+ if (IS_ERR(range))
+ return PTR_ERR(range);
- range = to_xe_range(r);
- if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
- return 0;
+ xe_svm_range_fault_count_stats_incr(gt, range);
+
+ if (ctx.devmem_only && !range->base.pages.flags.migrate_devmem) {
+ err = -EACCES;
+ goto out;
+ }
+
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only)) {
+ xe_svm_range_valid_fault_count_stats_incr(gt, range);
+ range_debug(range, "PAGE FAULT - VALID");
+ goto out;
+ }
range_debug(range, "PAGE FAULT");
if (--migrate_try_count >= 0 &&
- xe_svm_range_needs_migrate_to_vram(range, vma)) {
- err = xe_svm_alloc_vram(vm, tile, range, &ctx);
+ xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
+ ktime_t migrate_start = xe_svm_stats_ktime_get();
+
+ /* TODO: For multi-device, dpagemap will be used to find the
+ * remote tile and remote device. Will need to modify
+ * xe_svm_alloc_vram to use dpagemap for future multi-device
+ * support.
+ */
+ xe_svm_range_migrate_count_stats_incr(gt, range);
+ err = xe_svm_alloc_vram(tile, range, &ctx);
+ xe_svm_range_migrate_us_stats_incr(gt, range, migrate_start);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
if (migrate_try_count || !ctx.devmem_only) {
drm_dbg(&vm->xe->drm,
"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
vm->usm.asid, ERR_PTR(err));
- goto retry;
+
+ /*
+ * In the devmem-only case, mixed mappings may
+ * be found. The get_pages function will fix
+ * these up to a single location, allowing the
+ * page fault handler to make forward progress.
+ */
+ if (ctx.devmem_only)
+ goto get_pages;
+ else
+ goto retry;
} else {
drm_err(&vm->xe->drm,
"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
@@ -854,17 +1124,15 @@ retry:
}
}
+get_pages:
+ get_pages_start = xe_svm_stats_ktime_get();
+
range_debug(range, "GET PAGES");
- err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
+ err = xe_svm_range_get_pages(vm, range, &ctx);
/* Corner where CPU mappings have changed */
if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (migrate_try_count > 0 || !ctx.devmem_only) {
- if (err == -EOPNOTSUPP) {
- range_debug(range, "PAGE FAULT - EVICT PAGES");
- drm_gpusvm_range_evict(&vm->svm.gpusvm,
- &range->base);
- }
drm_dbg(&vm->xe->drm,
"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
@@ -878,46 +1146,89 @@ retry:
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
- goto err_out;
+ goto out;
}
+ xe_svm_range_get_pages_us_stats_incr(gt, range, get_pages_start);
range_debug(range, "PAGE FAULT - BIND");
-retry_bind:
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
- err = drm_exec_lock_obj(&exec, vm->gpuvm.r_obj);
+ bind_start = xe_svm_stats_ktime_get();
+ xe_validation_guard(&vctx, &vm->xe->val, &exec, (struct xe_val_flags) {}, err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
drm_exec_retry_on_contention(&exec);
- if (err) {
- drm_exec_fini(&exec);
- goto err_out;
- }
+ xe_vm_set_validation_exec(vm, &exec);
fence = xe_vm_range_rebind(vm, vma, range, BIT(tile->id));
+ xe_vm_set_validation_exec(vm, NULL);
if (IS_ERR(fence)) {
- drm_exec_fini(&exec);
+ drm_exec_retry_on_contention(&exec);
err = PTR_ERR(fence);
- if (err == -EAGAIN) {
- ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
- range_debug(range, "PAGE FAULT - RETRY BIND");
- goto retry;
- }
- if (xe_vm_validate_should_retry(&exec, err, &end))
- goto retry_bind;
- goto err_out;
+ xe_validation_retry_on_oom(&vctx, &err);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
+ break;
}
}
- drm_exec_fini(&exec);
+ if (err)
+ goto err_out;
dma_fence_wait(fence, false);
dma_fence_put(fence);
+ xe_svm_range_bind_us_stats_incr(gt, range, bind_start);
+
+out:
+ xe_svm_range_fault_us_stats_incr(gt, range, start);
+ return 0;
err_out:
+ if (err == -EAGAIN) {
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+ range_debug(range, "PAGE FAULT - RETRY BIND");
+ goto retry;
+ }
return err;
}
/**
+ * xe_svm_handle_pagefault() - SVM handle page fault
+ * @vm: The VM.
+ * @vma: The CPU address mirror VMA.
+ * @gt: The gt upon the fault occurred.
+ * @fault_addr: The GPU fault address.
+ * @atomic: The fault atomic access bit.
+ *
+ * Create GPU bindings for a SVM page fault. Optionally migrate to device
+ * memory.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
+ struct xe_gt *gt, u64 fault_addr,
+ bool atomic)
+{
+ int need_vram, ret;
+retry:
+ need_vram = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic);
+ if (need_vram < 0)
+ return need_vram;
+
+ ret = __xe_svm_handle_pagefault(vm, vma, gt, fault_addr,
+ need_vram ? true : false);
+ if (ret == -EAGAIN) {
+ /*
+ * Retry once on -EAGAIN to re-lookup the VMA, as the original VMA
+ * may have been split by xe_svm_range_set_default_attr.
+ */
+ vma = xe_vm_find_vma_by_addr(vm, fault_addr);
+ if (!vma)
+ return -EINVAL;
+
+ goto retry;
+ }
+ return ret;
+}
+
+/**
* xe_svm_has_mapping() - SVM has mappings
* @vm: The VM.
* @start: Start address.
@@ -933,6 +1244,41 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
}
/**
+ * xe_svm_unmap_address_range - Unmap SVM mappings and ranges
+ * @vm: The VM
+ * @start: start addr
+ * @end: end addr
+ *
+ * This function unmaps SVM ranges if the start or end address lies inside them.
+ */
+void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpusvm_notifier *notifier, *next;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) {
+ struct drm_gpusvm_range *range, *__next;
+
+ drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) {
+ if (start > drm_gpusvm_range_start(range) ||
+ end < drm_gpusvm_range_end(range)) {
+ if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range)))
+ drm_gpusvm_range_evict(&vm->svm.gpusvm, range);
+ drm_gpusvm_range_get(range);
+ __xe_svm_garbage_collector(vm, to_xe_range(range));
+ if (!list_empty(&to_xe_range(range)->garbage_collector_link)) {
+ spin_lock(&vm->svm.garbage_collector.lock);
+ list_del(&to_xe_range(range)->garbage_collector_link);
+ spin_unlock(&vm->svm.garbage_collector.lock);
+ }
+ drm_gpusvm_range_put(range);
+ }
+ }
+ }
+}
+
+/**
* xe_svm_bo_evict() - SVM evict BO to system memory
* @bo: BO to evict
*
@@ -943,12 +1289,172 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
*/
int xe_svm_bo_evict(struct xe_bo *bo)
{
- return drm_gpusvm_evict_to_ram(&bo->devmem_allocation);
+ return drm_pagemap_evict_to_ram(&bo->devmem_allocation);
}
-#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+/**
+ * xe_svm_range_find_or_insert - Find or insert GPU SVM range
+ * @vm: xe_vm pointer
+ * @addr: address for which range needs to be found/inserted
+ * @vma: Pointer to struct xe_vma which mirrors CPU
+ * @ctx: GPU SVM context
+ *
+ * This function finds or inserts a newly allocated SVM range based on the
+ * address.
+ *
+ * Return: Pointer to the SVM range on success, ERR_PTR() on failure.
+ */
+struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
+ struct xe_vma *vma, struct drm_gpusvm_ctx *ctx)
+{
+ struct drm_gpusvm_range *r;
+
+ r = drm_gpusvm_range_find_or_insert(&vm->svm.gpusvm, max(addr, xe_vma_start(vma)),
+ xe_vma_start(vma), xe_vma_end(vma), ctx);
+ if (IS_ERR(r))
+ return ERR_CAST(r);
+
+ return to_xe_range(r);
+}
-static struct drm_pagemap_device_addr
+/**
+ * xe_svm_range_get_pages() - Get pages for a SVM range
+ * @vm: Pointer to the struct xe_vm
+ * @range: Pointer to the xe SVM range structure
+ * @ctx: GPU SVM context
+ *
+ * This function gets pages for a SVM range and ensures they are mapped for
+ * DMA access. In case of failure with -EOPNOTSUPP, it evicts the range.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
+ struct drm_gpusvm_ctx *ctx)
+{
+ int err = 0;
+
+ err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, &range->base, ctx);
+ if (err == -EOPNOTSUPP) {
+ range_debug(range, "PAGE FAULT - EVICT PAGES");
+ drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
+ }
+
+ return err;
+}
+
+/**
+ * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range
+ * @vm: Pointer to the xe_vm structure
+ * @start: Start of the input range
+ * @end: End of the input range
+ *
+ * This function removes the page table entries (PTEs) associated
+ * with the SVM ranges within the given input start and end.
+ *
+ * Return: tile_mask indicating which GTs need TLB invalidation.
+ */
+u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpusvm_notifier *notifier;
+ struct xe_svm_range *range;
+ u64 adj_start, adj_end;
+ struct xe_tile *tile;
+ u8 tile_mask = 0;
+ u8 id;
+
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
+ lockdep_is_held_type(&vm->lock, 0));
+
+ drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) {
+ struct drm_gpusvm_range *r = NULL;
+
+ adj_start = max(start, drm_gpusvm_notifier_start(notifier));
+ adj_end = min(end, drm_gpusvm_notifier_end(notifier));
+ drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) {
+ range = to_xe_range(r);
+ for_each_tile(tile, vm->xe, id) {
+ if (xe_pt_zap_ptes_range(tile, vm, range)) {
+ tile_mask |= BIT(id);
+ /*
+ * WRITE_ONCE pairs with READ_ONCE in
+ * xe_vm_has_valid_gpu_mapping().
+ * Must not fail after setting
+ * tile_invalidated and before
+ * TLB invalidation.
+ */
+ WRITE_ONCE(range->tile_invalidated,
+ range->tile_invalidated | BIT(id));
+ }
+ }
+ }
+ }
+
+ return tile_mask;
+}
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+
+static struct drm_pagemap *tile_local_pagemap(struct xe_tile *tile)
+{
+ return &tile->mem.vram->dpagemap;
+}
+
+/**
+ * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
+ * @vma: Pointer to the xe_vma structure containing memory attributes
+ * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping
+ *
+ * This function determines the correct DRM pagemap to use for a given VMA.
+ * It first checks if a valid devmem_fd is provided in the VMA's preferred
+ * location. If the devmem_fd is negative, it returns NULL, indicating that no
+ * pagemap is available and that system memory (smem) is to be used as the
+ * preferred location. If the devmem_fd is equal to the default faulting
+ * GT identifier, it returns the VRAM pagemap associated with the tile.
+ *
+ * Future support for multi-device configurations may use drm_pagemap_from_fd()
+ * to resolve pagemaps from arbitrary file descriptors.
+ *
+ * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable.
+ */
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
+{
+ s32 fd = (s32)vma->attr.preferred_loc.devmem_fd;
+
+ if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
+ return NULL;
+
+ if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
+ return IS_DGFX(tile_to_xe(tile)) ? tile_local_pagemap(tile) : NULL;
+
+ /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
+ return NULL;
+}
+
+/**
+ * xe_svm_alloc_vram() - Allocate device memory pages for range,
+ * migrating existing data.
+ * @tile: tile to allocate vram from
+ * @range: SVM range
+ * @ctx: DRM GPU SVM context
+ *
+ * Return: 0 on success, error code on failure.
+ */
+int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ struct drm_pagemap *dpagemap;
+
+ xe_assert(tile_to_xe(tile), range->base.pages.flags.migrate_devmem);
+ range_debug(range, "ALLOCATE VRAM");
+
+ dpagemap = tile_local_pagemap(tile);
+ return drm_pagemap_populate_mm(dpagemap, xe_svm_range_start(range),
+ xe_svm_range_end(range),
+ range->base.gpusvm->mm,
+ ctx->timeslice_ms);
+}
+
+static struct drm_pagemap_addr
xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
struct device *dev,
struct page *page,
@@ -967,11 +1473,12 @@ xe_drm_pagemap_device_map(struct drm_pagemap *dpagemap,
prot = 0;
}
- return drm_pagemap_device_addr_encode(addr, prot, order, dir);
+ return drm_pagemap_addr_encode(addr, prot, order, dir);
}
static const struct drm_pagemap_ops xe_drm_pagemap_ops = {
.device_map = xe_drm_pagemap_device_map,
+ .populate_mm = xe_drm_pagemap_populate_mm,
};
/**
@@ -1003,7 +1510,7 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
vr->pagemap.range.start = res->start;
vr->pagemap.range.end = res->end;
vr->pagemap.nr_range = 1;
- vr->pagemap.ops = drm_gpusvm_pagemap_ops_get();
+ vr->pagemap.ops = drm_pagemap_pagemap_ops_get();
vr->pagemap.owner = xe_svm_devm_owner(xe);
addr = devm_memremap_pages(dev, &vr->pagemap);
@@ -1024,10 +1531,22 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
return 0;
}
#else
+int xe_svm_alloc_vram(struct xe_tile *tile,
+ struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
{
return 0;
}
+
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
+{
+ return NULL;
+}
#endif
/**
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 30fc78b85b30..fa757dd07954 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -6,6 +6,20 @@
#ifndef _XE_SVM_H_
#define _XE_SVM_H_
+struct xe_device;
+
+/**
+ * xe_svm_devm_owner() - Return the owner of device private memory
+ * @xe: The xe device.
+ *
+ * Return: The owner of this device's device private memory to use in
+ * hmm_range_fault()-
+ */
+static inline void *xe_svm_devm_owner(struct xe_device *xe)
+{
+ return xe;
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_GPUSVM)
#include <drm/drm_pagemap.h>
@@ -70,6 +84,32 @@ int xe_svm_bo_evict(struct xe_bo *bo);
void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
+int xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx);
+
+struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
+ struct xe_vma *vma, struct drm_gpusvm_ctx *ctx);
+
+int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
+ struct drm_gpusvm_ctx *ctx);
+
+bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
+ bool preferred_region_is_vram);
+
+void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range);
+
+bool xe_svm_range_validate(struct xe_vm *vm,
+ struct xe_svm_range *range,
+ u8 tile_mask, bool devmem_preferred);
+
+u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma);
+
+void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end);
+
+u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end);
+
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile);
+
/**
* xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
* @range: SVM range
@@ -79,24 +119,65 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation);
static inline bool xe_svm_range_has_dma_mapping(struct xe_svm_range *range)
{
lockdep_assert_held(&range->base.gpusvm->notifier_lock);
- return range->base.flags.has_dma_mapping;
+ return range->base.pages.flags.has_dma_mapping;
}
-#define xe_svm_assert_in_notifier(vm__) \
- lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
+/**
+ * to_xe_range - Convert a drm_gpusvm_range pointer to a xe_svm_range
+ * @r: Pointer to the drm_gpusvm_range structure
+ *
+ * This function takes a pointer to a drm_gpusvm_range structure and
+ * converts it to a pointer to the containing xe_svm_range structure.
+ *
+ * Return: Pointer to the xe_svm_range structure
+ */
+static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
+{
+ return container_of(r, struct xe_svm_range, base);
+}
-#define xe_svm_notifier_lock(vm__) \
- drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
+/**
+ * xe_svm_range_start() - SVM range start address
+ * @range: SVM range
+ *
+ * Return: start address of range.
+ */
+static inline unsigned long xe_svm_range_start(struct xe_svm_range *range)
+{
+ return drm_gpusvm_range_start(&range->base);
+}
-#define xe_svm_notifier_unlock(vm__) \
- drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
+/**
+ * xe_svm_range_end() - SVM range end address
+ * @range: SVM range
+ *
+ * Return: end address of range.
+ */
+static inline unsigned long xe_svm_range_end(struct xe_svm_range *range)
+{
+ return drm_gpusvm_range_end(&range->base);
+}
+
+/**
+ * xe_svm_range_size() - SVM range size
+ * @range: SVM range
+ *
+ * Return: Size of range.
+ */
+static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
+{
+ return drm_gpusvm_range_size(&range->base);
+}
void xe_svm_flush(struct xe_vm *vm);
#else
#include <linux/interval_tree.h>
+#include "xe_vm.h"
-struct drm_pagemap_device_addr;
+struct drm_pagemap_addr;
+struct drm_gpusvm_ctx;
+struct drm_gpusvm_range;
struct xe_bo;
struct xe_gt;
struct xe_vm;
@@ -109,7 +190,9 @@ struct xe_vram_region;
struct xe_svm_range {
struct {
struct interval_tree_node itree;
- const struct drm_pagemap_device_addr *dma_addr;
+ struct {
+ const struct drm_pagemap_addr *dma_addr;
+ } pages;
} base;
u32 tile_present;
u32 tile_invalidated;
@@ -129,12 +212,21 @@ int xe_devm_add(struct xe_tile *tile, struct xe_vram_region *vr)
static inline
int xe_svm_init(struct xe_vm *vm)
{
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+ return drm_gpusvm_init(&vm->svm.gpusvm, "Xe SVM (simple)", &vm->xe->drm,
+ NULL, 0, 0, 0, NULL, NULL, 0);
+#else
return 0;
+#endif
}
static inline
void xe_svm_fini(struct xe_vm *vm)
{
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+ xe_assert(vm->xe, xe_vm_is_closed(vm));
+ drm_gpusvm_fini(&vm->svm.gpusvm);
+#endif
}
static inline
@@ -167,19 +259,131 @@ void xe_svm_range_debug(struct xe_svm_range *range, const char *operation)
{
}
-#define xe_svm_assert_in_notifier(...) do {} while (0)
+static inline int
+xe_svm_alloc_vram(struct xe_tile *tile, struct xe_svm_range *range,
+ const struct drm_gpusvm_ctx *ctx)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline
+struct xe_svm_range *xe_svm_range_find_or_insert(struct xe_vm *vm, u64 addr,
+ struct xe_vma *vma, struct drm_gpusvm_ctx *ctx)
+{
+ return ERR_PTR(-EINVAL);
+}
+
+static inline
+int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
+ struct drm_gpusvm_ctx *ctx)
+{
+ return -EINVAL;
+}
+
+static inline struct xe_svm_range *to_xe_range(struct drm_gpusvm_range *r)
+{
+ return NULL;
+}
+
+static inline unsigned long xe_svm_range_start(struct xe_svm_range *range)
+{
+ return 0;
+}
+
+static inline unsigned long xe_svm_range_end(struct xe_svm_range *range)
+{
+ return 0;
+}
+
+static inline unsigned long xe_svm_range_size(struct xe_svm_range *range)
+{
+ return 0;
+}
+
+static inline
+bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vma *vma,
+ bool preferred_region_is_vram)
+{
+ return false;
+}
+
+static inline
+void xe_svm_range_migrate_to_smem(struct xe_vm *vm, struct xe_svm_range *range)
+{
+}
+
+static inline
+bool xe_svm_range_validate(struct xe_vm *vm,
+ struct xe_svm_range *range,
+ u8 tile_mask, bool devmem_preferred)
+{
+ return false;
+}
+
+static inline
+u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma)
+{
+ return ULONG_MAX;
+}
+
+static inline
+void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end)
+{
+}
+
+static inline
+u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ return 0;
+}
+
+static inline
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
+{
+ return NULL;
+}
+
+static inline void xe_svm_flush(struct xe_vm *vm)
+{
+}
#define xe_svm_range_has_dma_mapping(...) false
+#endif /* CONFIG_DRM_XE_GPUSVM */
+
+#if IS_ENABLED(CONFIG_DRM_GPUSVM) /* Need to support userptr without XE_GPUSVM */
+#define xe_svm_assert_in_notifier(vm__) \
+ lockdep_assert_held_write(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_assert_held_read(vm__) \
+ lockdep_assert_held_read(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_lock(vm__) \
+ drm_gpusvm_notifier_lock(&(vm__)->svm.gpusvm)
+
+#define xe_svm_notifier_lock_interruptible(vm__) \
+ down_read_interruptible(&(vm__)->svm.gpusvm.notifier_lock)
+
+#define xe_svm_notifier_unlock(vm__) \
+ drm_gpusvm_notifier_unlock(&(vm__)->svm.gpusvm)
+
+#else
+#define xe_svm_assert_in_notifier(...) do {} while (0)
+
+static inline void xe_svm_assert_held_read(struct xe_vm *vm)
+{
+}
static inline void xe_svm_notifier_lock(struct xe_vm *vm)
{
}
-static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
+static inline int xe_svm_notifier_lock_interruptible(struct xe_vm *vm)
{
+ return 0;
}
-static inline void xe_svm_flush(struct xe_vm *vm)
+static inline void xe_svm_notifier_unlock(struct xe_vm *vm)
{
}
-#endif
+#endif /* CONFIG_DRM_GPUSVM */
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_sync.c b/drivers/gpu/drm/xe/xe_sync.c
index f87276df18f2..ff74528ca0c6 100644
--- a/drivers/gpu/drm/xe/xe_sync.c
+++ b/drivers/gpu/drm/xe/xe_sync.c
@@ -14,7 +14,7 @@
#include <drm/drm_syncobj.h>
#include <uapi/drm/xe_drm.h>
-#include "xe_device_types.h"
+#include "xe_device.h"
#include "xe_exec_queue.h"
#include "xe_macros.h"
#include "xe_sched_job_types.h"
@@ -77,6 +77,7 @@ static void user_fence_worker(struct work_struct *w)
{
struct xe_user_fence *ufence = container_of(w, struct xe_user_fence, worker);
+ WRITE_ONCE(ufence->signalled, 1);
if (mmget_not_zero(ufence->mm)) {
kthread_use_mm(ufence->mm);
if (copy_to_user(ufence->addr, &ufence->value, sizeof(ufence->value)))
@@ -91,7 +92,6 @@ static void user_fence_worker(struct work_struct *w)
* Wake up waiters only after updating the ufence state, allowing the UMD
* to safely reuse the same ufence without encountering -EBUSY errors.
*/
- WRITE_ONCE(ufence->signalled, 1);
wake_up_all(&ufence->xe->ufence_wq);
user_fence_put(ufence);
}
@@ -113,6 +113,8 @@ static void user_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
struct xe_sync_entry *sync,
struct drm_xe_sync __user *sync_user,
+ struct drm_syncobj *ufence_syncobj,
+ u64 ufence_timeline_value,
unsigned int flags)
{
struct drm_xe_sync sync_in;
@@ -192,10 +194,15 @@ int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
if (exec) {
sync->addr = sync_in.addr;
} else {
+ sync->ufence_timeline_value = ufence_timeline_value;
sync->ufence = user_fence_create(xe, sync_in.addr,
sync_in.timeline_value);
if (XE_IOCTL_DBG(xe, IS_ERR(sync->ufence)))
return PTR_ERR(sync->ufence);
+ sync->ufence_chain_fence = dma_fence_chain_alloc();
+ if (!sync->ufence_chain_fence)
+ return -ENOMEM;
+ sync->ufence_syncobj = ufence_syncobj;
}
break;
@@ -239,7 +246,12 @@ void xe_sync_entry_signal(struct xe_sync_entry *sync, struct dma_fence *fence)
} else if (sync->ufence) {
int err;
- dma_fence_get(fence);
+ drm_syncobj_add_point(sync->ufence_syncobj,
+ sync->ufence_chain_fence,
+ fence, sync->ufence_timeline_value);
+ sync->ufence_chain_fence = NULL;
+
+ fence = drm_syncobj_fence_get(sync->ufence_syncobj);
user_fence_get(sync->ufence);
err = dma_fence_add_callback(fence, &sync->ufence->cb,
user_fence_cb);
@@ -259,7 +271,8 @@ void xe_sync_entry_cleanup(struct xe_sync_entry *sync)
drm_syncobj_put(sync->syncobj);
dma_fence_put(sync->fence);
dma_fence_chain_free(sync->chain_fence);
- if (sync->ufence)
+ dma_fence_chain_free(sync->ufence_chain_fence);
+ if (!IS_ERR_OR_NULL(sync->ufence))
user_fence_put(sync->ufence);
}
@@ -284,51 +297,59 @@ xe_sync_in_fence_get(struct xe_sync_entry *sync, int num_sync,
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
struct dma_fence *fence;
- int i, num_in_fence = 0, current_fence = 0;
+ int i, num_fence = 0, current_fence = 0;
lockdep_assert_held(&vm->lock);
- /* Count in-fences */
- for (i = 0; i < num_sync; ++i) {
- if (sync[i].fence) {
- ++num_in_fence;
- fence = sync[i].fence;
+ /* Reject in-fences */
+ for (i = 0; i < num_sync; ++i)
+ if (sync[i].fence)
+ return ERR_PTR(-EOPNOTSUPP);
+
+ if (q->flags & EXEC_QUEUE_FLAG_VM) {
+ struct xe_exec_queue *__q;
+ struct xe_tile *tile;
+ u8 id;
+
+ for_each_tile(tile, vm->xe, id)
+ num_fence += (1 + XE_MAX_GT_PER_TILE);
+
+ fences = kmalloc_array(num_fence, sizeof(*fences),
+ GFP_KERNEL);
+ if (!fences)
+ return ERR_PTR(-ENOMEM);
+
+ fences[current_fence++] =
+ xe_exec_queue_last_fence_get(q, vm);
+ for_each_tlb_inval(i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+ list_for_each_entry(__q, &q->multi_gt_list,
+ multi_gt_link) {
+ fences[current_fence++] =
+ xe_exec_queue_last_fence_get(__q, vm);
+ for_each_tlb_inval(i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(__q, vm, i);
}
- }
- /* Easy case... */
- if (!num_in_fence) {
- fence = xe_exec_queue_last_fence_get(q, vm);
- return fence;
- }
+ xe_assert(vm->xe, current_fence == num_fence);
+ cf = dma_fence_array_create(num_fence, fences,
+ dma_fence_context_alloc(1),
+ 1, false);
+ if (!cf)
+ goto err_out;
- /* Create composite fence */
- fences = kmalloc_array(num_in_fence + 1, sizeof(*fences), GFP_KERNEL);
- if (!fences)
- return ERR_PTR(-ENOMEM);
- for (i = 0; i < num_sync; ++i) {
- if (sync[i].fence) {
- dma_fence_get(sync[i].fence);
- fences[current_fence++] = sync[i].fence;
- }
- }
- fences[current_fence++] = xe_exec_queue_last_fence_get(q, vm);
- cf = dma_fence_array_create(num_in_fence, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- if (!cf) {
- --vm->composite_fence_seqno;
- goto err_out;
+ return &cf->base;
}
- return &cf->base;
+ fence = xe_exec_queue_last_fence_get(q, vm);
+ return fence;
err_out:
while (current_fence)
dma_fence_put(fences[--current_fence]);
kfree(fences);
- kfree(cf);
return ERR_PTR(-ENOMEM);
}
diff --git a/drivers/gpu/drm/xe/xe_sync.h b/drivers/gpu/drm/xe/xe_sync.h
index 256ffc1e54dc..51f2d803e977 100644
--- a/drivers/gpu/drm/xe/xe_sync.h
+++ b/drivers/gpu/drm/xe/xe_sync.h
@@ -8,6 +8,7 @@
#include "xe_sync_types.h"
+struct drm_syncobj;
struct xe_device;
struct xe_exec_queue;
struct xe_file;
@@ -21,6 +22,8 @@ struct xe_vm;
int xe_sync_entry_parse(struct xe_device *xe, struct xe_file *xef,
struct xe_sync_entry *sync,
struct drm_xe_sync __user *sync_user,
+ struct drm_syncobj *ufence_syncobj,
+ u64 ufence_timeline_value,
unsigned int flags);
int xe_sync_entry_add_deps(struct xe_sync_entry *sync,
struct xe_sched_job *job);
diff --git a/drivers/gpu/drm/xe/xe_sync_types.h b/drivers/gpu/drm/xe/xe_sync_types.h
index 30ac3f51993b..b88f1833e28c 100644
--- a/drivers/gpu/drm/xe/xe_sync_types.h
+++ b/drivers/gpu/drm/xe/xe_sync_types.h
@@ -18,9 +18,12 @@ struct xe_sync_entry {
struct drm_syncobj *syncobj;
struct dma_fence *fence;
struct dma_fence_chain *chain_fence;
+ struct dma_fence_chain *ufence_chain_fence;
+ struct drm_syncobj *ufence_syncobj;
struct xe_user_fence *ufence;
u64 addr;
u64 timeline_value;
+ u64 ufence_timeline_value;
u32 type;
u32 flags;
};
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 0771acbbf367..4f4f9a5c43af 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -7,9 +7,11 @@
#include <drm/drm_managed.h>
+#include "xe_bo.h"
#include "xe_device.h"
#include "xe_ggtt.h"
#include "xe_gt.h"
+#include "xe_memirq.h"
#include "xe_migrate.h"
#include "xe_pcode.h"
#include "xe_sa.h"
@@ -17,6 +19,8 @@
#include "xe_tile.h"
#include "xe_tile_sysfs.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_vram.h"
+#include "xe_vram_types.h"
#include "xe_wa.h"
/**
@@ -87,13 +91,46 @@
*/
static int xe_tile_alloc(struct xe_tile *tile)
{
- struct drm_device *drm = &tile_to_xe(tile)->drm;
-
- tile->mem.ggtt = drmm_kzalloc(drm, sizeof(*tile->mem.ggtt),
- GFP_KERNEL);
+ tile->mem.ggtt = xe_ggtt_alloc(tile);
if (!tile->mem.ggtt)
return -ENOMEM;
- tile->mem.ggtt->tile = tile;
+
+ tile->migrate = xe_migrate_alloc(tile);
+ if (!tile->migrate)
+ return -ENOMEM;
+
+ return 0;
+}
+
+/**
+ * xe_tile_alloc_vram - Perform per-tile VRAM structure allocations
+ * @tile: Tile to perform allocations for
+ *
+ * Allocates VRAM per-tile data structures using DRM-managed allocations.
+ * Does not touch the hardware.
+ *
+ * Returns -ENOMEM if allocations fail, otherwise 0.
+ */
+int xe_tile_alloc_vram(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct xe_vram_region *vram;
+
+ if (!IS_DGFX(xe))
+ return 0;
+
+ vram = xe_vram_region_alloc(xe, tile->id, XE_PL_VRAM0 + tile->id);
+ if (!vram)
+ return -ENOMEM;
+ tile->mem.vram = vram;
+
+ /*
+ * If kernel_vram is not already allocated, it means that the tile
+ * has a common VRAM region for kernel and user space.
+ */
+ if (!tile->mem.kernel_vram)
+ tile->mem.kernel_vram = tile->mem.vram;
return 0;
}
@@ -120,31 +157,12 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id)
if (err)
return err;
- tile->primary_gt = xe_gt_alloc(tile);
- if (IS_ERR(tile->primary_gt))
- return PTR_ERR(tile->primary_gt);
-
xe_pcode_init(tile);
return 0;
}
ALLOW_ERROR_INJECTION(xe_tile_init_early, ERRNO); /* See xe_pci_probe() */
-static int tile_ttm_mgr_init(struct xe_tile *tile)
-{
- struct xe_device *xe = tile_to_xe(tile);
- int err;
-
- if (tile->mem.vram.usable_size) {
- err = xe_ttm_vram_mgr_init(tile, &tile->mem.vram.ttm);
- if (err)
- return err;
- xe->info.mem_region_mask |= BIT(tile->id) << 1;
- }
-
- return 0;
-}
-
/**
* xe_tile_init_noalloc - Init tile up to the point where allocations can happen.
* @tile: The tile to initialize.
@@ -162,22 +180,31 @@ static int tile_ttm_mgr_init(struct xe_tile *tile)
int xe_tile_init_noalloc(struct xe_tile *tile)
{
struct xe_device *xe = tile_to_xe(tile);
- int err;
-
- err = tile_ttm_mgr_init(tile);
- if (err)
- return err;
xe_wa_apply_tile_workarounds(tile);
if (xe->info.has_usm && IS_DGFX(xe))
- xe_devm_add(tile, &tile->mem.vram);
+ xe_devm_add(tile, tile->mem.vram);
+
+ if (IS_DGFX(xe) && !ttm_resource_manager_used(&tile->mem.vram->ttm.manager)) {
+ int err = xe_ttm_vram_mgr_init(xe, tile->mem.vram);
+
+ if (err)
+ return err;
+ xe->info.mem_region_mask |= BIT(tile->mem.vram->id) << 1;
+ }
return xe_tile_sysfs_init(tile);
}
int xe_tile_init(struct xe_tile *tile)
{
+ int err;
+
+ err = xe_memirq_init(&tile->memirq);
+ if (err)
+ return err;
+
tile->mem.kernel_bb_pool = xe_sa_bo_manager_init(tile, SZ_1M, 16);
if (IS_ERR(tile->mem.kernel_bb_pool))
return PTR_ERR(tile->mem.kernel_bb_pool);
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index eb939316d55b..dceb6297aa01 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -14,6 +14,13 @@ int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
int xe_tile_init_noalloc(struct xe_tile *tile);
int xe_tile_init(struct xe_tile *tile);
+int xe_tile_alloc_vram(struct xe_tile *tile);
+
void xe_tile_migrate_wait(struct xe_tile *tile);
+static inline bool xe_tile_is_root(struct xe_tile *tile)
+{
+ return tile->id == 0;
+}
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.c b/drivers/gpu/drm/xe/xe_tile_debugfs.c
new file mode 100644
index 000000000000..fff242a5ae56
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <drm/drm_debugfs.h>
+
+#include "xe_ggtt.h"
+#include "xe_pm.h"
+#include "xe_sa.h"
+#include "xe_tile_debugfs.h"
+
+static struct xe_tile *node_to_tile(struct drm_info_node *node)
+{
+ return node->dent->d_parent->d_inode->i_private;
+}
+
+/**
+ * xe_tile_debugfs_simple_show() - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * This callback can be used in struct drm_info_list to describe debugfs
+ * files that are &xe_tile specific.
+ *
+ * It is assumed that those debugfs files will be created in a directory
+ * entry whose struct dentry d_inode->i_private points to the &xe_tile.
+ *
+ * /sys/kernel/debug/dri/0/
+ * ├── tile0/ # tile = dentry->d_inode->i_private
+ * │ │ ├── id # tile = dentry->d_parent->d_inode->i_private
+ *
+ * This function assumes that &m->private will be set to the &struct
+ * drm_info_node corresponding to the instance of the info on a given &struct
+ * drm_minor (see struct drm_info_list.show for details).
+ *
+ * This function also assumes that struct drm_info_list.data will point to the
+ * function code that will actually print a file content::
+ *
+ * int (*print)(struct xe_tile *, struct drm_printer *)
+ *
+ * Example::
+ *
+ * int tile_id(struct xe_tile *tile, struct drm_printer *p)
+ * {
+ * drm_printf(p, "%u\n", tile->id);
+ * return 0;
+ * }
+ *
+ * static const struct drm_info_list info[] = {
+ * { .name = "id", .show = xe_tile_debugfs_simple_show, .data = tile_id },
+ * };
+ *
+ * dir = debugfs_create_dir("tile0", parent);
+ * dir->d_inode->i_private = tile;
+ * drm_debugfs_create_files(info, ARRAY_SIZE(info), dir, minor);
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_tile_debugfs_simple_show(struct seq_file *m, void *data)
+{
+ struct drm_printer p = drm_seq_file_printer(m);
+ struct drm_info_node *node = m->private;
+ struct xe_tile *tile = node_to_tile(node);
+ int (*print)(struct xe_tile *, struct drm_printer *) = node->info_ent->data;
+
+ return print(tile, &p);
+}
+
+/**
+ * xe_tile_debugfs_show_with_rpm() - A show callback for struct drm_info_list
+ * @m: the &seq_file
+ * @data: data used by the drm debugfs helpers
+ *
+ * Similar to xe_tile_debugfs_simple_show(), but implicitly takes an RPM reference.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data)
+{
+ struct drm_info_node *node = m->private;
+ struct xe_tile *tile = node_to_tile(node);
+ struct xe_device *xe = tile_to_xe(tile);
+ int ret;
+
+ xe_pm_runtime_get(xe);
+ ret = xe_tile_debugfs_simple_show(m, data);
+ xe_pm_runtime_put(xe);
+
+ return ret;
+}
+
+static int ggtt(struct xe_tile *tile, struct drm_printer *p)
+{
+ return xe_ggtt_dump(tile->mem.ggtt, p);
+}
+
+static int sa_info(struct xe_tile *tile, struct drm_printer *p)
+{
+ drm_suballoc_dump_debug_info(&tile->mem.kernel_bb_pool->base, p,
+ xe_sa_manager_gpu_addr(tile->mem.kernel_bb_pool));
+
+ return 0;
+}
+
+/* only for debugfs files which can be safely used on the VF */
+static const struct drm_info_list vf_safe_debugfs_list[] = {
+ { "ggtt", .show = xe_tile_debugfs_show_with_rpm, .data = ggtt },
+ { "sa_info", .show = xe_tile_debugfs_show_with_rpm, .data = sa_info },
+};
+
+/**
+ * xe_tile_debugfs_register - Register tile's debugfs attributes
+ * @tile: the &xe_tile to register
+ *
+ * Create a debugfs sub-directory with a name that includes the tile ID and
+ * then create a set of debugfs files (attributes) specific to this tile.
+ */
+void xe_tile_debugfs_register(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ struct drm_minor *minor = xe->drm.primary;
+ struct dentry *root = minor->debugfs_root;
+ char name[8];
+
+ snprintf(name, sizeof(name), "tile%u", tile->id);
+ tile->debugfs = debugfs_create_dir(name, root);
+ if (IS_ERR(tile->debugfs))
+ return;
+
+ /*
+ * Store the xe_tile pointer as private data of the tile/ directory
+ * node so other tile specific attributes under that directory may
+ * refer to it by looking at its parent node private data.
+ */
+ tile->debugfs->d_inode->i_private = tile;
+
+ drm_debugfs_create_files(vf_safe_debugfs_list,
+ ARRAY_SIZE(vf_safe_debugfs_list),
+ tile->debugfs, minor);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_debugfs.h b/drivers/gpu/drm/xe/xe_tile_debugfs.h
new file mode 100644
index 000000000000..4429c22542f4
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_debugfs.h
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_DEBUGFS_H_
+#define _XE_TILE_DEBUGFS_H_
+
+struct seq_file;
+struct xe_tile;
+
+void xe_tile_debugfs_register(struct xe_tile *tile);
+int xe_tile_debugfs_simple_show(struct seq_file *m, void *data);
+int xe_tile_debugfs_show_with_rpm(struct seq_file *m, void *data);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_printk.h b/drivers/gpu/drm/xe/xe_tile_printk.h
new file mode 100644
index 000000000000..63640a42685d
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_printk.h
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_PRINTK_H_
+#define _XE_TILE_PRINTK_H_
+
+#include "xe_printk.h"
+
+#define __XE_TILE_PRINTK_FMT(_tile, _fmt, _args...) "Tile%u: " _fmt, (_tile)->id, ##_args
+
+#define xe_tile_printk(_tile, _level, _fmt, ...) \
+ xe_printk((_tile)->xe, _level, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_err(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), err, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_err_once(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), err_once, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_err_ratelimited(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), err_ratelimited, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_warn(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), warn, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_notice(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), notice, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_info(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), info, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_dbg(_tile, _fmt, ...) \
+ xe_tile_printk((_tile), dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_WARN_type(_tile, _type, _condition, _fmt, ...) \
+ xe_WARN##_type((_tile)->xe, _condition, _fmt, ## __VA_ARGS__)
+
+#define xe_tile_WARN(_tile, _condition, _fmt, ...) \
+ xe_tile_WARN_type((_tile),, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_WARN_ONCE(_tile, _condition, _fmt, ...) \
+ xe_tile_WARN_type((_tile), _ONCE, _condition, __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_WARN_ON(_tile, _condition) \
+ xe_tile_WARN((_tile), _condition, "%s(%s)", "WARN_ON", __stringify(_condition))
+
+#define xe_tile_WARN_ON_ONCE(_tile, _condition) \
+ xe_tile_WARN_ONCE((_tile), _condition, "%s(%s)", "WARN_ON_ONCE", __stringify(_condition))
+
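+/*
+ * Typical usage (the message and values below are placeholders); every helper
+ * above prefixes its output with "Tile%u: " for the given tile:
+ *
+ *	xe_tile_dbg(tile, "GGTT size: %llu\n", size);
+ *	xe_tile_WARN_ON(tile, !tile->mem.ggtt);
+ */
+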
+static inline void __xe_tile_printfn_err(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+
+ xe_tile_err(tile, "%pV", vaf);
+}
+
+static inline void __xe_tile_printfn_info(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+
+ xe_tile_info(tile, "%pV", vaf);
+}
+
+static inline void __xe_tile_printfn_dbg(struct drm_printer *p, struct va_format *vaf)
+{
+ struct xe_tile *tile = p->arg;
+ struct drm_printer dbg;
+
+ /*
+ * The original xe_tile_dbg() callsite annotations are useless here;
+ * redirect to the tweaked xe_dbg_printer() instead.
+ */
+ dbg = xe_dbg_printer(tile->xe);
+ dbg.origin = p->origin;
+
+ drm_printf(&dbg, __XE_TILE_PRINTK_FMT(tile, "%pV", vaf));
+}
+
+/**
+ * xe_tile_err_printer - Construct a &drm_printer that outputs to xe_tile_err()
+ * @tile: the &xe_tile pointer to use in xe_tile_err()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_tile_err_printer(struct xe_tile *tile)
+{
+ struct drm_printer p = {
+ .printfn = __xe_tile_printfn_err,
+ .arg = tile,
+ };
+ return p;
+}
+
+/**
+ * xe_tile_info_printer - Construct a &drm_printer that outputs to xe_tile_info()
+ * @tile: the &xe_tile pointer to use in xe_tile_info()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_tile_info_printer(struct xe_tile *tile)
+{
+ struct drm_printer p = {
+ .printfn = __xe_tile_printfn_info,
+ .arg = tile,
+ };
+ return p;
+}
+
+/**
+ * xe_tile_dbg_printer - Construct a &drm_printer that outputs like xe_tile_dbg()
+ * @tile: the &xe_tile pointer to use in xe_tile_dbg()
+ *
+ * Return: The &drm_printer object.
+ */
+static inline struct drm_printer xe_tile_dbg_printer(struct xe_tile *tile)
+{
+ struct drm_printer p = {
+ .printfn = __xe_tile_printfn_dbg,
+ .arg = tile,
+ .origin = (const void *)_THIS_IP_,
+ };
+ return p;
+}
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c
new file mode 100644
index 000000000000..f3f478f14ff5
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.c
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <linux/debugfs.h>
+#include <drm/drm_debugfs.h>
+
+#include "xe_device.h"
+#include "xe_device_types.h"
+#include "xe_gt_sriov_pf_config.h"
+#include "xe_gt_sriov_pf_debugfs.h"
+#include "xe_pm.h"
+#include "xe_tile_debugfs.h"
+#include "xe_tile_sriov_pf_debugfs.h"
+#include "xe_sriov.h"
+#include "xe_sriov_pf.h"
+#include "xe_sriov_pf_provision.h"
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov # d_inode->i_private = (xe_device*)
+ * │ ├── pf # d_inode->i_private = (xe_device*)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ ├── tile1
+ * │ │ : :
+ * │ ├── vf1 # d_inode->i_private = VFID(1)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ ├── tile1
+ * │ │ : :
+ * │ ├── vfN # d_inode->i_private = VFID(N)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ ├── tile1
+ * : : : :
+ */
+
+static void *extract_priv(struct dentry *d)
+{
+ return d->d_inode->i_private;
+}
+
+__maybe_unused
+static struct xe_tile *extract_tile(struct dentry *d)
+{
+ return extract_priv(d);
+}
+
+static struct xe_device *extract_xe(struct dentry *d)
+{
+ return extract_priv(d->d_parent->d_parent);
+}
+
+__maybe_unused
+static unsigned int extract_vfid(struct dentry *d)
+{
+ void *pp = extract_priv(d->d_parent);
+
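+ /* Parent is either "pf" (i_private == xe) or "vfN" (i_private == VFID(N)) */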
+ return pp == extract_xe(d) ? PFID : (uintptr_t)pp;
+}
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * : ├── pf
+ * : ├── tile0
+ * : ├── ggtt_available
+ * ├── ggtt_provisioned
+ */
+
+static int pf_config_print_available_ggtt(struct xe_tile *tile, struct drm_printer *p)
+{
+ return xe_gt_sriov_pf_config_print_available_ggtt(tile->primary_gt, p);
+}
+
+static int pf_config_print_ggtt(struct xe_tile *tile, struct drm_printer *p)
+{
+ return xe_gt_sriov_pf_config_print_ggtt(tile->primary_gt, p);
+}
+
+static const struct drm_info_list pf_ggtt_info[] = {
+ {
+ "ggtt_available",
+ .show = xe_tile_debugfs_simple_show,
+ .data = pf_config_print_available_ggtt,
+ },
+ {
+ "ggtt_provisioned",
+ .show = xe_tile_debugfs_simple_show,
+ .data = pf_config_print_ggtt,
+ },
+};
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * : ├── pf
+ * : ├── tile0
+ * : ├── vram_provisioned
+ */
+
+static int pf_config_print_vram(struct xe_tile *tile, struct drm_printer *p)
+{
+ return xe_gt_sriov_pf_config_print_lmem(tile->primary_gt, p);
+}
+
+static const struct drm_info_list pf_vram_info[] = {
+ {
+ "vram_provisioned",
+ .show = xe_tile_debugfs_simple_show,
+ .data = pf_config_print_vram,
+ },
+};
+
+/*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * │ ├── pf
+ * │ │ ├── tile0
+ * │ │ │ ├── ggtt_spare
+ * │ │ │ ├── vram_spare
+ * │ │ ├── tile1
+ * │ │ : :
+ * │ ├── vf1
+ * │ : ├── tile0
+ * │ │ ├── ggtt_quota
+ * │ │ ├── vram_quota
+ * │ ├── tile1
+ * │ : :
+ */
+
+#define DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(NAME, CONFIG, TYPE, FORMAT) \
+ \
+static int NAME##_set(void *data, u64 val) \
+{ \
+ struct xe_tile *tile = extract_tile(data); \
+ unsigned int vfid = extract_vfid(data); \
+ struct xe_gt *gt = tile->primary_gt; \
+ struct xe_device *xe = tile->xe; \
+ int err; \
+ \
+ if (val > (TYPE)~0ull) \
+ return -EOVERFLOW; \
+ \
+ xe_pm_runtime_get(xe); \
+ err = xe_sriov_pf_wait_ready(xe) ?: \
+ xe_gt_sriov_pf_config_set_##CONFIG(gt, vfid, val); \
+ if (!err) \
+ xe_sriov_pf_provision_set_custom_mode(xe); \
+ xe_pm_runtime_put(xe); \
+ \
+ return err; \
+} \
+ \
+static int NAME##_get(void *data, u64 *val) \
+{ \
+ struct xe_tile *tile = extract_tile(data); \
+ unsigned int vfid = extract_vfid(data); \
+ struct xe_gt *gt = tile->primary_gt; \
+ \
+ *val = xe_gt_sriov_pf_config_get_##CONFIG(gt, vfid); \
+ return 0; \
+} \
+ \
+DEFINE_DEBUGFS_ATTRIBUTE(NAME##_fops, NAME##_get, NAME##_set, FORMAT)
+
+DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(ggtt, ggtt, u64, "%llu\n");
+DEFINE_SRIOV_TILE_CONFIG_DEBUGFS_ATTRIBUTE(vram, lmem, u64, "%llu\n");
+
+static void pf_add_config_attrs(struct xe_tile *tile, struct dentry *dent, unsigned int vfid)
+{
+ struct xe_device *xe = tile->xe;
+
+ xe_tile_assert(tile, tile == extract_tile(dent));
+ xe_tile_assert(tile, vfid == extract_vfid(dent));
+
+ debugfs_create_file_unsafe(vfid ? "ggtt_quota" : "ggtt_spare",
+ 0644, dent, dent, &ggtt_fops);
+ if (IS_DGFX(xe))
+ debugfs_create_file_unsafe(vfid ? "vram_quota" : "vram_spare",
+ xe_device_has_lmtt(xe) ? 0644 : 0444,
+ dent, dent, &vram_fops);
+}
+
+static void pf_populate_tile(struct xe_tile *tile, struct dentry *dent, unsigned int vfid)
+{
+ struct xe_device *xe = tile->xe;
+ struct drm_minor *minor = xe->drm.primary;
+ struct xe_gt *gt;
+ unsigned int id;
+
+ pf_add_config_attrs(tile, dent, vfid);
+
+ if (!vfid) {
+ drm_debugfs_create_files(pf_ggtt_info,
+ ARRAY_SIZE(pf_ggtt_info),
+ dent, minor);
+ if (IS_DGFX(xe))
+ drm_debugfs_create_files(pf_vram_info,
+ ARRAY_SIZE(pf_vram_info),
+ dent, minor);
+ }
+
+ for_each_gt_on_tile(gt, tile, id)
+ xe_gt_sriov_pf_debugfs_populate(gt, dent, vfid);
+}
+
+/**
+ * xe_tile_sriov_pf_debugfs_populate() - Populate SR-IOV debugfs tree with tile files.
+ * @tile: the &xe_tile to register
+ * @parent: the parent &dentry that represents the SR-IOV @vfid function
+ * @vfid: the VF identifier
+ *
+ * Add a new debugfs directory to the @parent directory that will represent the
+ * @tile and populate it with files related to the SR-IOV @vfid function.
+ *
+ * This function can only be called on PF.
+ */
+void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *parent,
+ unsigned int vfid)
+{
+ struct xe_device *xe = tile->xe;
+ struct dentry *dent;
+ char name[10]; /* enough for "tile%u\0" with tile IDs up to 2^16 - 1 */
+
+ xe_tile_assert(tile, IS_SRIOV_PF(xe));
+ xe_tile_assert(tile, extract_priv(parent->d_parent) == xe);
+ xe_tile_assert(tile, extract_priv(parent) == tile->xe ||
+ (uintptr_t)extract_priv(parent) == vfid);
+
+ /*
+ * /sys/kernel/debug/dri/BDF/
+ * ├── sriov
+ * │ ├── pf # parent, d_inode->i_private = (xe_device*)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ ├── tile1
+ * │ │ : :
+ * │ ├── vf1 # parent, d_inode->i_private = VFID(1)
+ * │ │ ├── tile0 # d_inode->i_private = (xe_tile*)
+ * │ │ ├── tile1
+ * : : : :
+ */
+ snprintf(name, sizeof(name), "tile%u", tile->id);
+ dent = debugfs_create_dir(name, parent);
+ if (IS_ERR(dent))
+ return;
+ dent->d_inode->i_private = tile;
+
+ xe_tile_assert(tile, extract_tile(dent) == tile);
+ xe_tile_assert(tile, extract_vfid(dent) == vfid);
+ xe_tile_assert(tile, extract_xe(dent) == xe);
+
+ pf_populate_tile(tile, dent, vfid);
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h
new file mode 100644
index 000000000000..55d179c44634
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_pf_debugfs.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SRIOV_PF_DEBUGFS_H_
+#define _XE_TILE_SRIOV_PF_DEBUGFS_H_
+
+struct dentry;
+struct xe_tile;
+
+void xe_tile_sriov_pf_debugfs_populate(struct xe_tile *tile, struct dentry *parent,
+ unsigned int vfid);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_printk.h b/drivers/gpu/drm/xe/xe_tile_sriov_printk.h
new file mode 100644
index 000000000000..68323512872c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_printk.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SRIOV_PRINTK_H_
+#define _XE_TILE_SRIOV_PRINTK_H_
+
+#include "xe_tile_printk.h"
+#include "xe_sriov_printk.h"
+
+#define __XE_TILE_SRIOV_PRINTK_FMT(_tile, _fmt, ...) \
+ __XE_TILE_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__)
+
+#define xe_tile_sriov_printk(_tile, _level, _fmt, ...) \
+ xe_sriov_##_level((_tile)->xe, __XE_TILE_SRIOV_PRINTK_FMT((_tile), _fmt, ##__VA_ARGS__))
+
+#define xe_tile_sriov_err(_tile, _fmt, ...) \
+ xe_tile_sriov_printk(_tile, err, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_sriov_notice(_tile, _fmt, ...) \
+ xe_tile_sriov_printk(_tile, notice, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_sriov_info(_tile, _fmt, ...) \
+ xe_tile_sriov_printk(_tile, info, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_sriov_dbg(_tile, _fmt, ...) \
+ xe_tile_sriov_printk(_tile, dbg, _fmt, ##__VA_ARGS__)
+
+#define xe_tile_sriov_dbg_verbose(_tile, _fmt, ...) \
+ xe_tile_sriov_printk(_tile, dbg_verbose, _fmt, ##__VA_ARGS__)
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.c b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c
new file mode 100644
index 000000000000..c9bac2cfdd04
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "regs/xe_gtt_defs.h"
+
+#include "xe_assert.h"
+#include "xe_ggtt.h"
+#include "xe_sriov.h"
+#include "xe_sriov_printk.h"
+#include "xe_tile_sriov_vf.h"
+#include "xe_wopcm.h"
+
+static int vf_init_ggtt_balloons(struct xe_tile *tile)
+{
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ tile->sriov.vf.ggtt_balloon[0] = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(tile->sriov.vf.ggtt_balloon[0]))
+ return PTR_ERR(tile->sriov.vf.ggtt_balloon[0]);
+
+ tile->sriov.vf.ggtt_balloon[1] = xe_ggtt_node_init(ggtt);
+ if (IS_ERR(tile->sriov.vf.ggtt_balloon[1])) {
+ xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
+ return PTR_ERR(tile->sriov.vf.ggtt_balloon[1]);
+ }
+
+ return 0;
+}
+
+/**
+ * xe_tile_sriov_vf_balloon_ggtt_locked - Insert balloon nodes to limit the usable GGTT address range.
+ * @tile: the &xe_tile struct instance
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+static int xe_tile_sriov_vf_balloon_ggtt_locked(struct xe_tile *tile)
+{
+ u64 ggtt_base = tile->sriov.vf.self_config.ggtt_base;
+ u64 ggtt_size = tile->sriov.vf.self_config.ggtt_size;
+ struct xe_device *xe = tile_to_xe(tile);
+ u64 wopcm = xe_wopcm_size(xe);
+ u64 start, end;
+ int err;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(xe));
+ xe_tile_assert(tile, ggtt_size);
+ lockdep_assert_held(&tile->mem.ggtt->lock);
+
+ /*
+ * VF can only use part of the GGTT as allocated by the PF:
+ *
+ * WOPCM GUC_GGTT_TOP
+ * |<------------ Total GGTT size ------------------>|
+ *
+ * VF GGTT base -->|<- size ->|
+ *
+ * +--------------------+----------+-----------------+
+ * |////////////////////| block |\\\\\\\\\\\\\\\\\|
+ * +--------------------+----------+-----------------+
+ *
+ * |<--- balloon[0] --->|<-- VF -->|<-- balloon[1] ->|
+ */
+
+ if (ggtt_base < wopcm || ggtt_base > GUC_GGTT_TOP ||
+ ggtt_size > GUC_GGTT_TOP - ggtt_base) {
+ xe_sriov_err(xe, "tile%u: Invalid GGTT configuration: %#llx-%#llx\n",
+ tile->id, ggtt_base, ggtt_base + ggtt_size - 1);
+ return -ERANGE;
+ }
+
+ start = wopcm;
+ end = ggtt_base;
+ if (end != start) {
+ err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[0],
+ start, end);
+ if (err)
+ return err;
+ }
+
+ start = ggtt_base + ggtt_size;
+ end = GUC_GGTT_TOP;
+ if (end != start) {
+ err = xe_ggtt_node_insert_balloon_locked(tile->sriov.vf.ggtt_balloon[1],
+ start, end);
+ if (err) {
+ xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
+ return err;
+ }
+ }
+
+ return 0;
+}
+
+static int vf_balloon_ggtt(struct xe_tile *tile)
+{
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
+ int err;
+
+ mutex_lock(&ggtt->lock);
+ err = xe_tile_sriov_vf_balloon_ggtt_locked(tile);
+ mutex_unlock(&ggtt->lock);
+
+ return err;
+}
+
+/**
+ * xe_tile_sriov_vf_deballoon_ggtt_locked - Remove balloon nodes.
+ * @tile: the &xe_tile struct instance
+ */
+void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile)
+{
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[1]);
+ xe_ggtt_node_remove_balloon_locked(tile->sriov.vf.ggtt_balloon[0]);
+}
+
+static void vf_deballoon_ggtt(struct xe_tile *tile)
+{
+ mutex_lock(&tile->mem.ggtt->lock);
+ xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
+ mutex_unlock(&tile->mem.ggtt->lock);
+}
+
+static void vf_fini_ggtt_balloons(struct xe_tile *tile)
+{
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[1]);
+ xe_ggtt_node_fini(tile->sriov.vf.ggtt_balloon[0]);
+}
+
+static void cleanup_ggtt(struct drm_device *drm, void *arg)
+{
+ struct xe_tile *tile = arg;
+
+ vf_deballoon_ggtt(tile);
+ vf_fini_ggtt_balloons(tile);
+}
+
+/**
+ * xe_tile_sriov_vf_prepare_ggtt - Prepare a VF's GGTT configuration.
+ * @tile: the &xe_tile
+ *
+ * This function is for VF use only.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile)
+{
+ struct xe_device *xe = tile_to_xe(tile);
+ int err;
+
+ err = vf_init_ggtt_balloons(tile);
+ if (err)
+ return err;
+
+ err = vf_balloon_ggtt(tile);
+ if (err) {
+ vf_fini_ggtt_balloons(tile);
+ return err;
+ }
+
+ return drmm_add_action_or_reset(&xe->drm, cleanup_ggtt, tile);
+}
+
+/**
+ * DOC: GGTT nodes shifting during VF post-migration recovery
+ *
+ * The first fixup applied to the VF KMD structures as part of post-migration
+ * recovery is shifting nodes within the &xe_ggtt instance. The nodes are moved
+ * from the range previously assigned to this VF into the newly provisioned
+ * area. The changes include balloons, which are resized accordingly.
+ *
+ * The balloon nodes are there to eliminate unavailable ranges from use: one
+ * reserves the GGTT area below the range assigned to the current VF, and
+ * another one reserves the area above.
+ *
+ * Below is the GGTT layout of an example VF, with a certain address range
+ * assigned to said VF and inaccessible areas above and below:
+ *
+ * 0 4GiB
+ * |<--------------------------- Total GGTT size ----------------------------->|
+ * WOPCM GUC_TOP
+ * |<-------------- Area mappable by xe_ggtt instance ---------------->|
+ *
+ * +---+---------------------------------+----------+----------------------+---+
+ * |\\\|/////////////////////////////////| VF mem |//////////////////////|\\\|
+ * +---+---------------------------------+----------+----------------------+---+
+ *
+ * Hardware enforced access rules before migration:
+ *
+ * |<------- inaccessible for VF ------->|<VF owned>|<-- inaccessible for VF ->|
+ *
+ * GGTT nodes used for tracking allocations:
+ *
+ * |<---------- balloon ------------>|<- nodes->|<----- balloon ------>|
+ *
+ * After the migration, the GGTT area assigned to the VF might have shifted,
+ * either to a lower or to a higher address. But we expect the total size and
+ * the extra areas to be identical, as migration can only happen between
+ * matching platforms. Below is an example GGTT layout of the VF after
+ * migration. The content of the VF's GGTT has been moved to a new area, and we
+ * receive its address from the GuC:
+ *
+ * +---+----------------------+----------+---------------------------------+---+
+ * |\\\|//////////////////////| VF mem |/////////////////////////////////|\\\|
+ * +---+----------------------+----------+---------------------------------+---+
+ *
+ * Hardware enforced access rules after migration:
+ *
+ * |<- inaccessible for VF -->|<VF owned>|<------- inaccessible for VF ------->|
+ *
+ * So the VF has a new slice of GGTT assigned, and during the migration process
+ * the memory content was copied to that new area. But the &xe_ggtt nodes are
+ * still tracking allocations using the old addresses. The nodes within the
+ * VF-owned area have to be shifted, and the balloon nodes need to be resized
+ * to properly mask out areas not owned by the VF.
+ *
+ * Fixed &xe_ggtt nodes used for tracking allocations:
+ *
+ * |<------ balloon ------>|<- nodes->|<----------- balloon ----------->|
+ *
+ * Due to the use of GPU profiles, we do not expect the old and new GGTT areas
+ * to overlap, but our node shifting will fix addresses properly regardless.
+ */
+
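+/*
+ * A minimal sketch of the recovery step described above; new_ggtt_base is a
+ * placeholder for whatever base address is reported after migration:
+ *
+ *	s64 shift = new_ggtt_base - xe_tile_sriov_vf_ggtt_base(tile);
+ *
+ *	mutex_lock(&tile->mem.ggtt->lock);
+ *	xe_tile_sriov_vf_fixup_ggtt_nodes_locked(tile, shift);
+ *	mutex_unlock(&tile->mem.ggtt->lock);
+ *	xe_tile_sriov_vf_ggtt_base_store(tile, new_ggtt_base);
+ */
+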
+/**
+ * xe_tile_sriov_vf_fixup_ggtt_nodes_locked - Shift GGTT allocations to match assigned range.
+ * @tile: the &xe_tile struct instance
+ * @shift: the shift value
+ *
+ * Since the Global GTT is not virtualized, each VF has an assigned range
+ * within the global space. This range might have changed during migration,
+ * which requires all memory addresses pointing into the GGTT to be shifted.
+ */
+void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift)
+{
+ struct xe_ggtt *ggtt = tile->mem.ggtt;
+
+ lockdep_assert_held(&ggtt->lock);
+
+ xe_tile_sriov_vf_deballoon_ggtt_locked(tile);
+ xe_ggtt_shift_nodes_locked(ggtt, shift);
+ xe_tile_sriov_vf_balloon_ggtt_locked(tile);
+}
+
+/**
+ * xe_tile_sriov_vf_lmem - VF LMEM configuration.
+ * @tile: the &xe_tile
+ *
+ * This function is for VF use only.
+ *
+ * Return: size of the LMEM assigned to the VF.
+ */
+u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile)
+{
+ struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ return config->lmem_size;
+}
+
+/**
+ * xe_tile_sriov_vf_lmem_store - Store VF LMEM configuration
+ * @tile: the &xe_tile
+ * @lmem_size: VF LMEM size to store
+ *
+ * This function is for VF use only.
+ */
+void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size)
+{
+ struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ config->lmem_size = lmem_size;
+}
+
+/**
+ * xe_tile_sriov_vf_ggtt - VF GGTT configuration.
+ * @tile: the &xe_tile
+ *
+ * This function is for VF use only.
+ *
+ * Return: size of the GGTT assigned to the VF.
+ */
+u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile)
+{
+ struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ return config->ggtt_size;
+}
+
+/**
+ * xe_tile_sriov_vf_ggtt_store - Store VF GGTT configuration
+ * @tile: the &xe_tile
+ * @ggtt_size: VF GGTT size to store
+ *
+ * This function is for VF use only.
+ */
+void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size)
+{
+ struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ config->ggtt_size = ggtt_size;
+}
+
+/**
+ * xe_tile_sriov_vf_ggtt_base - VF GGTT base configuration.
+ * @tile: the &xe_tile
+ *
+ * This function is for VF use only.
+ *
+ * Return: base of the GGTT assigned to the VF.
+ */
+u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile)
+{
+ struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ return config->ggtt_base;
+}
+
+/**
+ * xe_tile_sriov_vf_ggtt_base_store - Store VF GGTT base configuration
+ * @tile: the &xe_tile
+ * @ggtt_base: VF GGTT base to store
+ *
+ * This function is for VF use only.
+ */
+void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base)
+{
+ struct xe_tile_sriov_vf_selfconfig *config = &tile->sriov.vf.self_config;
+
+ xe_tile_assert(tile, IS_SRIOV_VF(tile_to_xe(tile)));
+
+ config->ggtt_base = ggtt_base;
+}
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h
new file mode 100644
index 000000000000..749f41504883
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SRIOV_VF_H_
+#define _XE_TILE_SRIOV_VF_H_
+
+#include <linux/types.h>
+
+struct xe_tile;
+
+int xe_tile_sriov_vf_prepare_ggtt(struct xe_tile *tile);
+void xe_tile_sriov_vf_deballoon_ggtt_locked(struct xe_tile *tile);
+void xe_tile_sriov_vf_fixup_ggtt_nodes_locked(struct xe_tile *tile, s64 shift);
+u64 xe_tile_sriov_vf_ggtt(struct xe_tile *tile);
+void xe_tile_sriov_vf_ggtt_store(struct xe_tile *tile, u64 ggtt_size);
+u64 xe_tile_sriov_vf_ggtt_base(struct xe_tile *tile);
+void xe_tile_sriov_vf_ggtt_base_store(struct xe_tile *tile, u64 ggtt_base);
+u64 xe_tile_sriov_vf_lmem(struct xe_tile *tile);
+void xe_tile_sriov_vf_lmem_store(struct xe_tile *tile, u64 lmem_size);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h
new file mode 100644
index 000000000000..4807ca51614c
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tile_sriov_vf_types.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TILE_SRIOV_VF_TYPES_H_
+#define _XE_TILE_SRIOV_VF_TYPES_H_
+
+#include <linux/types.h>
+
+/**
+ * struct xe_tile_sriov_vf_selfconfig - VF configuration data.
+ */
+struct xe_tile_sriov_vf_selfconfig {
+ /** @ggtt_base: assigned base offset of the GGTT region. */
+ u64 ggtt_base;
+ /** @ggtt_size: assigned size of the GGTT region. */
+ u64 ggtt_size;
+ /** @lmem_size: assigned size of the LMEM. */
+ u64 lmem_size;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tile_sysfs.c b/drivers/gpu/drm/xe/xe_tile_sysfs.c
index b804234a6551..9e1236a9ec67 100644
--- a/drivers/gpu/drm/xe/xe_tile_sysfs.c
+++ b/drivers/gpu/drm/xe/xe_tile_sysfs.c
@@ -44,16 +44,18 @@ int xe_tile_sysfs_init(struct xe_tile *tile)
kt->tile = tile;
err = kobject_add(&kt->base, &dev->kobj, "tile%d", tile->id);
- if (err) {
- kobject_put(&kt->base);
- return err;
- }
+ if (err)
+ goto err_object;
tile->sysfs = &kt->base;
err = xe_vram_freq_sysfs_init(tile);
if (err)
- return err;
+ goto err_object;
return devm_add_action_or_reset(xe->drm.dev, tile_sysfs_fini, tile);
+
+err_object:
+ kobject_put(&kt->base);
+ return err;
}
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.c b/drivers/gpu/drm/xe/xe_tlb_inval.c
new file mode 100644
index 000000000000..918a59e686ea
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.c
@@ -0,0 +1,433 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#include <drm/drm_managed.h>
+
+#include "abi/guc_actions_abi.h"
+#include "xe_device.h"
+#include "xe_force_wake.h"
+#include "xe_gt.h"
+#include "xe_gt_printk.h"
+#include "xe_gt_stats.h"
+#include "xe_guc.h"
+#include "xe_guc_ct.h"
+#include "xe_guc_tlb_inval.h"
+#include "xe_mmio.h"
+#include "xe_pm.h"
+#include "xe_tlb_inval.h"
+#include "xe_trace.h"
+
+/**
+ * DOC: Xe TLB invalidation
+ *
+ * Xe TLB invalidation is implemented in two layers. The first is the frontend
+ * API, which provides an interface for TLB invalidations to the driver code.
+ * The frontend handles seqno assignment, synchronization (fences), and the
+ * timeout mechanism. The frontend is implemented via an embedded structure
+ * xe_tlb_inval that includes a set of ops hooking into the backend. The backend
+ * interacts with the hardware (or firmware) to perform the actual invalidation.
+ */
+
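+/*
+ * Typical frontend usage, mirroring xe_tlb_inval_vm() below (start, end and
+ * asid are placeholders):
+ *
+ *	struct xe_tlb_inval_fence fence;
+ *
+ *	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
+ *	xe_tlb_inval_range(tlb_inval, &fence, start, end, asid);
+ *	xe_tlb_inval_fence_wait(&fence);
+ */
+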
+#define FENCE_STACK_BIT DMA_FENCE_FLAG_USER_BITS
+
+static void xe_tlb_inval_fence_fini(struct xe_tlb_inval_fence *fence)
+{
+ if (WARN_ON_ONCE(!fence->tlb_inval))
+ return;
+
+ xe_pm_runtime_put(fence->tlb_inval->xe);
+ fence->tlb_inval = NULL; /* fini() should be called once */
+}
+
+static void
+xe_tlb_inval_fence_signal(struct xe_tlb_inval_fence *fence)
+{
+ bool stack = test_bit(FENCE_STACK_BIT, &fence->base.flags);
+
+ lockdep_assert_held(&fence->tlb_inval->pending_lock);
+
+ list_del(&fence->link);
+ trace_xe_tlb_inval_fence_signal(fence->tlb_inval->xe, fence);
+ xe_tlb_inval_fence_fini(fence);
+ dma_fence_signal(&fence->base);
+ if (!stack)
+ dma_fence_put(&fence->base);
+}
+
+static void
+xe_tlb_inval_fence_signal_unlocked(struct xe_tlb_inval_fence *fence)
+{
+ struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
+
+ spin_lock_irq(&tlb_inval->pending_lock);
+ xe_tlb_inval_fence_signal(fence);
+ spin_unlock_irq(&tlb_inval->pending_lock);
+}
+
+static void xe_tlb_inval_fence_timeout(struct work_struct *work)
+{
+ struct xe_tlb_inval *tlb_inval = container_of(work, struct xe_tlb_inval,
+ fence_tdr.work);
+ struct xe_device *xe = tlb_inval->xe;
+ struct xe_tlb_inval_fence *fence, *next;
+ long timeout_delay = tlb_inval->ops->timeout_delay(tlb_inval);
+
+ tlb_inval->ops->flush(tlb_inval);
+
+ spin_lock_irq(&tlb_inval->pending_lock);
+ list_for_each_entry_safe(fence, next,
+ &tlb_inval->pending_fences, link) {
+ s64 since_inval_ms = ktime_ms_delta(ktime_get(),
+ fence->inval_time);
+
+ if (msecs_to_jiffies(since_inval_ms) < timeout_delay)
+ break;
+
+ trace_xe_tlb_inval_fence_timeout(xe, fence);
+ drm_err(&xe->drm,
+ "TLB invalidation fence timeout, seqno=%d recv=%d",
+ fence->seqno, tlb_inval->seqno_recv);
+
+ fence->base.error = -ETIME;
+ xe_tlb_inval_fence_signal(fence);
+ }
+ if (!list_empty(&tlb_inval->pending_fences))
+ queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
+ timeout_delay);
+ spin_unlock_irq(&tlb_inval->pending_lock);
+}
+
+/**
+ * tlb_inval_fini - Clean up TLB invalidation state
+ * @drm: @drm_device
+ * @arg: pointer to struct @xe_tlb_inval
+ *
+ * Cancel pending fence workers and clean up any additional
+ * TLB invalidation state.
+ */
+static void tlb_inval_fini(struct drm_device *drm, void *arg)
+{
+ struct xe_tlb_inval *tlb_inval = arg;
+
+ xe_tlb_inval_reset(tlb_inval);
+}
+
+/**
+ * xe_gt_tlb_inval_init_early - Initialize TLB invalidation state
+ * @gt: GT structure
+ *
+ * Initialize TLB invalidation state. This is purely software initialization
+ * and should be called once during driver load.
+ *
+ * Return: 0 on success, negative error code on error.
+ */
+int xe_gt_tlb_inval_init_early(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ struct xe_tlb_inval *tlb_inval = &gt->tlb_inval;
+ int err;
+
+ tlb_inval->xe = xe;
+ tlb_inval->seqno = 1;
+ INIT_LIST_HEAD(&tlb_inval->pending_fences);
+ spin_lock_init(&tlb_inval->pending_lock);
+ spin_lock_init(&tlb_inval->lock);
+ INIT_DELAYED_WORK(&tlb_inval->fence_tdr, xe_tlb_inval_fence_timeout);
+
+ err = drmm_mutex_init(&xe->drm, &tlb_inval->seqno_lock);
+ if (err)
+ return err;
+
+ tlb_inval->job_wq = drmm_alloc_ordered_workqueue(&xe->drm,
+ "gt-tbl-inval-job-wq",
+ WQ_MEM_RECLAIM);
+ if (IS_ERR(tlb_inval->job_wq))
+ return PTR_ERR(tlb_inval->job_wq);
+
+ /* XXX: Blindly setting up backend to GuC */
+ xe_guc_tlb_inval_init_early(&gt->uc.guc, tlb_inval);
+
+ return drmm_add_action_or_reset(&xe->drm, tlb_inval_fini, tlb_inval);
+}
+
+/**
+ * xe_tlb_inval_reset() - TLB invalidation reset
+ * @tlb_inval: TLB invalidation client
+ *
+ * Signal any pending invalidation fences; should be called during a GT reset.
+ */
+void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_tlb_inval_fence *fence, *next;
+ int pending_seqno;
+
+ /*
+ * We can get here before the backends are even initialized if we're
+ * wedging very early, in which case there are not going to be any
+ * pending fences, so we can bail immediately.
+ */
+ if (!tlb_inval->ops->initialized(tlb_inval))
+ return;
+
+ /*
+ * Backend is already disabled at this point. No new TLB requests can
+ * appear.
+ */
+
+ mutex_lock(&tlb_inval->seqno_lock);
+ spin_lock_irq(&tlb_inval->pending_lock);
+ cancel_delayed_work(&tlb_inval->fence_tdr);
+ /*
+ * We might have various kworkers waiting for TLB flushes to complete
+ * which are not tracked with an explicit TLB fence, however at this
+ * stage that will never happen since the backend is already disabled,
+ * so make sure we signal them here under the assumption that we have
+ * completed a full GT reset.
+ */
+ if (tlb_inval->seqno == 1)
+ pending_seqno = TLB_INVALIDATION_SEQNO_MAX - 1;
+ else
+ pending_seqno = tlb_inval->seqno - 1;
+ WRITE_ONCE(tlb_inval->seqno_recv, pending_seqno);
+
+ list_for_each_entry_safe(fence, next,
+ &tlb_inval->pending_fences, link)
+ xe_tlb_inval_fence_signal(fence);
+ spin_unlock_irq(&tlb_inval->pending_lock);
+ mutex_unlock(&tlb_inval->seqno_lock);
+}
+
+static bool xe_tlb_inval_seqno_past(struct xe_tlb_inval *tlb_inval, int seqno)
+{
+ int seqno_recv = READ_ONCE(tlb_inval->seqno_recv);
+
+ lockdep_assert_held(&tlb_inval->pending_lock);
+
+ if (seqno - seqno_recv < -(TLB_INVALIDATION_SEQNO_MAX / 2))
+ return false;
+
+ if (seqno - seqno_recv > (TLB_INVALIDATION_SEQNO_MAX / 2))
+ return true;
+
+ return seqno_recv >= seqno;
+}
+
+static void xe_tlb_inval_fence_prep(struct xe_tlb_inval_fence *fence)
+{
+ struct xe_tlb_inval *tlb_inval = fence->tlb_inval;
+
+ fence->seqno = tlb_inval->seqno;
+ trace_xe_tlb_inval_fence_send(tlb_inval->xe, fence);
+
+ spin_lock_irq(&tlb_inval->pending_lock);
+ fence->inval_time = ktime_get();
+ list_add_tail(&fence->link, &tlb_inval->pending_fences);
+
+ if (list_is_singular(&tlb_inval->pending_fences))
+ queue_delayed_work(system_wq, &tlb_inval->fence_tdr,
+ tlb_inval->ops->timeout_delay(tlb_inval));
+ spin_unlock_irq(&tlb_inval->pending_lock);
+
+ tlb_inval->seqno = (tlb_inval->seqno + 1) %
+ TLB_INVALIDATION_SEQNO_MAX;
+ if (!tlb_inval->seqno)
+ tlb_inval->seqno = 1;
+}
+
+#define xe_tlb_inval_issue(__tlb_inval, __fence, op, args...) \
+({ \
+ int __ret; \
+ \
+ xe_assert((__tlb_inval)->xe, (__tlb_inval)->ops); \
+ xe_assert((__tlb_inval)->xe, (__fence)); \
+ \
+ mutex_lock(&(__tlb_inval)->seqno_lock); \
+ xe_tlb_inval_fence_prep((__fence)); \
+ __ret = op((__tlb_inval), (__fence)->seqno, ##args); \
+ if (__ret < 0) \
+ xe_tlb_inval_fence_signal_unlocked((__fence)); \
+ mutex_unlock(&(__tlb_inval)->seqno_lock); \
+ \
+ __ret == -ECANCELED ? 0 : __ret; \
+})
+
+/**
+ * xe_tlb_inval_all() - Issue a TLB invalidation for all TLBs
+ * @tlb_inval: TLB invalidation client
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion
+ *
+ * Issue a TLB invalidation for all TLBs. Completion of the TLB invalidation is
+ * asynchronous and the caller can use the invalidation fence to wait for
+ * completion.
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence)
+{
+ return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->all);
+}
+
+/**
+ * xe_tlb_inval_ggtt() - Issue a TLB invalidation for the GGTT
+ * @tlb_inval: TLB invalidation client
+ *
+ * Issue a TLB invalidation for the GGTT and wait for the invalidation to
+ * complete before returning (an internal fence is used for synchronization).
+ *
+ * Return: 0 on success, negative error code on error
+ */
+int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval)
+{
+ struct xe_tlb_inval_fence fence, *fence_ptr = &fence;
+ int ret;
+
+ xe_tlb_inval_fence_init(tlb_inval, fence_ptr, true);
+ ret = xe_tlb_inval_issue(tlb_inval, fence_ptr, tlb_inval->ops->ggtt);
+ xe_tlb_inval_fence_wait(fence_ptr);
+
+ return ret;
+}
+
+/**
+ * xe_tlb_inval_range() - Issue a TLB invalidation for an address range
+ * @tlb_inval: TLB invalidation client
+ * @fence: invalidation fence which will be signaled on TLB invalidation
+ * completion
+ * @start: start address
+ * @end: end address
+ * @asid: address space id
+ *
+ * Issue a range-based TLB invalidation if supported; if not, fall back to a
+ * full TLB invalidation. Completion of the TLB invalidation is asynchronous
+ * and the caller can use the invalidation fence to wait for completion.
+ *
+ * Return: Negative error code on error, 0 on success
+ */
+int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence, u64 start, u64 end,
+ u32 asid)
+{
+ return xe_tlb_inval_issue(tlb_inval, fence, tlb_inval->ops->ppgtt,
+ start, end, asid);
+}
+
+/**
+ * xe_tlb_inval_vm() - Issue a TLB invalidation for a VM
+ * @tlb_inval: TLB invalidation client
+ * @vm: VM to invalidate
+ *
+ * Invalidate the entire VM's address space. This function blocks until the
+ * invalidation has completed.
+ */
+void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm)
+{
+ struct xe_tlb_inval_fence fence;
+ u64 range = 1ull << vm->xe->info.va_bits;
+
+ xe_tlb_inval_fence_init(tlb_inval, &fence, true);
+ xe_tlb_inval_range(tlb_inval, &fence, 0, range, vm->usm.asid);
+ xe_tlb_inval_fence_wait(&fence);
+}
+
+/**
+ * xe_tlb_inval_done_handler() - TLB invalidation done handler
+ * @tlb_inval: TLB invalidation client
+ * @seqno: seqno of invalidation that is done
+ *
+ * Update recv seqno, signal any TLB invalidation fences, and restart TDR
+ */
+void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno)
+{
+ struct xe_device *xe = tlb_inval->xe;
+ struct xe_tlb_inval_fence *fence, *next;
+ unsigned long flags;
+
+ /*
+ * This can be run both directly from the IRQ handler and from
+ * process_g2h_msg(). Only one of them may process any individual CT
+ * message, however the order in which they are processed here could
+ * result in skipping a seqno. To handle that we simply process all
+ * seqnos from the last seqno_recv up to and including @seqno. The
+ * delta should be very small, so there shouldn't be many
+ * pending_fences to iterate over here.
+ *
+ * From the GuC's POV we expect the seqnos to always appear in order,
+ * so if we see something later in the timeline we can be sure that
+ * anything appearing earlier has already signalled; we just have yet
+ * to officially process the CT message, as when racing against
+ * process_g2h_msg().
+ */
+ spin_lock_irqsave(&tlb_inval->pending_lock, flags);
+ if (xe_tlb_inval_seqno_past(tlb_inval, seqno)) {
+ spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
+ return;
+ }
+
+ WRITE_ONCE(tlb_inval->seqno_recv, seqno);
+
+ list_for_each_entry_safe(fence, next,
+ &tlb_inval->pending_fences, link) {
+ trace_xe_tlb_inval_fence_recv(xe, fence);
+
+ if (!xe_tlb_inval_seqno_past(tlb_inval, fence->seqno))
+ break;
+
+ xe_tlb_inval_fence_signal(fence);
+ }
+
+ if (!list_empty(&tlb_inval->pending_fences))
+ mod_delayed_work(system_wq,
+ &tlb_inval->fence_tdr,
+ tlb_inval->ops->timeout_delay(tlb_inval));
+ else
+ cancel_delayed_work(&tlb_inval->fence_tdr);
+
+ spin_unlock_irqrestore(&tlb_inval->pending_lock, flags);
+}
+
+static const char *
+xe_inval_fence_get_driver_name(struct dma_fence *dma_fence)
+{
+ return "xe";
+}
+
+static const char *
+xe_inval_fence_get_timeline_name(struct dma_fence *dma_fence)
+{
+ return "tlb_inval_fence";
+}
+
+static const struct dma_fence_ops inval_fence_ops = {
+ .get_driver_name = xe_inval_fence_get_driver_name,
+ .get_timeline_name = xe_inval_fence_get_timeline_name,
+};
+
+/**
+ * xe_tlb_inval_fence_init() - Initialize TLB invalidation fence
+ * @tlb_inval: TLB invalidation client
+ * @fence: TLB invalidation fence to initialize
+ * @stack: fence is stack variable
+ *
+ * Initialize a TLB invalidation fence for use. xe_tlb_inval_fence_fini()
+ * will be called automatically when the fence is signalled (all fences must
+ * signal), even on error.
+ */
+void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence,
+ bool stack)
+{
+ xe_pm_runtime_get_noresume(tlb_inval->xe);
+
+ spin_lock_irq(&tlb_inval->lock);
+ dma_fence_init(&fence->base, &inval_fence_ops, &tlb_inval->lock,
+ dma_fence_context_alloc(1), 1);
+ spin_unlock_irq(&tlb_inval->lock);
+ INIT_LIST_HEAD(&fence->link);
+ if (stack)
+ set_bit(FENCE_STACK_BIT, &fence->base.flags);
+ else
+ dma_fence_get(&fence->base);
+ fence->tlb_inval = tlb_inval;
+}
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval.h b/drivers/gpu/drm/xe/xe_tlb_inval.h
new file mode 100644
index 000000000000..05614915463a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tlb_inval.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TLB_INVAL_H_
+#define _XE_TLB_INVAL_H_
+
+#include <linux/types.h>
+
+#include "xe_tlb_inval_types.h"
+
+struct xe_gt;
+struct xe_guc;
+struct xe_vm;
+
+int xe_gt_tlb_inval_init_early(struct xe_gt *gt);
+
+void xe_tlb_inval_reset(struct xe_tlb_inval *tlb_inval);
+int xe_tlb_inval_all(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence);
+int xe_tlb_inval_ggtt(struct xe_tlb_inval *tlb_inval);
+void xe_tlb_inval_vm(struct xe_tlb_inval *tlb_inval, struct xe_vm *vm);
+int xe_tlb_inval_range(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence,
+ u64 start, u64 end, u32 asid);
+
+void xe_tlb_inval_fence_init(struct xe_tlb_inval *tlb_inval,
+ struct xe_tlb_inval_fence *fence,
+ bool stack);
+
+/**
+ * xe_tlb_inval_fence_wait() - TLB invalidation fence wait
+ * @fence: TLB invalidation fence to wait on
+ *
+ * Wait on a TLB invalidation fence until it signals, non-interruptible
+ */
+static inline void
+xe_tlb_inval_fence_wait(struct xe_tlb_inval_fence *fence)
+{
+ dma_fence_wait(&fence->base, false);
+}
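+
+/*
+ * Example usage (illustrative only, not part of this interface): issue a
+ * range invalidation with a stack fence and wait for it, mirroring
+ * xe_tlb_inval_vm():
+ *
+ *	struct xe_tlb_inval_fence fence;
+ *
+ *	xe_tlb_inval_fence_init(tlb_inval, &fence, true);
+ *	xe_tlb_inval_range(tlb_inval, &fence, start, end, asid);
+ *	xe_tlb_inval_fence_wait(&fence);
+ */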
+
+void xe_tlb_inval_done_handler(struct xe_tlb_inval *tlb_inval, int seqno);
+
+#endif /* _XE_TLB_INVAL_H_ */
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.c b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
new file mode 100644
index 000000000000..1ae0dec2cf31
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_assert.h"
+#include "xe_dep_job_types.h"
+#include "xe_dep_scheduler.h"
+#include "xe_exec_queue.h"
+#include "xe_gt_types.h"
+#include "xe_tlb_inval.h"
+#include "xe_tlb_inval_job.h"
+#include "xe_migrate.h"
+#include "xe_pm.h"
+#include "xe_vm.h"
+
+/** struct xe_tlb_inval_job - TLB invalidation job */
+struct xe_tlb_inval_job {
+ /** @dep: base generic dependency Xe job */
+ struct xe_dep_job dep;
+ /** @tlb_inval: TLB invalidation client */
+ struct xe_tlb_inval *tlb_inval;
+ /** @q: exec queue issuing the invalidate */
+ struct xe_exec_queue *q;
+ /** @vm: VM which TLB invalidation is being issued for */
+ struct xe_vm *vm;
+ /** @refcount: ref count of this job */
+ struct kref refcount;
+ /**
+ * @fence: dma fence to indicate completion. One-way relationship: the
+ * job can safely reference the fence, but the fence cannot safely
+ * reference the job.
+ */
+ struct dma_fence *fence;
+ /** @start: Start address to invalidate */
+ u64 start;
+ /** @end: End address to invalidate */
+ u64 end;
+ /** @type: GT type */
+ int type;
+ /** @fence_armed: Fence has been armed */
+ bool fence_armed;
+};
+
+static struct dma_fence *xe_tlb_inval_job_run(struct xe_dep_job *dep_job)
+{
+ struct xe_tlb_inval_job *job =
+ container_of(dep_job, typeof(*job), dep);
+ struct xe_tlb_inval_fence *ifence =
+ container_of(job->fence, typeof(*ifence), base);
+
+ xe_tlb_inval_range(job->tlb_inval, ifence, job->start,
+ job->end, job->vm->usm.asid);
+
+ return job->fence;
+}
+
+static void xe_tlb_inval_job_free(struct xe_dep_job *dep_job)
+{
+ struct xe_tlb_inval_job *job =
+ container_of(dep_job, typeof(*job), dep);
+
+ /* Pairs with get in xe_tlb_inval_job_push */
+ xe_tlb_inval_job_put(job);
+}
+
+static const struct xe_dep_job_ops dep_job_ops = {
+ .run_job = xe_tlb_inval_job_run,
+ .free_job = xe_tlb_inval_job_free,
+};
+
+/**
+ * xe_tlb_inval_job_create() - TLB invalidation job create
+ * @q: exec queue issuing the invalidate
+ * @tlb_inval: TLB invalidation client
+ * @dep_scheduler: Dependency scheduler for job
+ * @vm: VM which TLB invalidation is being issued for
+ * @start: Start address to invalidate
+ * @end: End address to invalidate
+ * @type: GT type
+ *
+ * Create a TLB invalidation job and initialize internal fields. The caller is
+ * responsible for releasing the creation reference.
+ *
+ * Return: TLB invalidation job object on success, ERR_PTR on failure
+ */
+struct xe_tlb_inval_job *
+xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
+ struct xe_dep_scheduler *dep_scheduler,
+ struct xe_vm *vm, u64 start, u64 end, int type)
+{
+ struct xe_tlb_inval_job *job;
+ struct drm_sched_entity *entity =
+ xe_dep_scheduler_entity(dep_scheduler);
+ struct xe_tlb_inval_fence *ifence;
+ int err;
+
+ xe_assert(vm->xe, type == XE_EXEC_QUEUE_TLB_INVAL_MEDIA_GT ||
+ type == XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+
+ job = kmalloc(sizeof(*job), GFP_KERNEL);
+ if (!job)
+ return ERR_PTR(-ENOMEM);
+
+ job->q = q;
+ job->vm = vm;
+ job->tlb_inval = tlb_inval;
+ job->start = start;
+ job->end = end;
+ job->fence_armed = false;
+ job->dep.ops = &dep_job_ops;
+ job->type = type;
+ kref_init(&job->refcount);
+ xe_exec_queue_get(q); /* Pairs with put in xe_tlb_inval_job_destroy */
+ xe_vm_get(vm); /* Pairs with put in xe_tlb_inval_job_destroy */
+
+ ifence = kmalloc(sizeof(*ifence), GFP_KERNEL);
+ if (!ifence) {
+ err = -ENOMEM;
+ goto err_job;
+ }
+ job->fence = &ifence->base;
+
+ err = drm_sched_job_init(&job->dep.drm, entity, 1, NULL,
+ q->xef ? q->xef->drm->client_id : 0);
+ if (err)
+ goto err_fence;
+
+ /* Pairs with put in xe_tlb_inval_job_destroy */
+ xe_pm_runtime_get_noresume(gt_to_xe(q->gt));
+
+ return job;
+
+err_fence:
+ kfree(ifence);
+err_job:
+ xe_vm_put(vm);
+ xe_exec_queue_put(q);
+ kfree(job);
+
+ return ERR_PTR(err);
+}
+
+static void xe_tlb_inval_job_destroy(struct kref *ref)
+{
+ struct xe_tlb_inval_job *job = container_of(ref, typeof(*job),
+ refcount);
+ struct xe_tlb_inval_fence *ifence =
+ container_of(job->fence, typeof(*ifence), base);
+ struct xe_exec_queue *q = job->q;
+ struct xe_device *xe = gt_to_xe(q->gt);
+ struct xe_vm *vm = job->vm;
+
+ if (!job->fence_armed)
+ kfree(ifence);
+ else
+ /* Ref from xe_tlb_inval_fence_init */
+ dma_fence_put(job->fence);
+
+ drm_sched_job_cleanup(&job->dep.drm);
+ kfree(job);
+ xe_vm_put(vm); /* Pairs with get from xe_tlb_inval_job_create */
+ xe_exec_queue_put(q); /* Pairs with get from xe_tlb_inval_job_create */
+ xe_pm_runtime_put(xe); /* Pairs with get from xe_tlb_inval_job_create */
+}
+
+/**
+ * xe_tlb_inval_job_alloc_dep() - TLB invalidation job alloc dependency
+ * @job: TLB invalidation job to alloc dependency for
+ *
+ * Allocate storage for a dependency of the TLB invalidation job. This
+ * function should be called at most once per job and must be paired with
+ * xe_tlb_inval_job_push() being called with a real fence.
+ *
+ * Return: 0 on success, -errno on failure
+ */
+int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job)
+{
+ xe_assert(gt_to_xe(job->q->gt), !xa_load(&job->dep.drm.dependencies, 0));
+ might_alloc(GFP_KERNEL);
+
+ return drm_sched_job_add_dependency(&job->dep.drm,
+ dma_fence_get_stub());
+}
+
+/**
+ * xe_tlb_inval_job_push() - TLB invalidation job push
+ * @job: TLB invalidation job to push
+ * @m: The migration object being used
+ * @fence: Dependency for TLB invalidation job
+ *
+ * Pushes a TLB invalidation job for execution, using @fence as a dependency.
+ * Storage for @fence must be preallocated with xe_tlb_inval_job_alloc_dep()
+ * prior to this call if @fence is not signaled. Takes a reference to the job's
+ * finished fence, which the caller is responsible for releasing, and returns
+ * it to the caller. This function is safe to call in the reclaim path.
+ *
+ * Return: Job's finished fence on success, cannot fail
+ */
+struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
+ struct xe_migrate *m,
+ struct dma_fence *fence)
+{
+ struct xe_tlb_inval_fence *ifence =
+ container_of(job->fence, typeof(*ifence), base);
+
+ if (!dma_fence_is_signaled(fence)) {
+ void *ptr;
+
+ /*
+ * Can be called in the reclaim path, hence the preallocation of the
+ * dependency storage in xe_tlb_inval_job_alloc_dep(). Verify the
+ * caller did this correctly.
+ */
+ xe_assert(gt_to_xe(job->q->gt),
+ xa_load(&job->dep.drm.dependencies, 0) ==
+ dma_fence_get_stub());
+
+ dma_fence_get(fence); /* ref released once dependency processed by scheduler */
+ ptr = xa_store(&job->dep.drm.dependencies, 0, fence,
+ GFP_ATOMIC);
+ xe_assert(gt_to_xe(job->q->gt), !xa_is_err(ptr));
+ }
+
+ xe_tlb_inval_job_get(job); /* Pairs with put in free_job */
+ job->fence_armed = true;
+
+ /*
+ * We need the migration lock to protect the job's seqno and the SPSC
+ * queue; it is only taken for the migration queue, while user queues
+ * are protected by the VM dma-resv lock.
+ */
+ xe_migrate_job_lock(m, job->q);
+
+ /* Creation ref pairs with put in xe_tlb_inval_job_destroy */
+ xe_tlb_inval_fence_init(job->tlb_inval, ifence, false);
+ dma_fence_get(job->fence); /* Pairs with put in DRM scheduler */
+
+ drm_sched_job_arm(&job->dep.drm);
+ /*
+ * Caller ref; the get must be done before the job is pushed as it
+ * could immediately signal and be freed.
+ */
+ dma_fence_get(&job->dep.drm.s_fence->finished);
+ drm_sched_entity_push_job(&job->dep.drm);
+
+ /* Let the upper layers fish this out */
+ xe_exec_queue_tlb_inval_last_fence_set(job->q, job->vm,
+ &job->dep.drm.s_fence->finished,
+ job->type);
+
+ xe_migrate_job_unlock(m, job->q);
+
+ /*
+ * Not using job->fence, as it has its own dma-fence context, which does
+ * not allow TLB invalidation fences on the same (queue, GT) tuple to
+ * be squashed in dma-resv/DRM scheduler. Instead, we use the DRM
+ * scheduler context and the job's finished fence, which enables squashing.
+ */
+ return &job->dep.drm.s_fence->finished;
+}
+
+/**
+ * xe_tlb_inval_job_get() - Get a reference to TLB invalidation job
+ * @job: TLB invalidation job object
+ *
+ * Increment the TLB invalidation job's reference count
+ */
+void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job)
+{
+ kref_get(&job->refcount);
+}
+
+/**
+ * xe_tlb_inval_job_put() - Put a reference to TLB invalidation job
+ * @job: TLB invalidation job object
+ *
+ * Decrement the TLB invalidation job's reference count and call
+ * xe_tlb_inval_job_destroy() when the reference count reaches 0. The
+ * decrement is skipped if the input @job is NULL or an error pointer.
+ */
+void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job)
+{
+ if (!IS_ERR_OR_NULL(job))
+ kref_put(&job->refcount, xe_tlb_inval_job_destroy);
+}
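+
+/*
+ * Typical job lifecycle (illustrative only; @q, @tlb_inval, @dep_scheduler,
+ * @vm, @m and @dep_fence come from the caller's context, error handling
+ * trimmed):
+ *
+ *	job = xe_tlb_inval_job_create(q, tlb_inval, dep_scheduler, vm, start,
+ *				      end, XE_EXEC_QUEUE_TLB_INVAL_PRIMARY_GT);
+ *
+ *	xe_tlb_inval_job_alloc_dep(job);	(preallocate, outside reclaim)
+ *
+ *	fence = xe_tlb_inval_job_push(job, m, dep_fence);	(reclaim safe)
+ *	... install fence into dma-resv / syncs ...
+ *	dma_fence_put(fence);		(ref returned by push)
+ *	xe_tlb_inval_job_put(job);	(creation ref)
+ */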
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_job.h b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
new file mode 100644
index 000000000000..4d6df1a6c6ca
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_job.h
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_TLB_INVAL_JOB_H_
+#define _XE_TLB_INVAL_JOB_H_
+
+#include <linux/types.h>
+
+struct dma_fence;
+struct xe_dep_scheduler;
+struct xe_exec_queue;
+struct xe_migrate;
+struct xe_tlb_inval;
+struct xe_tlb_inval_job;
+struct xe_vm;
+
+struct xe_tlb_inval_job *
+xe_tlb_inval_job_create(struct xe_exec_queue *q, struct xe_tlb_inval *tlb_inval,
+ struct xe_dep_scheduler *dep_scheduler,
+ struct xe_vm *vm, u64 start, u64 end, int type);
+
+int xe_tlb_inval_job_alloc_dep(struct xe_tlb_inval_job *job);
+
+struct dma_fence *xe_tlb_inval_job_push(struct xe_tlb_inval_job *job,
+ struct xe_migrate *m,
+ struct dma_fence *fence);
+
+void xe_tlb_inval_job_get(struct xe_tlb_inval_job *job);
+
+void xe_tlb_inval_job_put(struct xe_tlb_inval_job *job);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_tlb_inval_types.h b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
new file mode 100644
index 000000000000..8f8b060e9005
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_tlb_inval_types.h
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2023 Intel Corporation
+ */
+
+#ifndef _XE_TLB_INVAL_TYPES_H_
+#define _XE_TLB_INVAL_TYPES_H_
+
+#include <linux/workqueue.h>
+#include <linux/dma-fence.h>
+
+struct xe_tlb_inval;
+
+/** struct xe_tlb_inval_ops - TLB invalidation ops (backend) */
+struct xe_tlb_inval_ops {
+ /**
+ * @all: Invalidate all TLBs
+ * @tlb_inval: TLB invalidation client
+ * @seqno: Seqno of TLB invalidation
+ *
+ * Return 0 on success, -ECANCELED if backend is mid-reset, error on
+ * failure
+ */
+ int (*all)(struct xe_tlb_inval *tlb_inval, u32 seqno);
+
+ /**
+ * @ggtt: Invalidate global translation TLBs
+ * @tlb_inval: TLB invalidation client
+ * @seqno: Seqno of TLB invalidation
+ *
+ * Return 0 on success, -ECANCELED if backend is mid-reset, error on
+ * failure
+ */
+ int (*ggtt)(struct xe_tlb_inval *tlb_inval, u32 seqno);
+
+ /**
+ * @ppgtt: Invalidate per-process translation TLBs
+ * @tlb_inval: TLB invalidation client
+ * @seqno: Seqno of TLB invalidation
+ * @start: Start address
+ * @end: End address
+ * @asid: Address space ID
+ *
+ * Return 0 on success, -ECANCELED if backend is mid-reset, error on
+ * failure
+ */
+ int (*ppgtt)(struct xe_tlb_inval *tlb_inval, u32 seqno, u64 start,
+ u64 end, u32 asid);
+
+ /**
+ * @initialized: Backend is initialized
+ * @tlb_inval: TLB invalidation client
+ *
+ * Return: True if the backend is initialized, False otherwise
+ */
+ bool (*initialized)(struct xe_tlb_inval *tlb_inval);
+
+ /**
+ * @flush: Flush pending TLB invalidations
+ * @tlb_inval: TLB invalidation client
+ */
+ void (*flush)(struct xe_tlb_inval *tlb_inval);
+
+ /**
+ * @timeout_delay: Timeout delay for TLB invalidation
+ * @tlb_inval: TLB invalidation client
+ *
+ * Return: Timeout delay for TLB invalidation in jiffies
+ */
+ long (*timeout_delay)(struct xe_tlb_inval *tlb_inval);
+};
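+
+/*
+ * A backend (e.g. the GuC CT backend) provides this table and attaches it via
+ * &xe_tlb_inval.ops. Illustrative shape only, with hypothetical function
+ * names; each invalidation hook returns 0 on success or -ECANCELED while the
+ * backend is mid-reset:
+ *
+ *	static const struct xe_tlb_inval_ops example_tlb_inval_ops = {
+ *		.all = example_tlb_inval_all,
+ *		.ggtt = example_tlb_inval_ggtt,
+ *		.ppgtt = example_tlb_inval_ppgtt,
+ *		.initialized = example_tlb_inval_initialized,
+ *		.flush = example_tlb_inval_flush,
+ *		.timeout_delay = example_tlb_inval_timeout_delay,
+ *	};
+ */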
+
+/** struct xe_tlb_inval - TLB invalidation client (frontend) */
+struct xe_tlb_inval {
+ /** @private: Backend private pointer */
+ void *private;
+ /** @xe: Pointer to Xe device */
+ struct xe_device *xe;
+ /** @ops: TLB invalidation ops */
+ const struct xe_tlb_inval_ops *ops;
+ /** @seqno: TLB invalidation seqno, protected by @seqno_lock */
+#define TLB_INVALIDATION_SEQNO_MAX 0x100000
+ int seqno;
+ /** @seqno_lock: protects @seqno */
+ struct mutex seqno_lock;
+ /**
+ * @seqno_recv: last received TLB invalidation seqno, protected by
+ * @pending_lock
+ */
+ int seqno_recv;
+ /**
+ * @pending_fences: list of pending fences waiting on TLB invalidations,
+ * protected by @pending_lock
+ */
+ struct list_head pending_fences;
+ /**
+ * @pending_lock: protects @pending_fences and updating @seqno_recv.
+ */
+ spinlock_t pending_lock;
+ /**
+ * @fence_tdr: schedules a delayed call to xe_tlb_fence_timeout after
+ * the timeout interval is over.
+ */
+ struct delayed_work fence_tdr;
+ /** @job_wq: schedules TLB invalidation jobs */
+ struct workqueue_struct *job_wq;
+ /** @lock: protects TLB invalidation fences */
+ spinlock_t lock;
+};
+
+/**
+ * struct xe_tlb_inval_fence - TLB invalidation fence
+ *
+ * Optionally passed to xe_tlb_inval* functions and will be signaled upon TLB
+ * invalidation completion.
+ */
+struct xe_tlb_inval_fence {
+ /** @base: dma fence base */
+ struct dma_fence base;
+ /** @tlb_inval: TLB invalidation client which the fence belongs to */
+ struct xe_tlb_inval *tlb_inval;
+ /** @link: link into list of pending tlb fences */
+ struct list_head link;
+ /** @seqno: seqno of the TLB invalidation to signal the fence on */
+ int seqno;
+ /** @inval_time: time of TLB invalidation */
+ ktime_t inval_time;
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_trace.h b/drivers/gpu/drm/xe/xe_trace.h
index b4a3577df70c..79a97b086cb2 100644
--- a/drivers/gpu/drm/xe/xe_trace.h
+++ b/drivers/gpu/drm/xe/xe_trace.h
@@ -14,10 +14,10 @@
#include "xe_exec_queue_types.h"
#include "xe_gpu_scheduler_types.h"
-#include "xe_gt_tlb_invalidation_types.h"
#include "xe_gt_types.h"
#include "xe_guc_exec_queue_types.h"
#include "xe_sched_job.h"
+#include "xe_tlb_inval_types.h"
#include "xe_vm.h"
#define __dev_name_xe(xe) dev_name((xe)->drm.dev)
@@ -25,13 +25,13 @@
#define __dev_name_gt(gt) __dev_name_xe(gt_to_xe((gt)))
#define __dev_name_eq(q) __dev_name_gt((q)->gt)
-DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
+DECLARE_EVENT_CLASS(xe_tlb_inval_fence,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence),
TP_STRUCT__entry(
__string(dev, __dev_name_xe(xe))
- __field(struct xe_gt_tlb_invalidation_fence *, fence)
+ __field(struct xe_tlb_inval_fence *, fence)
__field(int, seqno)
),
@@ -45,39 +45,23 @@ DECLARE_EVENT_CLASS(xe_gt_tlb_invalidation_fence,
__get_str(dev), __entry->fence, __entry->seqno)
);
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_create,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_send,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence,
- xe_gt_tlb_invalidation_fence_work_func,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_recv,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_cb,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_signal,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_send,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
- TP_ARGS(xe, fence)
-);
-
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_recv,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
- TP_ARGS(xe, fence)
-);
-
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_signal,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
- TP_ARGS(xe, fence)
-);
-
-DEFINE_EVENT(xe_gt_tlb_invalidation_fence, xe_gt_tlb_invalidation_fence_timeout,
- TP_PROTO(struct xe_device *xe, struct xe_gt_tlb_invalidation_fence *fence),
+DEFINE_EVENT(xe_tlb_inval_fence, xe_tlb_inval_fence_timeout,
+ TP_PROTO(struct xe_device *xe, struct xe_tlb_inval_fence *fence),
TP_ARGS(xe, fence)
);
@@ -457,6 +441,29 @@ TRACE_EVENT(xe_eu_stall_data_read,
__entry->read_size, __entry->total_size)
);
+TRACE_EVENT(xe_exec_queue_reach_max_job_count,
+ TP_PROTO(struct xe_exec_queue *q, int max_cnt),
+ TP_ARGS(q, max_cnt),
+
+ TP_STRUCT__entry(__string(dev, __dev_name_eq(q))
+ __field(enum xe_engine_class, class)
+ __field(u32, logical_mask)
+ __field(u16, guc_id)
+ __field(int, max_cnt)
+ ),
+
+ TP_fast_assign(__assign_str(dev);
+ __entry->class = q->class;
+ __entry->logical_mask = q->logical_mask;
+ __entry->guc_id = q->guc->id;
+ __entry->max_cnt = max_cnt;
+ ),
+
+ TP_printk("dev=%s, job count exceeded the maximum limit (%d) per exec queue. engine_class=0x%x, logical_mask=0x%x, guc_id=%d",
+ __get_str(dev), __entry->max_cnt,
+ __entry->class, __entry->logical_mask, __entry->guc_id)
+);
+
#endif
/* This part must be outside protection */
diff --git a/drivers/gpu/drm/xe/xe_trace_bo.h b/drivers/gpu/drm/xe/xe_trace_bo.h
index ccebd5f0878e..86323cf3be2c 100644
--- a/drivers/gpu/drm/xe/xe_trace_bo.h
+++ b/drivers/gpu/drm/xe/xe_trace_bo.h
@@ -33,7 +33,7 @@ DECLARE_EVENT_CLASS(xe_bo,
TP_fast_assign(
__assign_str(dev);
- __entry->size = bo->size;
+ __entry->size = xe_bo_size(bo);
__entry->flags = bo->flags;
__entry->vm = bo->vm;
),
@@ -73,7 +73,7 @@ TRACE_EVENT(xe_bo_move,
TP_fast_assign(
__entry->bo = bo;
- __entry->size = bo->size;
+ __entry->size = xe_bo_size(bo);
__assign_str(new_placement_name);
__assign_str(old_placement_name);
__assign_str(device_id);
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index d9c9d2547aad..1bddecfb723a 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2021-2023 Intel Corporation
- * Copyright (C) 2021-2002 Red Hat
+ * Copyright (C) 2021-2022 Red Hat
*/
#include <drm/drm_managed.h>
@@ -24,6 +24,7 @@
#include "xe_sriov.h"
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_vram.h"
#include "xe_wa.h"
struct xe_ttm_stolen_mgr {
@@ -80,17 +81,18 @@ static u32 get_wopcm_size(struct xe_device *xe)
return wopcm_size;
}
-static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
+static u64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
{
- struct xe_tile *tile = xe_device_get_root_tile(xe);
+ struct xe_vram_region *tile_vram = xe_device_get_root_tile(xe)->mem.vram;
+ resource_size_t tile_io_start = xe_vram_region_io_start(tile_vram);
struct xe_mmio *mmio = xe_root_tile_mmio(xe);
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
u64 stolen_size, wopcm_size;
u64 tile_offset;
u64 tile_size;
- tile_offset = tile->mem.vram.io_start - xe->mem.vram.io_start;
- tile_size = tile->mem.vram.actual_physical_size;
+ tile_offset = tile_io_start - xe_vram_region_io_start(xe->mem.vram);
+ tile_size = xe_vram_region_actual_physical_size(tile_vram);
/* Use DSM base address instead for stolen memory */
mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
@@ -103,11 +105,13 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
return 0;
stolen_size = tile_size - mgr->stolen_base;
+
+ xe_assert(xe, stolen_size >= wopcm_size);
stolen_size -= wopcm_size;
/* Verify usage fits in the actual resource available */
if (mgr->stolen_base + stolen_size <= pci_resource_len(pdev, LMEM_BAR))
- mgr->io_base = tile->mem.vram.io_start + mgr->stolen_base;
+ mgr->io_base = tile_io_start + mgr->stolen_base;
/*
* There may be few KB of platform dependent reserved memory at the end
@@ -164,7 +168,7 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
stolen_size -= wopcm_size;
- if (media_gt && XE_WA(media_gt, 14019821291)) {
+ if (media_gt && XE_GT_WA(media_gt, 14019821291)) {
u64 gscpsmi_base = xe_mmio_read64_2x32(&media_gt->mmio, GSCPSMI_BASE)
& ~GENMASK_ULL(5, 0);
diff --git a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
index d38b91872da3..3e404eb8d098 100644
--- a/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_sys_mgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2021-2022 Intel Corporation
- * Copyright (C) 2021-2002 Red Hat
+ * Copyright (C) 2021-2022 Red Hat
*/
#include "xe_ttm_sys_mgr.h"
@@ -85,7 +85,7 @@ static const struct ttm_resource_manager_func xe_ttm_sys_mgr_func = {
.debug = xe_ttm_sys_mgr_debug
};
-static void ttm_sys_mgr_fini(struct drm_device *drm, void *arg)
+static void xe_ttm_sys_mgr_fini(struct drm_device *drm, void *arg)
{
struct xe_device *xe = (struct xe_device *)arg;
struct ttm_resource_manager *man = &xe->mem.sys_mgr;
@@ -116,5 +116,5 @@ int xe_ttm_sys_mgr_init(struct xe_device *xe)
ttm_resource_manager_init(man, &xe->ttm, gtt_size >> PAGE_SHIFT);
ttm_set_driver_manager(&xe->ttm, XE_PL_TT, man);
ttm_resource_manager_set_used(man, true);
- return drmm_add_action_or_reset(&xe->drm, ttm_sys_mgr_fini, xe);
+ return drmm_add_action_or_reset(&xe->drm, xe_ttm_sys_mgr_fini, xe);
}
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
index 9e375a40aee9..9f70802fce92 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: MIT
/*
* Copyright © 2021-2022 Intel Corporation
- * Copyright (C) 2021-2002 Red Hat
+ * Copyright (C) 2021-2022 Red Hat
*/
#include <drm/drm_managed.h>
@@ -15,6 +15,7 @@
#include "xe_gt.h"
#include "xe_res_cursor.h"
#include "xe_ttm_vram_mgr.h"
+#include "xe_vram_types.h"
static inline struct drm_buddy_block *
xe_ttm_vram_mgr_first_block(struct list_head *list)
@@ -283,7 +284,7 @@ static const struct ttm_resource_manager_func xe_ttm_vram_mgr_func = {
.debug = xe_ttm_vram_mgr_debug
};
-static void ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
+static void xe_ttm_vram_mgr_fini(struct drm_device *dev, void *arg)
{
struct xe_device *xe = to_xe_device(dev);
struct xe_ttm_vram_mgr *mgr = arg;
@@ -334,16 +335,23 @@ int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
ttm_set_driver_manager(&xe->ttm, mem_type, &mgr->manager);
ttm_resource_manager_set_used(&mgr->manager, true);
- return drmm_add_action_or_reset(&xe->drm, ttm_vram_mgr_fini, mgr);
+ return drmm_add_action_or_reset(&xe->drm, xe_ttm_vram_mgr_fini, mgr);
}
-int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr)
+/**
+ * xe_ttm_vram_mgr_init - initialize TTM VRAM region
+ * @xe: pointer to Xe device
+ * @vram: pointer to xe_vram_region that contains the memory region attributes
+ *
+ * Initialize the Xe TTM for given @vram region using the given parameters.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram)
{
- struct xe_device *xe = tile_to_xe(tile);
- struct xe_vram_region *vram = &tile->mem.vram;
-
- return __xe_ttm_vram_mgr_init(xe, mgr, XE_PL_VRAM0 + tile->id,
- vram->usable_size, vram->io_size,
+ return __xe_ttm_vram_mgr_init(xe, &vram->ttm, vram->placement,
+ xe_vram_region_usable_size(vram),
+ xe_vram_region_io_size(vram),
PAGE_SIZE);
}
@@ -392,7 +400,7 @@ int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
*/
xe_res_first(res, offset, length, &cursor);
for_each_sgtable_sg((*sgt), sg, i) {
- phys_addr_t phys = cursor.start + tile->mem.vram.io_start;
+ phys_addr_t phys = cursor.start + xe_vram_region_io_start(tile->mem.vram);
size_t size = min_t(u64, cursor.size, SZ_2G);
dma_addr_t addr;
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
index cc76050e376d..87b7fae5edba 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.h
@@ -11,11 +11,12 @@
enum dma_data_direction;
struct xe_device;
struct xe_tile;
+struct xe_vram_region;
int __xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_ttm_vram_mgr *mgr,
u32 mem_type, u64 size, u64 io_size,
u64 default_page_size);
-int xe_ttm_vram_mgr_init(struct xe_tile *tile, struct xe_ttm_vram_mgr *mgr);
+int xe_ttm_vram_mgr_init(struct xe_device *xe, struct xe_vram_region *vram);
int xe_ttm_vram_mgr_alloc_sgt(struct xe_device *xe,
struct ttm_resource *res,
u64 offset, u64 length,
diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
index 1144f9232ebb..a71e14818ec2 100644
--- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
+++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr_types.h
@@ -10,7 +10,7 @@
#include <drm/ttm/ttm_device.h>
/**
- * struct xe_ttm_vram_mgr - XE TTM VRAM manager
+ * struct xe_ttm_vram_mgr - Xe TTM VRAM manager
*
* Manages placement of TTM resource in VRAM.
*/
@@ -32,7 +32,7 @@ struct xe_ttm_vram_mgr {
};
/**
- * struct xe_ttm_vram_mgr_resource - XE TTM VRAM resource
+ * struct xe_ttm_vram_mgr_resource - Xe TTM VRAM resource
*/
struct xe_ttm_vram_mgr_resource {
/** @base: Base TTM resource */
diff --git a/drivers/gpu/drm/xe/xe_tuning.c b/drivers/gpu/drm/xe/xe_tuning.c
index 49ddbda7cdef..5766fa7742d3 100644
--- a/drivers/gpu/drm/xe/xe_tuning.c
+++ b/drivers/gpu/drm/xe/xe_tuning.c
@@ -8,6 +8,7 @@
#include <kunit/visibility.h>
#include <drm/drm_managed.h>
+#include <drm/drm_print.h>
#include "regs/xe_gt_regs.h"
#include "xe_gt_types.h"
@@ -40,7 +41,8 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
REG_FIELD_PREP(L3_PWM_TIMER_INIT_VAL_MASK, 0x7f)))
},
{ XE_RTP_NAME("Tuning: Compression Overfetch"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
+ FUNC(xe_rtp_match_has_flat_ccs)),
XE_RTP_ACTIONS(CLR(CCCHKNREG1, ENCOMPPERFFIX),
SET(CCCHKNREG1, L3CMPCTRL))
},
@@ -58,12 +60,14 @@ static const struct xe_rtp_entry_sr gt_tunings[] = {
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG3, COMPPWOVERFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
+ FUNC(xe_rtp_match_has_flat_ccs)),
XE_RTP_ACTIONS(SET(L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
{ XE_RTP_NAME("Tuning: L2 Overfetch Compressible Only - media"),
- XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED)),
+ XE_RTP_RULES(MEDIA_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
+ FUNC(xe_rtp_match_has_flat_ccs)),
XE_RTP_ACTIONS(SET(XE2LPM_L3SQCREG2,
COMPMEMRD256BOVRFETCHEN))
},
@@ -98,6 +102,11 @@ static const struct xe_rtp_entry_sr engine_tunings[] = {
ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(SAMPLER_MODE, INDIRECT_STATE_BASE_ADDR_OVERRIDE))
},
+ { XE_RTP_NAME("Tuning: Disable NULL query for Anyhit Shader"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2000, XE_RTP_END_VERSION_UNDEFINED),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(RT_CTRL, DIS_NULL_QUERY))
+ },
};
static const struct xe_rtp_entry_sr lrc_tunings[] = {
@@ -209,7 +218,14 @@ void xe_tuning_process_lrc(struct xe_hw_engine *hwe)
xe_rtp_process_to_sr(&ctx, lrc_tunings, ARRAY_SIZE(lrc_tunings), &hwe->reg_lrc);
}
-void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_tuning_dump() - Dump GT tuning info into a drm printer.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Return: always 0.
+ */
+int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
{
size_t idx;
@@ -217,11 +233,15 @@ void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p)
for_each_set_bit(idx, gt->tuning_active.gt, ARRAY_SIZE(gt_tunings))
drm_printf_indent(p, 1, "%s\n", gt_tunings[idx].name);
- drm_printf(p, "\nEngine Tunings\n");
+ drm_puts(p, "\n");
+ drm_printf(p, "Engine Tunings\n");
for_each_set_bit(idx, gt->tuning_active.engine, ARRAY_SIZE(engine_tunings))
drm_printf_indent(p, 1, "%s\n", engine_tunings[idx].name);
- drm_printf(p, "\nLRC Tunings\n");
+ drm_puts(p, "\n");
+ drm_printf(p, "LRC Tunings\n");
for_each_set_bit(idx, gt->tuning_active.lrc, ARRAY_SIZE(lrc_tunings))
drm_printf_indent(p, 1, "%s\n", lrc_tunings[idx].name);
+
+ return 0;
}
diff --git a/drivers/gpu/drm/xe/xe_tuning.h b/drivers/gpu/drm/xe/xe_tuning.h
index dd0d3ccc9c65..c1cc5927fda7 100644
--- a/drivers/gpu/drm/xe/xe_tuning.h
+++ b/drivers/gpu/drm/xe/xe_tuning.h
@@ -14,6 +14,6 @@ int xe_tuning_init(struct xe_gt *gt);
void xe_tuning_process_gt(struct xe_gt *gt);
void xe_tuning_process_engine(struct xe_hw_engine *hwe);
void xe_tuning_process_lrc(struct xe_hw_engine *hwe);
-void xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p);
+int xe_tuning_dump(struct xe_gt *gt, struct drm_printer *p);
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc.c b/drivers/gpu/drm/xe/xe_uc.c
index 3a8751a8b92d..465bda355443 100644
--- a/drivers/gpu/drm/xe/xe_uc.c
+++ b/drivers/gpu/drm/xe/xe_uc.c
@@ -33,6 +33,22 @@ uc_to_xe(struct xe_uc *uc)
}
/* Should be called once at driver load only */
+int xe_uc_init_noalloc(struct xe_uc *uc)
+{
+ int ret;
+
+ ret = xe_guc_init_noalloc(&uc->guc);
+ if (ret)
+ goto err;
+
+ /* HuC and GSC have no early dependencies and will be initialized during xe_uc_init(). */
+ return 0;
+
+err:
+ xe_gt_err(uc_to_gt(uc), "Failed to early initialize uC (%pe)\n", ERR_PTR(ret));
+ return ret;
+}
+
int xe_uc_init(struct xe_uc *uc)
{
int ret;
@@ -56,15 +72,17 @@ int xe_uc_init(struct xe_uc *uc)
if (!xe_device_uc_enabled(uc_to_xe(uc)))
return 0;
- if (IS_SRIOV_VF(uc_to_xe(uc)))
- return 0;
+ if (!IS_SRIOV_VF(uc_to_xe(uc))) {
+ ret = xe_wopcm_init(&uc->wopcm);
+ if (ret)
+ goto err;
+ }
- ret = xe_wopcm_init(&uc->wopcm);
+ ret = xe_guc_min_load_for_hwconfig(&uc->guc);
if (ret)
goto err;
return 0;
-
err:
xe_gt_err(uc_to_gt(uc), "Failed to initialize uC (%pe)\n", ERR_PTR(ret));
return ret;
@@ -126,28 +144,7 @@ int xe_uc_sanitize_reset(struct xe_uc *uc)
return uc_reset(uc);
}
-/**
- * xe_uc_init_hwconfig - minimally init Uc, read and parse hwconfig
- * @uc: The UC object
- *
- * Return: 0 on success, negative error code on error.
- */
-int xe_uc_init_hwconfig(struct xe_uc *uc)
-{
- int ret;
-
- /* GuC submission not enabled, nothing to do */
- if (!xe_device_uc_enabled(uc_to_xe(uc)))
- return 0;
-
- ret = xe_guc_min_load_for_hwconfig(&uc->guc);
- if (ret)
- return ret;
-
- return 0;
-}
-
-static int vf_uc_init_hw(struct xe_uc *uc)
+static int vf_uc_load_hw(struct xe_uc *uc)
{
int err;
@@ -161,22 +158,30 @@ static int vf_uc_init_hw(struct xe_uc *uc)
err = xe_gt_sriov_vf_connect(uc_to_gt(uc));
if (err)
- return err;
+ goto err_out;
uc->guc.submission_state.enabled = true;
+ err = xe_guc_opt_in_features_enable(&uc->guc);
+ if (err)
+ goto err_out;
+
err = xe_gt_record_default_lrcs(uc_to_gt(uc));
if (err)
- return err;
+ goto err_out;
return 0;
+
+err_out:
+ xe_guc_sanitize(&uc->guc);
+ return err;
}
/*
* Should be called during driver load, after every GT reset, and after every
* suspend to reload / auth the firmwares.
*/
-int xe_uc_init_hw(struct xe_uc *uc)
+int xe_uc_load_hw(struct xe_uc *uc)
{
int ret;
@@ -185,7 +190,7 @@ int xe_uc_init_hw(struct xe_uc *uc)
return 0;
if (IS_SRIOV_VF(uc_to_xe(uc)))
- return vf_uc_init_hw(uc);
+ return vf_uc_load_hw(uc);
ret = xe_huc_upload(&uc->huc);
if (ret)
@@ -201,15 +206,15 @@ int xe_uc_init_hw(struct xe_uc *uc)
ret = xe_gt_record_default_lrcs(uc_to_gt(uc));
if (ret)
- return ret;
+ goto err_out;
ret = xe_guc_post_load_init(&uc->guc);
if (ret)
- return ret;
+ goto err_out;
ret = xe_guc_pc_start(&uc->guc.pc);
if (ret)
- return ret;
+ goto err_out;
xe_guc_engine_activity_enable_stats(&uc->guc);
@@ -221,11 +226,10 @@ int xe_uc_init_hw(struct xe_uc *uc)
xe_gsc_load_start(&uc->gsc);
return 0;
-}
-int xe_uc_fini_hw(struct xe_uc *uc)
-{
- return xe_uc_sanitize_reset(uc);
+err_out:
+ xe_guc_sanitize(&uc->guc);
+ return ret;
}
int xe_uc_reset_prepare(struct xe_uc *uc)
diff --git a/drivers/gpu/drm/xe/xe_uc.h b/drivers/gpu/drm/xe/xe_uc.h
index c23e6f5e2514..21c9306098cf 100644
--- a/drivers/gpu/drm/xe/xe_uc.h
+++ b/drivers/gpu/drm/xe/xe_uc.h
@@ -8,11 +8,10 @@
struct xe_uc;
+int xe_uc_init_noalloc(struct xe_uc *uc);
int xe_uc_init(struct xe_uc *uc);
-int xe_uc_init_hwconfig(struct xe_uc *uc);
int xe_uc_init_post_hwconfig(struct xe_uc *uc);
-int xe_uc_init_hw(struct xe_uc *uc);
-int xe_uc_fini_hw(struct xe_uc *uc);
+int xe_uc_load_hw(struct xe_uc *uc);
void xe_uc_gucrc_disable(struct xe_uc *uc);
int xe_uc_reset_prepare(struct xe_uc *uc);
void xe_uc_stop_prepare(struct xe_uc *uc);
diff --git a/drivers/gpu/drm/xe/xe_uc_fw.c b/drivers/gpu/drm/xe/xe_uc_fw.c
index 2741849bbf4d..622b76078567 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw.c
+++ b/drivers/gpu/drm/xe/xe_uc_fw.c
@@ -16,6 +16,7 @@
#include "xe_gsc.h"
#include "xe_gt.h"
#include "xe_gt_printk.h"
+#include "xe_gt_sriov_vf.h"
#include "xe_guc.h"
#include "xe_map.h"
#include "xe_mmio.h"
@@ -114,10 +115,11 @@ struct fw_blobs_by_type {
#define XE_GT_TYPE_ANY XE_GT_TYPE_UNINITIALIZED
#define XE_GUC_FIRMWARE_DEFS(fw_def, mmp_ver, major_ver) \
- fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 44, 1)) \
- fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 44, 1)) \
+ fw_def(PANTHERLAKE, GT_TYPE_ANY, major_ver(xe, guc, ptl, 70, 49, 4)) \
+ fw_def(BATTLEMAGE, GT_TYPE_ANY, major_ver(xe, guc, bmg, 70, 49, 4)) \
+ fw_def(LUNARLAKE, GT_TYPE_ANY, major_ver(xe, guc, lnl, 70, 45, 2)) \
fw_def(METEORLAKE, GT_TYPE_ANY, major_ver(i915, guc, mtl, 70, 44, 1)) \
- fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 44, 1)) \
+ fw_def(DG2, GT_TYPE_ANY, major_ver(i915, guc, dg2, 70, 45, 2)) \
fw_def(DG1, GT_TYPE_ANY, major_ver(i915, guc, dg1, 70, 44, 1)) \
fw_def(ALDERLAKE_N, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1)) \
fw_def(ALDERLAKE_P, GT_TYPE_ANY, major_ver(i915, guc, adlp, 70, 44, 1)) \
@@ -126,6 +128,7 @@ struct fw_blobs_by_type {
fw_def(TIGERLAKE, GT_TYPE_ANY, major_ver(i915, guc, tgl, 70, 44, 1))
#define XE_HUC_FIRMWARE_DEFS(fw_def, mmp_ver, no_ver) \
+ fw_def(PANTHERLAKE, GT_TYPE_ANY, no_ver(xe, huc, ptl)) \
fw_def(BATTLEMAGE, GT_TYPE_ANY, no_ver(xe, huc, bmg)) \
fw_def(LUNARLAKE, GT_TYPE_ANY, no_ver(xe, huc, lnl)) \
fw_def(METEORLAKE, GT_TYPE_ANY, no_ver(i915, huc_gsc, mtl)) \
@@ -325,7 +328,7 @@ static void uc_fw_fini(struct drm_device *drm, void *arg)
xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_SELECTED);
}
-static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
+static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_guc_info *guc_info)
{
struct xe_gt *gt = uc_fw_to_gt(uc_fw);
struct xe_uc_fw_version *release = &uc_fw->versions.found[XE_UC_FW_VER_RELEASE];
@@ -340,11 +343,12 @@ static int guc_read_css_info(struct xe_uc_fw *uc_fw, struct uc_css_header *css)
return -EINVAL;
}
- compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->submission_version);
- compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->submission_version);
- compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->submission_version);
+ compatibility->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, guc_info->submission_version);
+ compatibility->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, guc_info->submission_version);
+ compatibility->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, guc_info->submission_version);
- uc_fw->private_data_size = css->private_data_size;
+ uc_fw->build_type = FIELD_GET(CSS_UKERNEL_INFO_BUILDTYPE, guc_info->ukernel_info);
+ uc_fw->private_data_size = guc_info->private_data_size;
return 0;
}
@@ -413,8 +417,8 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
css = (struct uc_css_header *)fw_data;
/* Check integrity of size values inside CSS header */
- size = (css->header_size_dw - css->key_size_dw - css->modulus_size_dw -
- css->exponent_size_dw) * sizeof(u32);
+ size = (css->header_size_dw - css->rsa_info.key_size_dw - css->rsa_info.modulus_size_dw -
+ css->rsa_info.exponent_size_dw) * sizeof(u32);
if (unlikely(size != sizeof(struct uc_css_header))) {
drm_warn(&xe->drm,
"%s firmware %s: unexpected header size: %zu != %zu\n",
@@ -427,7 +431,7 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
uc_fw->ucode_size = (css->size_dw - css->header_size_dw) * sizeof(u32);
/* now RSA */
- uc_fw->rsa_size = css->key_size_dw * sizeof(u32);
+ uc_fw->rsa_size = css->rsa_info.key_size_dw * sizeof(u32);
/* At least, it should have header, uCode and RSA. Size of all three. */
size = sizeof(struct uc_css_header) + uc_fw->ucode_size +
@@ -440,12 +444,12 @@ static int parse_css_header(struct xe_uc_fw *uc_fw, const void *fw_data, size_t
}
/* Get version numbers from the CSS header */
- release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->sw_version);
- release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->sw_version);
- release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->sw_version);
+ release->major = FIELD_GET(CSS_SW_VERSION_UC_MAJOR, css->guc_info.sw_version);
+ release->minor = FIELD_GET(CSS_SW_VERSION_UC_MINOR, css->guc_info.sw_version);
+ release->patch = FIELD_GET(CSS_SW_VERSION_UC_PATCH, css->guc_info.sw_version);
if (uc_fw->type == XE_UC_FW_TYPE_GUC)
- return guc_read_css_info(uc_fw, css);
+ return guc_read_css_info(uc_fw, &css->guc_info);
return 0;
}
@@ -662,11 +666,39 @@ do { \
ver_->major, ver_->minor, ver_->patch); \
} while (0)
+static void uc_fw_vf_override(struct xe_uc_fw *uc_fw)
+{
+ struct xe_uc_fw_version *compat = &uc_fw->versions.found[XE_UC_FW_VER_COMPATIBILITY];
+ struct xe_uc_fw_version *wanted = &uc_fw->versions.wanted;
+
+ /* Only GuC/HuC are supported */
+ if (uc_fw->type != XE_UC_FW_TYPE_GUC && uc_fw->type != XE_UC_FW_TYPE_HUC)
+ uc_fw->path = NULL;
+
+ /* VF will support only firmwares that driver can autoselect */
+ xe_uc_fw_change_status(uc_fw, uc_fw->path ?
+ XE_UC_FIRMWARE_PRELOADED :
+ XE_UC_FIRMWARE_NOT_SUPPORTED);
+
+ if (!xe_uc_fw_is_supported(uc_fw))
+ return;
+
+ /* PF is doing the loading, so we don't need a path on the VF */
+ uc_fw->path = "Loaded by PF";
+
+ /* The GuC versions are set up during the VF bootstrap */
+ if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
+ uc_fw->versions.wanted_type = XE_UC_FW_VER_COMPATIBILITY;
+ xe_gt_sriov_vf_guc_versions(uc_fw_to_gt(uc_fw), wanted, compat);
+ }
+}
+
static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmware_p)
{
struct xe_device *xe = uc_fw_to_xe(uc_fw);
+ struct xe_gt *gt = uc_fw_to_gt(uc_fw);
+ struct drm_printer p = xe_gt_info_printer(gt);
struct device *dev = xe->drm.dev;
- struct drm_printer p = drm_info_printer(dev);
const struct firmware *fw = NULL;
int err;
@@ -675,20 +707,13 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar
* before we're looked at the HW caps to see if we have uc support
*/
BUILD_BUG_ON(XE_UC_FIRMWARE_UNINITIALIZED);
- xe_assert(xe, !uc_fw->status);
- xe_assert(xe, !uc_fw->path);
+ xe_gt_assert(gt, !uc_fw->status);
+ xe_gt_assert(gt, !uc_fw->path);
uc_fw_auto_select(xe, uc_fw);
if (IS_SRIOV_VF(xe)) {
- /* Only GuC/HuC are supported */
- if (uc_fw->type != XE_UC_FW_TYPE_GUC &&
- uc_fw->type != XE_UC_FW_TYPE_HUC)
- uc_fw->path = NULL;
- /* VF will support only firmwares that driver can autoselect */
- xe_uc_fw_change_status(uc_fw, uc_fw->path ?
- XE_UC_FIRMWARE_PRELOADED :
- XE_UC_FIRMWARE_NOT_SUPPORTED);
+ uc_fw_vf_override(uc_fw);
return 0;
}
@@ -700,7 +725,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar
if (!xe_uc_fw_is_supported(uc_fw)) {
if (uc_fw->type == XE_UC_FW_TYPE_GUC) {
- drm_err(&xe->drm, "No GuC firmware defined for platform\n");
+ xe_gt_err(gt, "No GuC firmware defined for platform\n");
return -ENOENT;
}
return 0;
@@ -709,7 +734,7 @@ static int uc_fw_request(struct xe_uc_fw *uc_fw, const struct firmware **firmwar
/* an empty path means the firmware is disabled */
if (!xe_device_uc_enabled(xe) || !(*uc_fw->path)) {
xe_uc_fw_change_status(uc_fw, XE_UC_FIRMWARE_DISABLED);
- drm_dbg(&xe->drm, "%s disabled", xe_uc_fw_type_repr(uc_fw->type));
+ xe_gt_dbg(gt, "%s disabled\n", xe_uc_fw_type_repr(uc_fw->type));
return 0;
}
@@ -742,10 +767,10 @@ fail:
XE_UC_FIRMWARE_MISSING :
XE_UC_FIRMWARE_ERROR);
- drm_notice(&xe->drm, "%s firmware %s: fetch failed with error %d\n",
- xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, err);
- drm_info(&xe->drm, "%s firmware(s) can be downloaded from %s\n",
- xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
+ xe_gt_notice(gt, "%s firmware %s: fetch failed with error %pe\n",
+ xe_uc_fw_type_repr(uc_fw->type), uc_fw->path, ERR_PTR(err));
+ xe_gt_info(gt, "%s firmware(s) can be downloaded from %s\n",
+ xe_uc_fw_type_repr(uc_fw->type), XE_UC_FIRMWARE_URL);
release_firmware(fw); /* OK even if fw is NULL */
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_abi.h b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
index 87ade41209d0..3c9a63d13032 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_abi.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_abi.h
@@ -44,6 +44,39 @@
* in fw. So driver will load a truncated firmware in this case.
*/
+struct uc_css_rsa_info {
+ u32 key_size_dw;
+ u32 modulus_size_dw;
+ u32 exponent_size_dw;
+} __packed;
+
+struct uc_css_guc_info {
+ u32 time;
+#define CSS_TIME_HOUR (0xFF << 0)
+#define CSS_TIME_MIN (0xFF << 8)
+#define CSS_TIME_SEC (0xFFFF << 16)
+ u32 reserved0[5];
+ u32 sw_version;
+#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
+#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
+#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
+ u32 submission_version;
+ u32 reserved1[11];
+ u32 header_info;
+#define CSS_HEADER_INFO_SVN (0xFF)
+#define CSS_HEADER_INFO_COPY_VALID (0x1 << 31)
+ u32 private_data_size;
+ u32 ukernel_info;
+#define CSS_UKERNEL_INFO_DEVICEID (0xFFFF << 16)
+#define CSS_UKERNEL_INFO_PRODKEY (0xFF << 8)
+#define CSS_UKERNEL_INFO_BUILDTYPE (0x3 << 2)
+#define CSS_UKERNEL_INFO_BUILDTYPE_PROD 0
+#define CSS_UKERNEL_INFO_BUILDTYPE_PREPROD 1
+#define CSS_UKERNEL_INFO_BUILDTYPE_DEBUG 2
+#define CSS_UKERNEL_INFO_ENCSTATUS (0x1 << 1)
+#define CSS_UKERNEL_INFO_COPY_VALID (0x1 << 0)
+} __packed;
+
struct uc_css_header {
u32 module_type;
/*
@@ -52,36 +85,21 @@ struct uc_css_header {
*/
u32 header_size_dw;
u32 header_version;
- u32 module_id;
+ u32 reserved0;
u32 module_vendor;
u32 date;
-#define CSS_DATE_DAY (0xFF << 0)
-#define CSS_DATE_MONTH (0xFF << 8)
-#define CSS_DATE_YEAR (0xFFFF << 16)
+#define CSS_DATE_DAY (0xFF << 0)
+#define CSS_DATE_MONTH (0xFF << 8)
+#define CSS_DATE_YEAR (0xFFFF << 16)
u32 size_dw; /* uCode plus header_size_dw */
- u32 key_size_dw;
- u32 modulus_size_dw;
- u32 exponent_size_dw;
- u32 time;
-#define CSS_TIME_HOUR (0xFF << 0)
-#define CSS_DATE_MIN (0xFF << 8)
-#define CSS_DATE_SEC (0xFFFF << 16)
- char username[8];
- char buildnumber[12];
- u32 sw_version;
-#define CSS_SW_VERSION_UC_MAJOR (0xFF << 16)
-#define CSS_SW_VERSION_UC_MINOR (0xFF << 8)
-#define CSS_SW_VERSION_UC_PATCH (0xFF << 0)
union {
- u32 submission_version; /* only applies to GuC */
- u32 reserved2;
+ u32 reserved1[3];
+ struct uc_css_rsa_info rsa_info;
};
- u32 reserved0[12];
union {
- u32 private_data_size; /* only applies to GuC */
- u32 reserved1;
+ u32 reserved2[22];
+ struct uc_css_guc_info guc_info;
};
- u32 header_info;
} __packed;
static_assert(sizeof(struct uc_css_header) == 128);
@@ -318,4 +336,70 @@ struct gsc_manifest_header {
u32 exponent_size; /* in dwords */
} __packed;
+/**
+ * DOC: Late binding Firmware Layout
+ *
+ * The late binding binary starts with an FPT header, which contains the
+ * locations of the various partitions of the binary. Here we're interested in
+ * finding the manifest version. To get to it, we need to locate the CPD
+ * header; one of the entries in the CPD header points to the manifest header,
+ * which contains the version.
+ *
+ * +================================================+
+ * | FPT Header |
+ * +================================================+
+ * | FPT entries[] |
+ * | entry1 |
+ * | ... |
+ * | entryX |
+ * | "LTES" |
+ * | ... |
+ * | offset >-----------------------------|------o
+ * +================================================+ |
+ * |
+ * +================================================+ |
+ * | CPD Header |<-----o
+ * +================================================+
+ * | CPD entries[] |
+ * | entry1 |
+ * | ... |
+ * | entryX |
+ * | "LTES.man" |
+ * | ... |
+ * | offset >----------------------------|------o
+ * +================================================+ |
+ * |
+ * +================================================+ |
+ * | Manifest Header |<-----o
+ * | ... |
+ * | FW version |
+ * | ... |
+ * +================================================+
+ */
+
+/* FPT Headers */
+struct csc_fpt_header {
+ u32 header_marker;
+#define CSC_FPT_HEADER_MARKER 0x54504624
+ u32 num_of_entries;
+ u8 header_version;
+ u8 entry_version;
+ u8 header_length; /* in bytes */
+ u8 flags;
+ u16 ticks_to_add;
+ u16 tokens_to_add;
+ u32 uma_size;
+ u32 crc32;
+ struct gsc_version fitc_version;
+} __packed;
+
+struct csc_fpt_entry {
+ u8 name[4]; /* partition name */
+ u32 reserved1;
+ u32 offset; /* offset from beginning of CSE region */
+ u32 length; /* partition length in bytes */
+ u32 reserved2[3];
+ u32 partition_flags;
+} __packed;
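+
+/*
+ * Illustrative walk of the layout documented above (not part of this
+ * interface; the "LTES" lookup and the assumption that the entry array
+ * immediately follows the FPT header are based on the diagram):
+ *
+ *	const struct csc_fpt_header *fpt = data;
+ *	const struct csc_fpt_entry *e;
+ *	u32 i;
+ *
+ *	if (fpt->header_marker != CSC_FPT_HEADER_MARKER)
+ *		return -EINVAL;
+ *
+ *	e = (const void *)((const u8 *)fpt + fpt->header_length);
+ *	for (i = 0; i < fpt->num_of_entries; i++, e++)
+ *		if (!memcmp(e->name, "LTES", 4))
+ *			break;	(e->offset now points at the CPD header)
+ */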
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_fw_types.h b/drivers/gpu/drm/xe/xe_uc_fw_types.h
index ad3b35a0e6eb..2ebe8c9db6ce 100644
--- a/drivers/gpu/drm/xe/xe_uc_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_fw_types.h
@@ -62,9 +62,11 @@ enum xe_uc_fw_type {
};
/**
- * struct xe_uc_fw_version - Version for XE micro controller firmware
+ * struct xe_uc_fw_version - Version for Xe micro controller firmware
*/
struct xe_uc_fw_version {
+ /** @branch: branch version of the FW (not always available) */
+ u16 branch;
/** @major: major version of the FW */
u16 major;
/** @minor: minor version of the FW */
@@ -82,7 +84,7 @@ enum xe_uc_fw_version_types {
};
/**
- * struct xe_uc_fw - XE micro controller firmware
+ * struct xe_uc_fw - Xe micro controller firmware
*/
struct xe_uc_fw {
/** @type: type uC firmware */
@@ -110,7 +112,7 @@ struct xe_uc_fw {
/** @size: size of uC firmware including css header */
size_t size;
- /** @bo: XE BO for uC firmware */
+ /** @bo: Xe BO for uC firmware */
struct xe_bo *bo;
/** @has_gsc_headers: whether the FW image starts with GSC headers */
@@ -145,6 +147,9 @@ struct xe_uc_fw {
/** @private_data_size: size of private data found in uC css header */
u32 private_data_size;
+
+ /** @build_type: Firmware build type (see CSS_UKERNEL_INFO_BUILDTYPE for definitions) */
+ u32 build_type;
};
#endif
diff --git a/drivers/gpu/drm/xe/xe_uc_types.h b/drivers/gpu/drm/xe/xe_uc_types.h
index 9924e4484866..1708379dc834 100644
--- a/drivers/gpu/drm/xe/xe_uc_types.h
+++ b/drivers/gpu/drm/xe/xe_uc_types.h
@@ -12,7 +12,7 @@
#include "xe_wopcm_types.h"
/**
- * struct xe_uc - XE micro controllers
+ * struct xe_uc - Xe micro controllers
*/
struct xe_uc {
/** @guc: Graphics micro controller */
diff --git a/drivers/gpu/drm/xe/xe_userptr.c b/drivers/gpu/drm/xe/xe_userptr.c
new file mode 100644
index 000000000000..0d9130b1958a
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_userptr.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_svm.h"
+#include "xe_userptr.h"
+
+#include <linux/mm.h>
+
+#include "xe_trace_bo.h"
+
+/**
+ * xe_vma_userptr_check_repin() - Advisory check for repin needed
+ * @uvma: The userptr vma
+ *
+ * Check if the userptr vma has been invalidated since the last successful
+ * repin. The check is advisory only and the function can be called without
+ * the vm->svm.gpusvm.notifier_lock held. There is no guarantee that the
+ * vma userptr will remain valid after a lockless check, so typically
+ * the call needs to be followed by a proper check under the notifier_lock.
+ *
+ * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ */
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+{
+ return mmu_interval_check_retry(&uvma->userptr.notifier,
+ uvma->userptr.pages.notifier_seq) ?
+ -EAGAIN : 0;
+}
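+
+/*
+ * Typical pattern (illustrative only): use the lockless check as a cheap
+ * early-out, then do the authoritative check under the notifier lock before
+ * relying on the pinned pages:
+ *
+ *	if (xe_vma_userptr_check_repin(uvma))
+ *		return -EAGAIN;		(caller repins and retries)
+ *
+ *	down_read(&vm->svm.gpusvm.notifier_lock);
+ *	(authoritative check / bind happens here)
+ *	up_read(&vm->svm.gpusvm.notifier_lock);
+ */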
+
+/**
+ * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function checks whether the VM has userptrs that need repinning,
+ * and provides a release-type barrier on the svm.gpusvm.notifier_lock after
+ * checking.
+ *
+ * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
+ */
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
+{
+ lockdep_assert_held_read(&vm->svm.gpusvm.notifier_lock);
+
+ return (list_empty(&vm->userptr.repin_list) &&
+ list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
+{
+ struct xe_vma *vma = &uvma->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+ struct xe_device *xe = vm->xe;
+ struct drm_gpusvm_ctx ctx = {
+ .read_only = xe_vma_read_only(vma),
+ .device_private_page_owner = xe_svm_devm_owner(xe),
+ .allow_mixed = true,
+ };
+
+ lockdep_assert_held(&vm->lock);
+ xe_assert(xe, xe_vma_is_userptr(vma));
+
+ if (vma->gpuva.flags & XE_VMA_DESTROYED)
+ return 0;
+
+ return drm_gpusvm_get_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ uvma->userptr.notifier.mm,
+ &uvma->userptr.notifier,
+ xe_vma_userptr(vma),
+ xe_vma_userptr(vma) + xe_vma_size(vma),
+ &ctx);
+}
+
+static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ struct xe_vma *vma = &uvma->vma;
+ struct dma_resv_iter cursor;
+ struct dma_fence *fence;
+ struct drm_gpusvm_ctx ctx = {
+ .in_notifier = true,
+ .read_only = xe_vma_read_only(vma),
+ };
+ long err;
+
+ /*
+ * Tell exec and rebind worker they need to repin and rebind this
+ * userptr.
+ */
+ if (!xe_vm_in_fault_mode(vm) &&
+ !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_move_tail(&userptr->invalidate_link,
+ &vm->userptr.invalidated);
+ spin_unlock(&vm->userptr.invalidated_lock);
+ }
+
+ /*
+ * Preempt fences turn into schedule disables, pipeline these.
+ * Note that even in fault mode, we need to wait for binds and
+ * unbinds to complete, and those are attached as BOOKMARK fences
+ * to the vm.
+ */
+ dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP);
+ dma_resv_for_each_fence_unlocked(&cursor, fence)
+ dma_fence_enable_sw_signaling(fence);
+ dma_resv_iter_end(&cursor);
+
+ err = dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+ XE_WARN_ON(err <= 0);
+
+ if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
+ err = xe_vm_invalidate_vma(vma);
+ XE_WARN_ON(err);
+ }
+
+ drm_gpusvm_unmap_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(vma) >> PAGE_SHIFT, &ctx);
+}
+
+static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
+ const struct mmu_notifier_range *range,
+ unsigned long cur_seq)
+{
+ struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
+ struct xe_vma *vma = &uvma->vma;
+ struct xe_vm *vm = xe_vma_vm(vma);
+
+ xe_assert(vm->xe, xe_vma_is_userptr(vma));
+ trace_xe_vma_userptr_invalidate(vma);
+
+ if (!mmu_notifier_range_blockable(range))
+ return false;
+
+ vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ "NOTIFIER: addr=0x%016llx, range=0x%016llx",
+ xe_vma_start(vma), xe_vma_size(vma));
+
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ mmu_interval_set_seq(mni, cur_seq);
+
+ __vma_userptr_invalidate(vm, uvma);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ trace_xe_vma_userptr_invalidate_complete(vma);
+
+ return true;
+}
+
+static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
+ .invalidate = vma_userptr_invalidate,
+};
+
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+/**
+ * xe_vma_userptr_force_invalidate() - force invalidate a userptr
+ * @uvma: The userptr vma to invalidate
+ *
+ * Perform a forced userptr invalidation for testing purposes.
+ */
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+ /* Protect against concurrent userptr pinning */
+ lockdep_assert_held(&vm->lock);
+ /* Protect against concurrent notifiers */
+ lockdep_assert_held(&vm->svm.gpusvm.notifier_lock);
+ /*
+ * Protect against concurrent instances of this function and
+ * the critical exec sections
+ */
+ xe_vm_assert_held(vm);
+
+ if (!mmu_interval_read_retry(&uvma->userptr.notifier,
+ uvma->userptr.pages.notifier_seq))
+ uvma->userptr.pages.notifier_seq -= 2;
+ __vma_userptr_invalidate(vm, uvma);
+}
+#endif
+
+int xe_vm_userptr_pin(struct xe_vm *vm)
+{
+ struct xe_userptr_vma *uvma, *next;
+ int err = 0;
+
+ xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
+ lockdep_assert_held_write(&vm->lock);
+
+ /* Collect invalidated userptrs */
+ spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
+ list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
+ userptr.invalidate_link) {
+ list_del_init(&uvma->userptr.invalidate_link);
+ list_add_tail(&uvma->userptr.repin_link,
+ &vm->userptr.repin_list);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+
+ /* Pin and move to bind list */
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ err = xe_vma_userptr_pin_pages(uvma);
+ if (err == -EFAULT) {
+ list_del_init(&uvma->userptr.repin_link);
+ /*
+ * We might have already done the pin once already, but
+			 * then had to retry before the re-bind happened, due to
+ * some other condition in the caller, but in the
+ * meantime the userptr got dinged by the notifier such
+ * that we need to revalidate here, but this time we hit
+ * the EFAULT. In such a case make sure we remove
+ * ourselves from the rebind list to avoid going down in
+ * flames.
+ */
+ if (!list_empty(&uvma->vma.combined_links.rebind))
+ list_del_init(&uvma->vma.combined_links.rebind);
+
+ /* Wait for pending binds */
+ xe_vm_lock(vm, false);
+ dma_resv_wait_timeout(xe_vm_resv(vm),
+ DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT);
+
+ down_read(&vm->svm.gpusvm.notifier_lock);
+ err = xe_vm_invalidate_vma(&uvma->vma);
+ up_read(&vm->svm.gpusvm.notifier_lock);
+ xe_vm_unlock(vm);
+ if (err)
+ break;
+ } else {
+ if (err)
+ break;
+
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->vma.combined_links.rebind,
+ &vm->rebind_list);
+ }
+ }
+
+ if (err) {
+ down_write(&vm->svm.gpusvm.notifier_lock);
+ spin_lock(&vm->userptr.invalidated_lock);
+ list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
+ userptr.repin_link) {
+ list_del_init(&uvma->userptr.repin_link);
+ list_move_tail(&uvma->userptr.invalidate_link,
+ &vm->userptr.invalidated);
+ }
+ spin_unlock(&vm->userptr.invalidated_lock);
+ up_write(&vm->svm.gpusvm.notifier_lock);
+ }
+ return err;
+}
+
+/**
+ * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
+ * that need repinning.
+ * @vm: The VM.
+ *
+ * This function does an advisory check for whether the VM has userptrs that
+ * need repinning.
+ *
+ * Return: 0 if there are no indications of userptrs needing repinning,
+ * -EAGAIN if there are.
+ */
+int xe_vm_userptr_check_repin(struct xe_vm *vm)
+{
+ return (list_empty_careful(&vm->userptr.repin_list) &&
+ list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
+}
+
+int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
+ unsigned long range)
+{
+ struct xe_userptr *userptr = &uvma->userptr;
+ int err;
+
+ INIT_LIST_HEAD(&userptr->invalidate_link);
+ INIT_LIST_HEAD(&userptr->repin_link);
+
+ err = mmu_interval_notifier_insert(&userptr->notifier, current->mm,
+ start, range,
+ &vma_userptr_notifier_ops);
+ if (err)
+ return err;
+
+ userptr->pages.notifier_seq = LONG_MAX;
+
+ return 0;
+}
+
+void xe_userptr_remove(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+ struct xe_userptr *userptr = &uvma->userptr;
+
+ drm_gpusvm_free_pages(&vm->svm.gpusvm, &uvma->userptr.pages,
+ xe_vma_size(&uvma->vma) >> PAGE_SHIFT);
+
+ /*
+ * Since userptr pages are not pinned, we can't remove
+ * the notifier until we're sure the GPU is not accessing
+ * them anymore
+ */
+ mmu_interval_notifier_remove(&userptr->notifier);
+}
+
+void xe_userptr_destroy(struct xe_userptr_vma *uvma)
+{
+ struct xe_vm *vm = xe_vma_vm(&uvma->vma);
+
+ spin_lock(&vm->userptr.invalidated_lock);
+ xe_assert(vm->xe, list_empty(&uvma->userptr.repin_link));
+ list_del(&uvma->userptr.invalidate_link);
+ spin_unlock(&vm->userptr.invalidated_lock);
+}
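For orientation, a condensed sketch of how the xe_vm.c changes later in this patch drive these helpers over a userptr VMA's lifetime (illustrative only; in the real driver the teardown steps are split across xe_vma_destroy() and xe_vma_destroy_late()):

/*
 * Lifecycle sketch: notifier setup at VMA creation, list unlink when
 * the VMA is destroyed, and notifier/page teardown only once the GPU
 * can no longer access the pages.
 */
static int example_userptr_lifetime(struct xe_userptr_vma *uvma,
				    unsigned long start, unsigned long range)
{
	int err;

	/* Creation: insert the MMU interval notifier for [start, start + range) */
	err = xe_userptr_setup(uvma, start, range);
	if (err)
		return err;

	/* ... bind / invalidate / repin cycles happen here ... */

	/* Destroy: unlink from the VM's invalidated/repin bookkeeping ... */
	xe_userptr_destroy(uvma);

	/* ... then drop the gpusvm pages and remove the notifier (late destroy). */
	xe_userptr_remove(uvma);

	return 0;
}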
diff --git a/drivers/gpu/drm/xe/xe_userptr.h b/drivers/gpu/drm/xe/xe_userptr.h
new file mode 100644
index 000000000000..ef801234991e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_userptr.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_USERPTR_H_
+#define _XE_USERPTR_H_
+
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/notifier.h>
+#include <linux/scatterlist.h>
+#include <linux/spinlock.h>
+
+#include <drm/drm_gpusvm.h>
+
+struct xe_vm;
+struct xe_vma;
+struct xe_userptr_vma;
+
+/** struct xe_userptr_vm - User pointer VM level state */
+struct xe_userptr_vm {
+ /**
+ * @userptr.repin_list: list of VMAs which are user pointers,
+	 * and need repinning. Protected by @lock.
+ */
+ struct list_head repin_list;
+ /**
+ * @userptr.invalidated_lock: Protects the
+ * @userptr.invalidated list.
+ */
+ spinlock_t invalidated_lock;
+ /**
+	 * @userptr.invalidated: List of invalidated userptrs, not yet
+	 * picked up for revalidation. Protected from access with the
+	 * @invalidated_lock. Removing items from the list
+	 * additionally requires @lock in write mode, and adding
+	 * items to the list requires either the
+	 * @svm.gpusvm.notifier_lock in write mode, OR @lock in write
+	 * mode.
+ */
+ struct list_head invalidated;
+};
+
+/** struct xe_userptr - User pointer */
+struct xe_userptr {
+ /** @invalidate_link: Link for the vm::userptr.invalidated list */
+ struct list_head invalidate_link;
+	/** @repin_link: link into VM repin list if userptr. */
+ struct list_head repin_link;
+ /**
+ * @pages: gpusvm pages for this user pointer.
+ */
+ struct drm_gpusvm_pages pages;
+ /**
+ * @notifier: MMU notifier for user pointer (invalidation call back)
+ */
+ struct mmu_interval_notifier notifier;
+
+ /**
+ * @initial_bind: user pointer has been bound at least once.
+ * write: vm->svm.gpusvm.notifier_lock in read mode and vm->resv held.
+ * read: vm->svm.gpusvm.notifier_lock in write mode or vm->resv held.
+ */
+ bool initial_bind;
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+ u32 divisor;
+#endif
+};
+
+#if IS_ENABLED(CONFIG_DRM_GPUSVM)
+void xe_userptr_remove(struct xe_userptr_vma *uvma);
+int xe_userptr_setup(struct xe_userptr_vma *uvma, unsigned long start,
+ unsigned long range);
+void xe_userptr_destroy(struct xe_userptr_vma *uvma);
+
+int xe_vm_userptr_pin(struct xe_vm *vm);
+int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
+int xe_vm_userptr_check_repin(struct xe_vm *vm);
+int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
+int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_userptr_remove(struct xe_userptr_vma *uvma) {}
+
+static inline int xe_userptr_setup(struct xe_userptr_vma *uvma,
+ unsigned long start, unsigned long range)
+{
+ return -ENODEV;
+}
+
+static inline void xe_userptr_destroy(struct xe_userptr_vma *uvma) {}
+
+static inline int xe_vm_userptr_pin(struct xe_vm *vm) { return 0; }
+static inline int __xe_vm_userptr_needs_repin(struct xe_vm *vm) { return 0; }
+static inline int xe_vm_userptr_check_repin(struct xe_vm *vm) { return 0; }
+static inline int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma) { return -ENODEV; }
+static inline int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma) { return -ENODEV; }
+#endif
+
+#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
+void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
+#else
+static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+{
+}
+#endif
+#endif
diff --git a/drivers/gpu/drm/xe/xe_validation.c b/drivers/gpu/drm/xe/xe_validation.c
new file mode 100644
index 000000000000..826cd09966ef
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_validation.c
@@ -0,0 +1,278 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#include "xe_bo.h"
+#include <drm/drm_exec.h>
+#include <drm/drm_gem.h>
+#include <drm/drm_gpuvm.h>
+
+#include "xe_assert.h"
+#include "xe_validation.h"
+
+#ifdef CONFIG_DRM_XE_DEBUG
+/**
+ * xe_validation_assert_exec() - Assert that the drm_exec pointer is suitable
+ * for validation.
+ * @xe: Pointer to the xe device.
+ * @exec: The drm_exec pointer to check.
+ * @obj: Pointer to the object subject to validation.
+ *
+ * NULL exec pointers are not allowed.
+ * For XE_VALIDATION_UNIMPLEMENTED, no checking.
+ * For XE_VALIDATION_OPT_OUT, check that the caller is a kunit test.
+ * For XE_VALIDATION_UNSUPPORTED, check that the object subject to
+ * validation is a dma-buf, for which support for ww locking is
+ * not in place in the dma-buf layer.
+ */
+void xe_validation_assert_exec(const struct xe_device *xe,
+ const struct drm_exec *exec,
+ const struct drm_gem_object *obj)
+{
+ xe_assert(xe, exec);
+ if (IS_ERR(exec)) {
+ switch (PTR_ERR(exec)) {
+ case __XE_VAL_UNIMPLEMENTED:
+ break;
+ case __XE_VAL_UNSUPPORTED:
+ xe_assert(xe, !!obj->dma_buf);
+ break;
+#if IS_ENABLED(CONFIG_KUNIT)
+ case __XE_VAL_OPT_OUT:
+ xe_assert(xe, current->kunit_test);
+ break;
+#endif
+ default:
+ xe_assert(xe, false);
+ }
+ }
+}
+#endif
+
+static int xe_validation_lock(struct xe_validation_ctx *ctx)
+{
+ struct xe_validation_device *val = ctx->val;
+ int ret = 0;
+
+ if (ctx->val_flags.interruptible) {
+ if (ctx->request_exclusive)
+ ret = down_write_killable(&val->lock);
+ else
+ ret = down_read_interruptible(&val->lock);
+ } else {
+ if (ctx->request_exclusive)
+ down_write(&val->lock);
+ else
+ down_read(&val->lock);
+ }
+
+ if (!ret) {
+ ctx->lock_held = true;
+ ctx->lock_held_exclusive = ctx->request_exclusive;
+ }
+
+ return ret;
+}
+
+static int xe_validation_trylock(struct xe_validation_ctx *ctx)
+{
+ struct xe_validation_device *val = ctx->val;
+ bool locked;
+
+ if (ctx->request_exclusive)
+ locked = down_write_trylock(&val->lock);
+ else
+ locked = down_read_trylock(&val->lock);
+
+ if (locked) {
+ ctx->lock_held = true;
+ ctx->lock_held_exclusive = ctx->request_exclusive;
+ }
+
+ return locked ? 0 : -EWOULDBLOCK;
+}
+
+static void xe_validation_unlock(struct xe_validation_ctx *ctx)
+{
+ if (!ctx->lock_held)
+ return;
+
+ if (ctx->lock_held_exclusive)
+ up_write(&ctx->val->lock);
+ else
+ up_read(&ctx->val->lock);
+
+ ctx->lock_held = false;
+}
+
+/**
+ * xe_validation_ctx_init() - Initialize an xe_validation_ctx
+ * @ctx: The xe_validation_ctx to initialize.
+ * @val: The xe_validation_device representing the validation domain.
+ * @exec: The struct drm_exec to use for the transaction. May be NULL.
+ * @flags: The flags to use for initialization.
+ *
+ * Initialize and lock an xe_validation transaction using the validation domain
+ * represented by @val. Also initialize the drm_exec object forwarding parts of
+ * @flags to the drm_exec initialization. The @flags.exclusive flag should
+ * typically be set to false to avoid locking out other validators from the
+ * domain until an OOM is hit. For testing- or final attempt purposes it can,
+ * however, be set to true.
+ *
+ * Return: %0 on success, %-EINTR if interruptible initial locking failed with a
+ * signal pending. If @flags.no_block is set to true, a failed trylock
+ * returns %-EWOULDBLOCK.
+ */
+int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val,
+ struct drm_exec *exec, const struct xe_val_flags flags)
+{
+ int ret;
+
+ ctx->exec = exec;
+ ctx->val = val;
+ ctx->lock_held = false;
+ ctx->lock_held_exclusive = false;
+ ctx->request_exclusive = flags.exclusive;
+ ctx->val_flags = flags;
+ ctx->exec_flags = 0;
+ ctx->nr = 0;
+
+ if (flags.no_block)
+ ret = xe_validation_trylock(ctx);
+ else
+ ret = xe_validation_lock(ctx);
+ if (ret)
+ return ret;
+
+ if (exec) {
+ if (flags.interruptible)
+ ctx->exec_flags |= DRM_EXEC_INTERRUPTIBLE_WAIT;
+ if (flags.exec_ignore_duplicates)
+ ctx->exec_flags |= DRM_EXEC_IGNORE_DUPLICATES;
+ drm_exec_init(exec, ctx->exec_flags, ctx->nr);
+ }
+
+ return 0;
+}
+
+#ifdef CONFIG_DEBUG_WW_MUTEX_SLOWPATH
+/*
+ * This abuses both drm_exec and ww_mutex internals and should be
+ * replaced by checking for -EDEADLK when we can make TTM
+ * stop converting -EDEADLK to -ENOMEM.
+ * An alternative is to not have exhaustive eviction with
+ * CONFIG_DEBUG_WW_MUTEX_SLOWPATH until that happens.
+ */
+static bool xe_validation_contention_injected(struct drm_exec *exec)
+{
+ return !!exec->ticket.contending_lock;
+}
+
+#else
+
+static bool xe_validation_contention_injected(struct drm_exec *exec)
+{
+ return false;
+}
+
+#endif
+
+static bool __xe_validation_should_retry(struct xe_validation_ctx *ctx, int ret)
+{
+ if (ret == -ENOMEM &&
+ ((ctx->request_exclusive &&
+ xe_validation_contention_injected(ctx->exec)) ||
+ !ctx->request_exclusive)) {
+ ctx->request_exclusive = true;
+ return true;
+ }
+
+ return false;
+}
+
+/**
+ * xe_validation_exec_lock() - Perform drm_gpuvm_exec_lock within a validation
+ * transaction.
+ * @ctx: An uninitialized xe_validation_ctx.
+ * @vm_exec: An initialized struct vm_exec.
+ * @val: The validation domain.
+ *
+ * The drm_gpuvm_exec_lock() function internally initializes its drm_exec
+ * transaction and therefore doesn't lend itself very well to be using
+ * xe_validation_ctx_init(). Provide a helper that takes an uninitialized
+ * xe_validation_ctx and calls drm_gpuvm_exec_lock() with OOM retry.
+ *
+ * Return: %0 on success, negative error code on failure.
+ */
+int xe_validation_exec_lock(struct xe_validation_ctx *ctx,
+ struct drm_gpuvm_exec *vm_exec,
+ struct xe_validation_device *val)
+{
+ int ret;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->exec = &vm_exec->exec;
+ ctx->exec_flags = vm_exec->flags;
+ ctx->val = val;
+ if (ctx->exec_flags & DRM_EXEC_INTERRUPTIBLE_WAIT)
+ ctx->val_flags.interruptible = 1;
+ if (ctx->exec_flags & DRM_EXEC_IGNORE_DUPLICATES)
+ ctx->val_flags.exec_ignore_duplicates = 1;
+retry:
+ ret = xe_validation_lock(ctx);
+ if (ret)
+ return ret;
+
+ ret = drm_gpuvm_exec_lock(vm_exec);
+ if (ret) {
+ xe_validation_unlock(ctx);
+ if (__xe_validation_should_retry(ctx, ret))
+ goto retry;
+ }
+
+ return ret;
+}
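As a usage sketch (not part of the patch, assuming xe->val is the device-wide validation domain as in the xe_vm.c changes later in this patch), a caller with a struct drm_gpuvm_exec pairs this helper with xe_validation_ctx_fini():

/*
 * Hypothetical caller: lock a GPU VM's objects inside a validation
 * transaction, do the work, then finalize both the drm_exec and the
 * validation transaction in one call.
 */
static int example_lock_vm_objects(struct xe_vm *vm)
{
	struct drm_gpuvm_exec vm_exec = {
		.vm = &vm->gpuvm,
		.flags = DRM_EXEC_INTERRUPTIBLE_WAIT,
		.num_fences = 1,
	};
	struct xe_validation_ctx ctx;
	int err;

	err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
	if (err)
		return err;

	/* ... validation / binding work against the locked objects ... */

	xe_validation_ctx_fini(&ctx);	/* also calls drm_exec_fini() on vm_exec.exec */
	return 0;
}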
+
+/**
+ * xe_validation_ctx_fini() - Finalize a validation transaction
+ * @ctx: The Validation transaction to finalize.
+ *
+ * Finalize a validation transaction and its related drm_exec transaction.
+ */
+void xe_validation_ctx_fini(struct xe_validation_ctx *ctx)
+{
+ if (ctx->exec)
+ drm_exec_fini(ctx->exec);
+ xe_validation_unlock(ctx);
+}
+
+/**
+ * xe_validation_should_retry() - Determine if a validation transaction should retry
+ * @ctx: The validation transaction.
+ * @ret: Pointer to a return value variable.
+ *
+ * Determines whether a validation transaction should retry based on the
+ * internal transaction state and the return value pointed to by @ret.
+ * If a validation should be retried, the transaction is prepared for that,
+ * and the validation lock might be re-acquired in exclusive mode, and *@ret
+ * is set to %0. If the re-locking fails, typically due to interruptible
+ * locking with signal pending, *@ret is instead set to -EINTR and the
+ * function returns %false.
+ *
+ * Return: %true if validation should be retried, %false otherwise.
+ */
+bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret)
+{
+ if (__xe_validation_should_retry(ctx, *ret)) {
+ drm_exec_fini(ctx->exec);
+ *ret = 0;
+ if (ctx->request_exclusive != ctx->lock_held_exclusive) {
+ xe_validation_unlock(ctx);
+ *ret = xe_validation_lock(ctx);
+ }
+ drm_exec_init(ctx->exec, ctx->exec_flags, ctx->nr);
+ return !*ret;
+ }
+
+ return false;
+}
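Pulling these pieces together, a minimal sketch of a validation transaction with OOM retry (illustrative only, not part of the patch): the exec-taking xe_bo_validate() signature and xe->val as the device validation domain are assumed from the xe_vm.c changes later in this patch, and xe_validation_retry_on_oom() comes from the header added next.

/*
 * Hypothetical validation loop: take the validation domain, lock the
 * object under drm_exec, validate, and restart in exclusive mode if
 * -ENOMEM indicates memory contention.
 */
static int example_validate_bo(struct xe_device *xe, struct xe_vm *vm,
			       struct xe_bo *bo)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err;

	err = xe_validation_ctx_init(&ctx, &xe->val, &exec,
				     (struct xe_val_flags) {.interruptible = true});
	if (err)
		return err;

	drm_exec_until_all_locked(&exec) {
		err = drm_exec_lock_obj(&exec, &bo->ttm.base);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		err = xe_bo_validate(bo, vm, false, &exec);
		drm_exec_retry_on_contention(&exec);
		xe_validation_retry_on_oom(&ctx, &err);
		if (err)
			break;
	}

	xe_validation_ctx_fini(&ctx);
	return err;
}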
diff --git a/drivers/gpu/drm/xe/xe_validation.h b/drivers/gpu/drm/xe/xe_validation.h
new file mode 100644
index 000000000000..a30e732c4d51
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_validation.h
@@ -0,0 +1,192 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2024 Intel Corporation
+ */
+#ifndef _XE_VALIDATION_H_
+#define _XE_VALIDATION_H_
+
+#include <linux/dma-resv.h>
+#include <linux/types.h>
+#include <linux/rwsem.h>
+
+struct drm_exec;
+struct drm_gem_object;
+struct drm_gpuvm_exec;
+struct xe_device;
+
+#ifdef CONFIG_PROVE_LOCKING
+/**
+ * xe_validation_lockdep() - Assert that a drm_exec locking transaction can
+ * be initialized at this point.
+ */
+static inline void xe_validation_lockdep(void)
+{
+ struct ww_acquire_ctx ticket;
+
+ ww_acquire_init(&ticket, &reservation_ww_class);
+ ww_acquire_fini(&ticket);
+}
+#else
+static inline void xe_validation_lockdep(void)
+{
+}
+#endif
+
+/*
+ * Various values of the drm_exec pointer where we've not (yet)
+ * implemented full ww locking.
+ *
+ * XE_VALIDATION_UNIMPLEMENTED means implementation is pending.
+ * A lockdep check is made to assure that a drm_exec locking
+ * transaction can actually take place where the macro is
+ * used. If this asserts, the exec pointer needs to be assigned
+ * higher up in the callchain and passed down.
+ *
+ * XE_VALIDATION_UNSUPPORTED is for dma-buf code only where
+ * the dma-buf layer doesn't support WW locking.
+ *
+ * XE_VALIDATION_OPT_OUT is for simplification of kunit tests where
+ * exhaustive eviction isn't necessary.
+ */
+#define __XE_VAL_UNIMPLEMENTED -EINVAL
+#define XE_VALIDATION_UNIMPLEMENTED (xe_validation_lockdep(), \
+ (struct drm_exec *)ERR_PTR(__XE_VAL_UNIMPLEMENTED))
+
+#define __XE_VAL_UNSUPPORTED -EOPNOTSUPP
+#define XE_VALIDATION_UNSUPPORTED ((struct drm_exec *)ERR_PTR(__XE_VAL_UNSUPPORTED))
+
+#define __XE_VAL_OPT_OUT -ENOMEM
+#define XE_VALIDATION_OPT_OUT (xe_validation_lockdep(), \
+ (struct drm_exec *)ERR_PTR(__XE_VAL_OPT_OUT))
+#ifdef CONFIG_DRM_XE_DEBUG
+void xe_validation_assert_exec(const struct xe_device *xe, const struct drm_exec *exec,
+ const struct drm_gem_object *obj);
+#else
+#define xe_validation_assert_exec(_xe, _exec, _obj) \
+ do { \
+ (void)_xe; (void)_exec; (void)_obj; \
+ } while (0)
+#endif
+
+/**
+ * struct xe_validation_device - The domain for exhaustive eviction
+ * @lock: The lock used to exclude other processes from allocating graphics memory
+ *
+ * The struct xe_validation_device represents the domain for which we want to use
+ * exhaustive eviction. The @lock is typically grabbed in read mode for allocations
+ * but when graphics memory allocation fails, it is retried with the lock held in write mode.
+ */
+struct xe_validation_device {
+ struct rw_semaphore lock;
+};
+
+/**
+ * struct xe_val_flags - Flags for xe_validation_ctx_init().
+ * @exclusive: Start the validation transaction by locking out all other validators.
+ * @no_block: Don't block on initialization.
+ * @interruptible: Block interruptible if blocking. Implies initializing the drm_exec
+ * context with the DRM_EXEC_INTERRUPTIBLE_WAIT flag.
+ * @exec_ignore_duplicates: Initialize the drm_exec context with the
+ * DRM_EXEC_IGNORE_DUPLICATES flag.
+ */
+struct xe_val_flags {
+ u32 exclusive :1;
+ u32 no_block :1;
+ u32 interruptible :1;
+ u32 exec_ignore_duplicates :1;
+};
+
+/**
+ * struct xe_validation_ctx - A struct drm_exec subclass with support for
+ * exhaustive eviction
+ * @exec: The drm_exec object base class. Note that we use a pointer instead of
+ * embedding to avoid diamond inheritance.
+ * @val: The exhaustive eviction domain.
+ * @val_flags: Copy of the struct xe_val_flags passed to xe_validation_ctx_init.
+ * @lock_held: Whether the domain lock is currently held.
+ * @lock_held_exclusive: Whether the domain lock is held in exclusive mode.
+ * @request_exclusive: Whether to lock exclusively (write mode) the next time
+ * the domain lock is locked.
+ * @exec_flags: The drm_exec flags used for drm_exec (re-)initialization.
+ * @nr: The drm_exec nr parameter used for drm_exec (re-)initialization.
+ */
+struct xe_validation_ctx {
+ struct drm_exec *exec;
+ struct xe_validation_device *val;
+ struct xe_val_flags val_flags;
+ bool lock_held;
+ bool lock_held_exclusive;
+ bool request_exclusive;
+ u32 exec_flags;
+ unsigned int nr;
+};
+
+int xe_validation_ctx_init(struct xe_validation_ctx *ctx, struct xe_validation_device *val,
+ struct drm_exec *exec, const struct xe_val_flags flags);
+
+int xe_validation_exec_lock(struct xe_validation_ctx *ctx, struct drm_gpuvm_exec *vm_exec,
+ struct xe_validation_device *val);
+
+void xe_validation_ctx_fini(struct xe_validation_ctx *ctx);
+
+bool xe_validation_should_retry(struct xe_validation_ctx *ctx, int *ret);
+
+/**
+ * xe_validation_retry_on_oom() - Retry on oom in an xe_validation transaction
+ * @_ctx: Pointer to the xe_validation_ctx
+ * @_ret: The current error value possibly holding -ENOMEM
+ *
+ * Use this in a way similar to drm_exec_retry_on_contention().
+ * If @_ret contains -ENOMEM the transaction is restarted once in a way that
+ * blocks other transactions and allows exhaustive eviction. If the transaction
+ * was already restarted once, just return the -ENOMEM. May also set
+ * @_ret to -EINTR if not retrying and waits are interruptible.
+ * May only be used within a drm_exec_until_all_locked() loop.
+ */
+#define xe_validation_retry_on_oom(_ctx, _ret) \
+ do { \
+ if (xe_validation_should_retry(_ctx, _ret)) \
+ goto *__drm_exec_retry_ptr; \
+ } while (0)
+
+/**
+ * xe_validation_device_init - Initialize a struct xe_validation_device
+ * @val: The xe_validation_device to init.
+ */
+static inline void
+xe_validation_device_init(struct xe_validation_device *val)
+{
+ init_rwsem(&val->lock);
+}
+
+/*
+ * Make guard() and scoped_guard() work with xe_validation_ctx
+ * so that we can exit transactions without caring about the
+ * cleanup.
+ */
+DEFINE_CLASS(xe_validation, struct xe_validation_ctx *,
+ if (_T) xe_validation_ctx_fini(_T);,
+ ({*_ret = xe_validation_ctx_init(_ctx, _val, _exec, _flags);
+ *_ret ? NULL : _ctx; }),
+ struct xe_validation_ctx *_ctx, struct xe_validation_device *_val,
+ struct drm_exec *_exec, const struct xe_val_flags _flags, int *_ret);
+static inline void *class_xe_validation_lock_ptr(class_xe_validation_t *_T)
+{return *_T; }
+#define class_xe_validation_is_conditional true
+
+/**
+ * xe_validation_guard() - An auto-cleanup xe_validation_ctx transaction
+ * @_ctx: The xe_validation_ctx.
+ * @_val: The xe_validation_device.
+ * @_exec: The struct drm_exec object
+ * @_flags: Flags for the xe_validation_ctx initialization.
+ * @_ret: Return in / out parameter. May be set by this macro. Typically 0 when called.
+ *
+ * This macro will initiate a drm_exec transaction with additional support for
+ * exhaustive eviction.
+ */
+#define xe_validation_guard(_ctx, _val, _exec, _flags, _ret) \
+ scoped_guard(xe_validation, _ctx, _val, _exec, _flags, &_ret) \
+ drm_exec_until_all_locked(_exec)
+
+#endif
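The guard form reads more compactly; a sketch under the same assumptions (xe_vm_drm_exec_lock() and xe->val from the xe_vm.c changes below), mirroring the shape of the xe_vma_destroy_unlocked() change:

/*
 * Hypothetical guarded transaction: the scoped guard both takes the
 * validation domain and opens the drm_exec_until_all_locked() loop;
 * cleanup happens automatically when the scope is left.
 */
static int example_guarded_validation(struct xe_device *xe, struct xe_vm *vm)
{
	struct xe_validation_ctx ctx;
	struct drm_exec exec;
	int err = 0;

	xe_validation_guard(&ctx, &xe->val, &exec,
			    (struct xe_val_flags) {.interruptible = true}, err) {
		err = xe_vm_drm_exec_lock(vm, &exec);
		drm_exec_retry_on_contention(&exec);
		if (err)
			break;

		/* ... validate / bind work, retrying on memory pressure ... */
		xe_validation_retry_on_oom(&ctx, &err);
	}

	return err;	/* holds the ctx_init error if the guard never ran */
}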
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 79323c78130f..79ab6c512d3e 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -27,8 +27,6 @@
#include "xe_device.h"
#include "xe_drm_client.h"
#include "xe_exec_queue.h"
-#include "xe_gt_pagefault.h"
-#include "xe_gt_tlb_invalidation.h"
#include "xe_migrate.h"
#include "xe_pat.h"
#include "xe_pm.h"
@@ -36,11 +34,13 @@
#include "xe_pt.h"
#include "xe_pxp.h"
#include "xe_res_cursor.h"
+#include "xe_sriov_vf.h"
#include "xe_svm.h"
#include "xe_sync.h"
+#include "xe_tile.h"
+#include "xe_tlb_inval.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
-#include "xe_hmm.h"
static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
{
@@ -48,34 +48,17 @@ static struct drm_gem_object *xe_vm_obj(struct xe_vm *vm)
}
/**
- * xe_vma_userptr_check_repin() - Advisory check for repin needed
- * @uvma: The userptr vma
+ * xe_vm_drm_exec_lock() - Lock the vm's resv with a drm_exec transaction
+ * @vm: The vm whose resv is to be locked.
+ * @exec: The drm_exec transaction.
*
- * Check if the userptr vma has been invalidated since last successful
- * repin. The check is advisory only and can the function can be called
- * without the vm->userptr.notifier_lock held. There is no guarantee that the
- * vma userptr will remain valid after a lockless check, so typically
- * the call needs to be followed by a proper check under the notifier_lock.
+ * Helper to lock the vm's resv as part of a drm_exec transaction.
*
- * Return: 0 if userptr vma is valid, -EAGAIN otherwise; repin recommended.
+ * Return: %0 on success. See drm_exec_lock_obj() for error codes.
*/
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma)
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec)
{
- return mmu_interval_check_retry(&uvma->userptr.notifier,
- uvma->userptr.notifier_seq) ?
- -EAGAIN : 0;
-}
-
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma)
-{
- struct xe_vma *vma = &uvma->vma;
- struct xe_vm *vm = xe_vma_vm(vma);
- struct xe_device *xe = vm->xe;
-
- lockdep_assert_held(&vm->lock);
- xe_assert(xe, xe_vma_is_userptr(vma));
-
- return xe_hmm_userptr_populate_range(uvma, false);
+ return drm_exec_lock_obj(exec, xe_vm_obj(vm));
}
static bool preempt_fences_waiting(struct xe_vm *vm)
@@ -128,12 +111,22 @@ static int alloc_preempt_fences(struct xe_vm *vm, struct list_head *list,
static int wait_for_existing_preempt_fences(struct xe_vm *vm)
{
struct xe_exec_queue *q;
+ bool vf_migration = IS_SRIOV_VF(vm->xe) &&
+ xe_sriov_vf_migration_supported(vm->xe);
+ signed long wait_time = vf_migration ? HZ / 5 : MAX_SCHEDULE_TIMEOUT;
xe_vm_assert_held(vm);
list_for_each_entry(q, &vm->preempt.exec_queues, lr.link) {
if (q->lr.pfence) {
- long timeout = dma_fence_wait(q->lr.pfence, false);
+ long timeout;
+
+ timeout = dma_fence_wait_timeout(q->lr.pfence, false,
+ wait_time);
+ if (!timeout) {
+ xe_assert(vm->xe, vf_migration);
+ return -EAGAIN;
+ }
/* Only -ETIME on fence indicates VM needs to be killed */
if (timeout < 0 || q->lr.pfence->error == -ETIME)
@@ -227,6 +220,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
.num_fences = 1,
};
struct drm_exec *exec = &vm_exec.exec;
+ struct xe_validation_ctx ctx;
struct dma_fence *pfence;
int err;
bool wait;
@@ -234,14 +228,14 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
xe_assert(vm->xe, xe_vm_in_preempt_fence_mode(vm));
down_write(&vm->lock);
- err = drm_gpuvm_exec_lock(&vm_exec);
+ err = xe_validation_exec_lock(&ctx, &vm_exec, &vm->xe->val);
if (err)
goto out_up_write;
pfence = xe_preempt_fence_create(q, q->lr.context,
++q->lr.seqno);
- if (!pfence) {
- err = -ENOMEM;
+ if (IS_ERR(pfence)) {
+ err = PTR_ERR(pfence);
goto out_fini;
}
@@ -249,7 +243,7 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
++vm->preempt.num_exec_queues;
q->lr.pfence = pfence;
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
drm_gpuvm_resv_add_fence(&vm->gpuvm, exec, pfence,
DMA_RESV_USAGE_BOOKKEEP, DMA_RESV_USAGE_BOOKKEEP);
@@ -263,10 +257,10 @@ int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
if (wait)
dma_fence_enable_sw_signaling(pfence);
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
out_fini:
- drm_exec_fini(exec);
+ xe_validation_ctx_fini(&ctx);
out_up_write:
up_write(&vm->lock);
@@ -299,25 +293,6 @@ void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q)
up_write(&vm->lock);
}
-/**
- * __xe_vm_userptr_needs_repin() - Check whether the VM does have userptrs
- * that need repinning.
- * @vm: The VM.
- *
- * This function checks for whether the VM has userptrs that need repinning,
- * and provides a release-type barrier on the userptr.notifier_lock after
- * checking.
- *
- * Return: 0 if there are no userptrs needing repinning, -EAGAIN if there are.
- */
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm)
-{
- lockdep_assert_held_read(&vm->userptr.notifier_lock);
-
- return (list_empty(&vm->userptr.repin_list) &&
- list_empty(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
-}
-
#define XE_VM_REBIND_RETRY_TIMEOUT_MS 1000
/**
@@ -349,39 +324,6 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked)
/* TODO: Inform user the VM is banned */
}
-/**
- * xe_vm_validate_should_retry() - Whether to retry after a validate error.
- * @exec: The drm_exec object used for locking before validation.
- * @err: The error returned from ttm_bo_validate().
- * @end: A ktime_t cookie that should be set to 0 before first use and
- * that should be reused on subsequent calls.
- *
- * With multiple active VMs, under memory pressure, it is possible that
- * ttm_bo_validate() run into -EDEADLK and in such case returns -ENOMEM.
- * Until ttm properly handles locking in such scenarios, best thing the
- * driver can do is retry with a timeout. Check if that is necessary, and
- * if so unlock the drm_exec's objects while keeping the ticket to prepare
- * for a rerun.
- *
- * Return: true if a retry after drm_exec_init() is recommended;
- * false otherwise.
- */
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end)
-{
- ktime_t cur;
-
- if (err != -ENOMEM)
- return false;
-
- cur = ktime_get();
- *end = *end ? : ktime_add_ms(cur, XE_VM_REBIND_RETRY_TIMEOUT_MS);
- if (!ktime_before(cur, *end))
- return false;
-
- msleep(20);
- return true;
-}
-
static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
{
struct xe_vm *vm = gpuvm_to_vm(vm_bo->vm);
@@ -393,7 +335,10 @@ static int xe_gpuvm_validate(struct drm_gpuvm_bo *vm_bo, struct drm_exec *exec)
list_move_tail(&gpuva_to_vma(gpuva)->combined_links.rebind,
&vm->rebind_list);
- ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false);
+ if (!try_wait_for_completion(&vm->xe->pm_block))
+ return -EAGAIN;
+
+ ret = xe_bo_validate(gem_to_xe_bo(vm_bo->obj), vm, false, exec);
if (ret)
return ret;
@@ -479,13 +424,40 @@ static int xe_preempt_work_begin(struct drm_exec *exec, struct xe_vm *vm,
return xe_vm_validate_rebind(vm, exec, vm->preempt.num_exec_queues);
}
+static bool vm_suspend_rebind_worker(struct xe_vm *vm)
+{
+ struct xe_device *xe = vm->xe;
+ bool ret = false;
+
+ mutex_lock(&xe->rebind_resume_lock);
+ if (!try_wait_for_completion(&vm->xe->pm_block)) {
+ ret = true;
+ list_move_tail(&vm->preempt.pm_activate_link, &xe->rebind_resume_list);
+ }
+ mutex_unlock(&xe->rebind_resume_lock);
+
+ return ret;
+}
+
+/**
+ * xe_vm_resume_rebind_worker() - Resume the rebind worker.
+ * @vm: The vm whose preempt worker to resume.
+ *
+ * Resume a preempt worker that was previously suspended by
+ * vm_suspend_rebind_worker().
+ */
+void xe_vm_resume_rebind_worker(struct xe_vm *vm)
+{
+ queue_work(vm->xe->ordered_wq, &vm->preempt.rebind_work);
+}
+
static void preempt_rebind_work_func(struct work_struct *w)
{
struct xe_vm *vm = container_of(w, struct xe_vm, preempt.rebind_work);
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
unsigned int fence_count = 0;
LIST_HEAD(preempt_fences);
- ktime_t end = 0;
int err = 0;
long wait;
int __maybe_unused tries = 0;
@@ -502,24 +474,32 @@ static void preempt_rebind_work_func(struct work_struct *w)
}
retry:
+ if (!try_wait_for_completion(&vm->xe->pm_block) && vm_suspend_rebind_worker(vm)) {
+ up_write(&vm->lock);
+ /* We don't actually block but don't make progress. */
+ xe_pm_might_block_on_suspend();
+ return;
+ }
+
if (xe_vm_userptr_check_repin(vm)) {
err = xe_vm_userptr_pin(vm);
if (err)
goto out_unlock_outer;
}
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ err = xe_validation_ctx_init(&ctx, &vm->xe->val, &exec,
+ (struct xe_val_flags) {.interruptible = true});
+ if (err)
+ goto out_unlock_outer;
drm_exec_until_all_locked(&exec) {
bool done = false;
err = xe_preempt_work_begin(&exec, vm, &done);
drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
if (err || done) {
- drm_exec_fini(&exec);
- if (err && xe_vm_validate_should_retry(&exec, err, &end))
- err = -EAGAIN;
-
+ xe_validation_ctx_fini(&ctx);
goto out_unlock_outer;
}
}
@@ -528,7 +508,9 @@ retry:
if (err)
goto out_unlock;
+ xe_vm_set_validation_exec(vm, &exec);
err = xe_vm_rebind(vm, true);
+ xe_vm_set_validation_exec(vm, NULL);
if (err)
goto out_unlock;
@@ -546,9 +528,9 @@ retry:
(!(__tries)++ || __xe_vm_userptr_needs_repin(__vm)) : \
__xe_vm_userptr_needs_repin(__vm))
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
if (retry_required(tries, vm)) {
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
err = -EAGAIN;
goto out_unlock;
}
@@ -562,13 +544,26 @@ retry:
/* Point of no return. */
arm_preempt_fences(vm, &preempt_fences);
resume_and_reinstall_preempt_fences(vm, &exec);
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
out_unlock:
- drm_exec_fini(&exec);
+ xe_validation_ctx_fini(&ctx);
out_unlock_outer:
if (err == -EAGAIN) {
trace_xe_vm_rebind_worker_retry(vm);
+
+ /*
+ * We can't block in workers on a VF which supports migration
+ * given this can block the VF post-migration workers from
+ * getting scheduled.
+ */
+ if (IS_SRIOV_VF(vm->xe) &&
+ xe_sriov_vf_migration_supported(vm->xe)) {
+ up_write(&vm->lock);
+ xe_vm_queue_rebind_worker(vm);
+ return;
+ }
+
goto retry;
}
@@ -583,201 +578,6 @@ out_unlock_outer:
trace_xe_vm_rebind_worker_exit(vm);
}
-static void __vma_userptr_invalidate(struct xe_vm *vm, struct xe_userptr_vma *uvma)
-{
- struct xe_userptr *userptr = &uvma->userptr;
- struct xe_vma *vma = &uvma->vma;
- struct dma_resv_iter cursor;
- struct dma_fence *fence;
- long err;
-
- /*
- * Tell exec and rebind worker they need to repin and rebind this
- * userptr.
- */
- if (!xe_vm_in_fault_mode(vm) &&
- !(vma->gpuva.flags & XE_VMA_DESTROYED)) {
- spin_lock(&vm->userptr.invalidated_lock);
- list_move_tail(&userptr->invalidate_link,
- &vm->userptr.invalidated);
- spin_unlock(&vm->userptr.invalidated_lock);
- }
-
- /*
- * Preempt fences turn into schedule disables, pipeline these.
- * Note that even in fault mode, we need to wait for binds and
- * unbinds to complete, and those are attached as BOOKMARK fences
- * to the vm.
- */
- dma_resv_iter_begin(&cursor, xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP);
- dma_resv_for_each_fence_unlocked(&cursor, fence)
- dma_fence_enable_sw_signaling(fence);
- dma_resv_iter_end(&cursor);
-
- err = dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
- XE_WARN_ON(err <= 0);
-
- if (xe_vm_in_fault_mode(vm) && userptr->initial_bind) {
- err = xe_vm_invalidate_vma(vma);
- XE_WARN_ON(err);
- }
-
- xe_hmm_userptr_unmap(uvma);
-}
-
-static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
- const struct mmu_notifier_range *range,
- unsigned long cur_seq)
-{
- struct xe_userptr_vma *uvma = container_of(mni, typeof(*uvma), userptr.notifier);
- struct xe_vma *vma = &uvma->vma;
- struct xe_vm *vm = xe_vma_vm(vma);
-
- xe_assert(vm->xe, xe_vma_is_userptr(vma));
- trace_xe_vma_userptr_invalidate(vma);
-
- if (!mmu_notifier_range_blockable(range))
- return false;
-
- vm_dbg(&xe_vma_vm(vma)->xe->drm,
- "NOTIFIER: addr=0x%016llx, range=0x%016llx",
- xe_vma_start(vma), xe_vma_size(vma));
-
- down_write(&vm->userptr.notifier_lock);
- mmu_interval_set_seq(mni, cur_seq);
-
- __vma_userptr_invalidate(vm, uvma);
- up_write(&vm->userptr.notifier_lock);
- trace_xe_vma_userptr_invalidate_complete(vma);
-
- return true;
-}
-
-static const struct mmu_interval_notifier_ops vma_userptr_notifier_ops = {
- .invalidate = vma_userptr_invalidate,
-};
-
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-/**
- * xe_vma_userptr_force_invalidate() - force invalidate a userptr
- * @uvma: The userptr vma to invalidate
- *
- * Perform a forced userptr invalidation for testing purposes.
- */
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
-{
- struct xe_vm *vm = xe_vma_vm(&uvma->vma);
-
- /* Protect against concurrent userptr pinning */
- lockdep_assert_held(&vm->lock);
- /* Protect against concurrent notifiers */
- lockdep_assert_held(&vm->userptr.notifier_lock);
- /*
- * Protect against concurrent instances of this function and
- * the critical exec sections
- */
- xe_vm_assert_held(vm);
-
- if (!mmu_interval_read_retry(&uvma->userptr.notifier,
- uvma->userptr.notifier_seq))
- uvma->userptr.notifier_seq -= 2;
- __vma_userptr_invalidate(vm, uvma);
-}
-#endif
-
-int xe_vm_userptr_pin(struct xe_vm *vm)
-{
- struct xe_userptr_vma *uvma, *next;
- int err = 0;
-
- xe_assert(vm->xe, !xe_vm_in_fault_mode(vm));
- lockdep_assert_held_write(&vm->lock);
-
- /* Collect invalidated userptrs */
- spin_lock(&vm->userptr.invalidated_lock);
- xe_assert(vm->xe, list_empty(&vm->userptr.repin_list));
- list_for_each_entry_safe(uvma, next, &vm->userptr.invalidated,
- userptr.invalidate_link) {
- list_del_init(&uvma->userptr.invalidate_link);
- list_add_tail(&uvma->userptr.repin_link,
- &vm->userptr.repin_list);
- }
- spin_unlock(&vm->userptr.invalidated_lock);
-
- /* Pin and move to bind list */
- list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
- userptr.repin_link) {
- err = xe_vma_userptr_pin_pages(uvma);
- if (err == -EFAULT) {
- list_del_init(&uvma->userptr.repin_link);
- /*
- * We might have already done the pin once already, but
- * then had to retry before the re-bind happened, due
- * some other condition in the caller, but in the
- * meantime the userptr got dinged by the notifier such
- * that we need to revalidate here, but this time we hit
- * the EFAULT. In such a case make sure we remove
- * ourselves from the rebind list to avoid going down in
- * flames.
- */
- if (!list_empty(&uvma->vma.combined_links.rebind))
- list_del_init(&uvma->vma.combined_links.rebind);
-
- /* Wait for pending binds */
- xe_vm_lock(vm, false);
- dma_resv_wait_timeout(xe_vm_resv(vm),
- DMA_RESV_USAGE_BOOKKEEP,
- false, MAX_SCHEDULE_TIMEOUT);
-
- err = xe_vm_invalidate_vma(&uvma->vma);
- xe_vm_unlock(vm);
- if (err)
- break;
- } else {
- if (err)
- break;
-
- list_del_init(&uvma->userptr.repin_link);
- list_move_tail(&uvma->vma.combined_links.rebind,
- &vm->rebind_list);
- }
- }
-
- if (err) {
- down_write(&vm->userptr.notifier_lock);
- spin_lock(&vm->userptr.invalidated_lock);
- list_for_each_entry_safe(uvma, next, &vm->userptr.repin_list,
- userptr.repin_link) {
- list_del_init(&uvma->userptr.repin_link);
- list_move_tail(&uvma->userptr.invalidate_link,
- &vm->userptr.invalidated);
- }
- spin_unlock(&vm->userptr.invalidated_lock);
- up_write(&vm->userptr.notifier_lock);
- }
- return err;
-}
-
-/**
- * xe_vm_userptr_check_repin() - Check whether the VM might have userptrs
- * that need repinning.
- * @vm: The VM.
- *
- * This function does an advisory check for whether the VM has userptrs that
- * need repinning.
- *
- * Return: 0 if there are no indications of userptrs needing repinning,
- * -EAGAIN if there are.
- */
-int xe_vm_userptr_check_repin(struct xe_vm *vm)
-{
- return (list_empty_careful(&vm->userptr.repin_list) &&
- list_empty_careful(&vm->userptr.invalidated)) ? 0 : -EAGAIN;
-}
-
static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
{
int i;
@@ -798,23 +598,56 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
}
ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
+static void xe_vma_svm_prefetch_op_fini(struct xe_vma_op *op)
+{
+ struct xe_vma *vma;
+
+ vma = gpuva_to_vma(op->base.prefetch.va);
+
+ if (op->base.op == DRM_GPUVA_OP_PREFETCH && xe_vma_is_cpu_addr_mirror(vma))
+ xa_destroy(&op->prefetch_range.range);
+}
+
+static void xe_vma_svm_prefetch_ops_fini(struct xe_vma_ops *vops)
+{
+ struct xe_vma_op *op;
+
+ if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+ return;
+
+ list_for_each_entry(op, &vops->list, link)
+ xe_vma_svm_prefetch_op_fini(op);
+}
+
static void xe_vma_ops_fini(struct xe_vma_ops *vops)
{
int i;
+ xe_vma_svm_prefetch_ops_fini(vops);
+
for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
kfree(vops->pt_update_ops[i].ops);
}
-static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask)
+static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops, u8 tile_mask, int inc_val)
{
int i;
+ if (!inc_val)
+ return;
+
for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
if (BIT(i) & tile_mask)
- ++vops->pt_update_ops[i].num_ops;
+ vops->pt_update_ops[i].num_ops += inc_val;
}
+#define XE_VMA_CREATE_MASK ( \
+ XE_VMA_READ_ONLY | \
+ XE_VMA_DUMPABLE | \
+ XE_VMA_SYSTEM_ALLOCATOR | \
+ DRM_GPUVA_SPARSE | \
+ XE_VMA_MADV_AUTORESET)
+
static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
u8 tile_mask)
{
@@ -827,8 +660,7 @@ static void xe_vm_populate_rebind(struct xe_vma_op *op, struct xe_vma *vma,
op->base.map.gem.offset = vma->gpuva.gem.offset;
op->map.vma = vma;
op->map.immediate = true;
- op->map.dumpable = vma->gpuva.flags & XE_VMA_DUMPABLE;
- op->map.is_null = xe_vma_is_null(vma);
+ op->map.vma_flags = vma->gpuva.flags & XE_VMA_CREATE_MASK;
}
static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
@@ -842,7 +674,7 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops *vops, struct xe_vma *vma,
xe_vm_populate_rebind(op, vma, tile_mask);
list_add_tail(&op->link, &vops->list);
- xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
+ xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
return 0;
}
@@ -922,10 +754,11 @@ struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma, u8 tile_ma
xe_assert(vm->xe, xe_vm_in_fault_mode(vm));
xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
for_each_tile(tile, vm->xe, id) {
vops.pt_update_ops[id].wait_vm_bookkeep = true;
vops.pt_update_ops[tile->id].q =
- xe_tile_migrate_exec_queue(tile);
+ xe_migrate_exec_queue(tile->migrate);
}
err = xe_vm_ops_add_rebind(&vops, vma, tile_mask);
@@ -977,7 +810,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
xe_vm_populate_range_rebind(op, vma, range, tile_mask);
list_add_tail(&op->link, &vops->list);
- xe_vma_ops_incr_pt_update_ops(vops, tile_mask);
+ xe_vma_ops_incr_pt_update_ops(vops, tile_mask, 1);
return 0;
}
@@ -991,7 +824,7 @@ xe_vm_ops_add_range_rebind(struct xe_vma_ops *vops,
*
* (re)bind SVM range setting up GPU page tables for the range.
*
- * Return: dma fence for rebind to signal completion on succees, ERR_PTR on
+ * Return: dma fence for rebind to signal completion on success, ERR_PTR on
* failure
*/
struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
@@ -1012,10 +845,11 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
xe_assert(vm->xe, xe_vma_is_cpu_addr_mirror(vma));
xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_SKIP_TLB_WAIT;
for_each_tile(tile, vm->xe, id) {
vops.pt_update_ops[id].wait_vm_bookkeep = true;
vops.pt_update_ops[tile->id].q =
- xe_tile_migrate_exec_queue(tile);
+ xe_migrate_exec_queue(tile->migrate);
}
err = xe_vm_ops_add_range_rebind(&vops, vma, range, tile_mask);
@@ -1062,7 +896,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
xe_vm_populate_range_unbind(op, range);
list_add_tail(&op->link, &vops->list);
- xe_vma_ops_incr_pt_update_ops(vops, range->tile_present);
+ xe_vma_ops_incr_pt_update_ops(vops, range->tile_present, 1);
return 0;
}
@@ -1074,7 +908,7 @@ xe_vm_ops_add_range_unbind(struct xe_vma_ops *vops,
*
* Unbind SVM range removing the GPU page tables for the range.
*
- * Return: dma fence for unbind to signal completion on succees, ERR_PTR on
+ * Return: dma fence for unbind to signal completion on success, ERR_PTR on
* failure
*/
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
@@ -1098,7 +932,7 @@ struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
for_each_tile(tile, vm->xe, id) {
vops.pt_update_ops[id].wait_vm_bookkeep = true;
vops.pt_update_ops[tile->id].q =
- xe_tile_migrate_exec_queue(tile);
+ xe_migrate_exec_queue(tile->migrate);
}
err = xe_vm_ops_add_range_unbind(&vops, range);
@@ -1131,25 +965,18 @@ static void xe_vma_free(struct xe_vma *vma)
kfree(vma);
}
-#define VMA_CREATE_FLAG_READ_ONLY BIT(0)
-#define VMA_CREATE_FLAG_IS_NULL BIT(1)
-#define VMA_CREATE_FLAG_DUMPABLE BIT(2)
-#define VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR BIT(3)
-
static struct xe_vma *xe_vma_create(struct xe_vm *vm,
struct xe_bo *bo,
u64 bo_offset_or_userptr,
u64 start, u64 end,
- u16 pat_index, unsigned int flags)
+ struct xe_vma_mem_attr *attr,
+ unsigned int flags)
{
struct xe_vma *vma;
struct xe_tile *tile;
u8 id;
- bool read_only = (flags & VMA_CREATE_FLAG_READ_ONLY);
- bool is_null = (flags & VMA_CREATE_FLAG_IS_NULL);
- bool dumpable = (flags & VMA_CREATE_FLAG_DUMPABLE);
- bool is_cpu_addr_mirror =
- (flags & VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR);
+ bool is_null = (flags & DRM_GPUVA_SPARSE);
+ bool is_cpu_addr_mirror = (flags & XE_VMA_SYSTEM_ALLOCATOR);
xe_assert(vm->xe, start < end);
xe_assert(vm->xe, end < vm->size);
@@ -1170,10 +997,6 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
if (!vma)
return ERR_PTR(-ENOMEM);
- if (is_cpu_addr_mirror)
- vma->gpuva.flags |= XE_VMA_SYSTEM_ALLOCATOR;
- if (is_null)
- vma->gpuva.flags |= DRM_GPUVA_SPARSE;
if (bo)
vma->gpuva.gem.obj = &bo->ttm.base;
}
@@ -1184,10 +1007,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
vma->gpuva.vm = &vm->gpuvm;
vma->gpuva.va.addr = start;
vma->gpuva.va.range = end - start + 1;
- if (read_only)
- vma->gpuva.flags |= XE_VMA_READ_ONLY;
- if (dumpable)
- vma->gpuva.flags |= XE_VMA_DUMPABLE;
+ vma->gpuva.flags = flags;
for_each_tile(tile, vm->xe, id)
vma->tile_mask |= 0x1 << id;
@@ -1195,7 +1015,7 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
if (vm->xe->info.has_atomic_enable_pte_bit)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
- vma->pat_index = pat_index;
+ vma->attr = *attr;
if (bo) {
struct drm_gpuvm_bo *vm_bo;
@@ -1215,25 +1035,17 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
drm_gpuvm_bo_put(vm_bo);
} else /* userptr or null */ {
if (!is_null && !is_cpu_addr_mirror) {
- struct xe_userptr *userptr = &to_userptr_vma(vma)->userptr;
+ struct xe_userptr_vma *uvma = to_userptr_vma(vma);
u64 size = end - start + 1;
int err;
- INIT_LIST_HEAD(&userptr->invalidate_link);
- INIT_LIST_HEAD(&userptr->repin_link);
vma->gpuva.gem.offset = bo_offset_or_userptr;
- mutex_init(&userptr->unmap_mutex);
- err = mmu_interval_notifier_insert(&userptr->notifier,
- current->mm,
- xe_vma_userptr(vma), size,
- &vma_userptr_notifier_ops);
+ err = xe_userptr_setup(uvma, xe_vma_userptr(vma), size);
if (err) {
xe_vma_free(vma);
return ERR_PTR(err);
}
-
- userptr->notifier_seq = LONG_MAX;
}
xe_vm_get(vm);
@@ -1253,18 +1065,8 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
if (xe_vma_is_userptr(vma)) {
struct xe_userptr_vma *uvma = to_userptr_vma(vma);
- struct xe_userptr *userptr = &uvma->userptr;
- if (userptr->sg)
- xe_hmm_userptr_free_sg(uvma);
-
- /*
- * Since userptr pages are not pinned, we can't remove
- * the notifier until we're sure the GPU is not accessing
- * them anymore
- */
- mmu_interval_notifier_remove(&userptr->notifier);
- mutex_destroy(&userptr->unmap_mutex);
+ xe_userptr_remove(uvma);
xe_vm_put(vm);
} else if (xe_vma_is_null(vma) || xe_vma_is_cpu_addr_mirror(vma)) {
xe_vm_put(vm);
@@ -1301,11 +1103,7 @@ static void xe_vma_destroy(struct xe_vma *vma, struct dma_fence *fence)
if (xe_vma_is_userptr(vma)) {
xe_assert(vm->xe, vma->gpuva.flags & XE_VMA_DESTROYED);
-
- spin_lock(&vm->userptr.invalidated_lock);
- xe_assert(vm->xe, list_empty(&to_userptr_vma(vma)->userptr.repin_link));
- list_del(&to_userptr_vma(vma)->userptr.invalidate_link);
- spin_unlock(&vm->userptr.invalidated_lock);
+ xe_userptr_destroy(to_userptr_vma(vma));
} else if (!xe_vma_is_null(vma) && !xe_vma_is_cpu_addr_mirror(vma)) {
xe_bo_assert_held(xe_vma_bo(vma));
@@ -1353,20 +1151,19 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma)
static void xe_vma_destroy_unlocked(struct xe_vma *vma)
{
+ struct xe_device *xe = xe_vma_vm(vma)->xe;
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
- int err;
+ int err = 0;
- drm_exec_init(&exec, 0, 0);
- drm_exec_until_all_locked(&exec) {
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {}, err) {
err = xe_vm_lock_vma(&exec, vma);
drm_exec_retry_on_contention(&exec);
if (XE_WARN_ON(err))
break;
+ xe_vma_destroy(vma, NULL);
}
-
- xe_vma_destroy(vma, NULL);
-
- drm_exec_fini(&exec);
+ xe_assert(xe, !err);
}
struct xe_vma *
@@ -1484,14 +1281,39 @@ static u64 pte_encode_ps(u32 pt_level)
return 0;
}
-static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset,
- const u16 pat_index)
+static u16 pde_pat_index(struct xe_bo *bo)
+{
+ struct xe_device *xe = xe_bo_device(bo);
+ u16 pat_index;
+
+ /*
+ * We only have two bits to encode the PAT index in non-leaf nodes, but
+ * these only point to other paging structures so we only need a minimal
+ * selection of options. The user PAT index is only for encoding leaf
+ * nodes, where we have use of more bits to do the encoding. The
+ * non-leaf nodes are instead under driver control so the chosen index
+ * here should be distinct from the user PAT index. Also the
+ * corresponding coherency of the PAT index should be tied to the
+ * allocation type of the page table (or at least we should pick
+ * something which is always safe).
+ */
+ if (!xe_bo_is_vram(bo) && bo->ttm.ttm->caching == ttm_cached)
+ pat_index = xe->pat.idx[XE_CACHE_WB];
+ else
+ pat_index = xe->pat.idx[XE_CACHE_NONE];
+
+ xe_assert(xe, pat_index <= 3);
+
+ return pat_index;
+}
+
+static u64 xelp_pde_encode_bo(struct xe_bo *bo, u64 bo_offset)
{
u64 pde;
pde = xe_bo_addr(bo, bo_offset, XE_PAGE_SIZE);
pde |= XE_PAGE_PRESENT | XE_PAGE_RW;
- pde |= pde_encode_pat_index(pat_index);
+ pde |= pde_encode_pat_index(pde_pat_index(bo));
return pde;
}
@@ -1566,6 +1388,7 @@ static void vm_destroy_work_func(struct work_struct *w);
* @xe: xe device.
* @tile: tile to set up for.
* @vm: vm to set up for.
+ * @exec: The struct drm_exec object used to lock the vm resv.
*
* Sets up a pagetable tree with one page-table per level and a single
* leaf PTE. All pagetable entries point to the single page-table or,
@@ -1575,16 +1398,19 @@ static void vm_destroy_work_func(struct work_struct *w);
* Return: 0 on success, negative error code on error.
*/
static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
- struct xe_vm *vm)
+ struct xe_vm *vm, struct drm_exec *exec)
{
u8 id = tile->id;
int i;
for (i = MAX_HUGEPTE_LEVEL; i < vm->pt_root[id]->level; i++) {
- vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i);
- if (IS_ERR(vm->scratch_pt[id][i]))
- return PTR_ERR(vm->scratch_pt[id][i]);
+ vm->scratch_pt[id][i] = xe_pt_create(vm, tile, i, exec);
+ if (IS_ERR(vm->scratch_pt[id][i])) {
+ int err = PTR_ERR(vm->scratch_pt[id][i]);
+ vm->scratch_pt[id][i] = NULL;
+ return err;
+ }
xe_pt_populate_empty(tile, vm, vm->scratch_pt[id][i]);
}
@@ -1612,11 +1438,28 @@ static void xe_vm_free_scratch(struct xe_vm *vm)
}
}
-struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
+static void xe_vm_pt_destroy(struct xe_vm *vm)
+{
+ struct xe_tile *tile;
+ u8 id;
+
+ xe_vm_assert_held(vm);
+
+ for_each_tile(tile, vm->xe, id) {
+ if (vm->pt_root[id]) {
+ xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
+ vm->pt_root[id] = NULL;
+ }
+ }
+}
+
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef)
{
struct drm_gem_object *vm_resv_obj;
+ struct xe_validation_ctx ctx;
+ struct drm_exec exec;
struct xe_vm *vm;
- int err, number_tiles = 0;
+ int err;
struct xe_tile *tile;
u8 id;
@@ -1633,9 +1476,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
vm->xe = xe;
vm->size = 1ull << xe->info.va_bits;
-
vm->flags = flags;
+ if (xef)
+ vm->xef = xe_file_get(xef);
/**
* GSC VMs are kernel-owned, only used for PXP ops and can sometimes be
* manipulated under the PXP mutex. However, the PXP mutex can be taken
@@ -1657,7 +1501,6 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
INIT_LIST_HEAD(&vm->userptr.repin_list);
INIT_LIST_HEAD(&vm->userptr.invalidated);
- init_rwsem(&vm->userptr.notifier_lock);
spin_lock_init(&vm->userptr.invalidated_lock);
ttm_lru_bulk_move_init(&vm->lru_bulk_move);
@@ -1665,7 +1508,10 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
INIT_WORK(&vm->destroy_work, vm_destroy_work_func);
INIT_LIST_HEAD(&vm->preempt.exec_queues);
- vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */
+ if (flags & XE_VM_FLAG_FAULT_MODE)
+ vm->preempt.min_run_period_ms = 0;
+ else
+ vm->preempt.min_run_period_ms = 5;
for_each_tile(tile, xe, id)
xe_range_fence_tree_init(&vm->rftree[id]);
@@ -1678,13 +1524,20 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
* scheduler drops all the references of it, hence protecting the VM
* for this case is necessary.
*/
- if (flags & XE_VM_FLAG_LR_MODE)
+ if (flags & XE_VM_FLAG_LR_MODE) {
+ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
xe_pm_runtime_get_noresume(xe);
+ INIT_LIST_HEAD(&vm->preempt.pm_activate_link);
+ }
+
+ err = xe_svm_init(vm);
+ if (err)
+ goto err_no_resv;
vm_resv_obj = drm_gpuvm_resv_object_alloc(&xe->drm);
if (!vm_resv_obj) {
err = -ENOMEM;
- goto err_no_resv;
+ goto err_svm_fini;
}
drm_gpuvm_init(&vm->gpuvm, "Xe VM", DRM_GPUVM_RESV_PROTECTED, &xe->drm,
@@ -1692,51 +1545,68 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
drm_gem_object_put(vm_resv_obj);
- err = xe_vm_lock(vm, true);
- if (err)
- goto err_close;
-
- if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
- vm->flags |= XE_VM_FLAG_64K;
-
- for_each_tile(tile, xe, id) {
- if (flags & XE_VM_FLAG_MIGRATION &&
- tile->id != XE_VM_FLAG_TILE_ID(flags))
- continue;
+ err = 0;
+ xe_validation_guard(&ctx, &xe->val, &exec, (struct xe_val_flags) {.interruptible = true},
+ err) {
+ err = xe_vm_drm_exec_lock(vm, &exec);
+ drm_exec_retry_on_contention(&exec);
- vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level);
- if (IS_ERR(vm->pt_root[id])) {
- err = PTR_ERR(vm->pt_root[id]);
- vm->pt_root[id] = NULL;
- goto err_unlock_close;
- }
- }
+ if (IS_DGFX(xe) && xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ vm->flags |= XE_VM_FLAG_64K;
- if (xe_vm_has_scratch(vm)) {
for_each_tile(tile, xe, id) {
- if (!vm->pt_root[id])
+ if (flags & XE_VM_FLAG_MIGRATION &&
+ tile->id != XE_VM_FLAG_TILE_ID(flags))
continue;
- err = xe_vm_create_scratch(xe, tile, vm);
+ vm->pt_root[id] = xe_pt_create(vm, tile, xe->info.vm_max_level,
+ &exec);
+ if (IS_ERR(vm->pt_root[id])) {
+ err = PTR_ERR(vm->pt_root[id]);
+ vm->pt_root[id] = NULL;
+ xe_vm_pt_destroy(vm);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
+ if (err)
+ break;
+
+ if (xe_vm_has_scratch(vm)) {
+ for_each_tile(tile, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
+
+ err = xe_vm_create_scratch(xe, tile, vm, &exec);
+ if (err) {
+ xe_vm_free_scratch(vm);
+ xe_vm_pt_destroy(vm);
+ drm_exec_retry_on_contention(&exec);
+ xe_validation_retry_on_oom(&ctx, &err);
+ break;
+ }
+ }
if (err)
- goto err_unlock_close;
+ break;
+ vm->batch_invalidate_tlb = true;
}
- vm->batch_invalidate_tlb = true;
- }
- if (vm->flags & XE_VM_FLAG_LR_MODE) {
- INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
- vm->batch_invalidate_tlb = false;
- }
+ if (vm->flags & XE_VM_FLAG_LR_MODE) {
+ INIT_WORK(&vm->preempt.rebind_work, preempt_rebind_work_func);
+ vm->batch_invalidate_tlb = false;
+ }
- /* Fill pt_root after allocating scratch tables */
- for_each_tile(tile, xe, id) {
- if (!vm->pt_root[id])
- continue;
+ /* Fill pt_root after allocating scratch tables */
+ for_each_tile(tile, xe, id) {
+ if (!vm->pt_root[id])
+ continue;
- xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+ xe_pt_populate_empty(tile, vm, vm->pt_root[id]);
+ }
}
- xe_vm_unlock(vm);
+ if (err)
+ goto err_close;
/* Kernel migration VM shouldn't have a circular loop.. */
if (!(flags & XE_VM_FLAG_MIGRATION)) {
@@ -1753,34 +1623,43 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
goto err_close;
}
vm->q[id] = q;
- number_tiles++;
}
}
- if (flags & XE_VM_FLAG_FAULT_MODE) {
- err = xe_svm_init(vm);
- if (err)
+ if (xef && xe->info.has_asid) {
+ u32 asid;
+
+ down_write(&xe->usm.lock);
+ err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
+ XA_LIMIT(1, XE_MAX_ASID - 1),
+ &xe->usm.next_asid, GFP_KERNEL);
+ up_write(&xe->usm.lock);
+ if (err < 0)
goto err_close;
- }
- if (number_tiles > 1)
- vm->composite_fence_ctx = dma_fence_context_alloc(1);
+ vm->usm.asid = asid;
+ }
trace_xe_vm_create(vm);
return vm;
-err_unlock_close:
- xe_vm_unlock(vm);
err_close:
xe_vm_close_and_put(vm);
return ERR_PTR(err);
+err_svm_fini:
+ if (flags & XE_VM_FLAG_FAULT_MODE) {
+ vm->size = 0; /* close the vm */
+ xe_svm_fini(vm);
+ }
err_no_resv:
mutex_destroy(&vm->snap_mutex);
for_each_tile(tile, xe, id)
xe_range_fence_tree_fini(&vm->rftree[id]);
ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move);
+ if (vm->xef)
+ xe_file_put(vm->xef);
kfree(vm);
if (flags & XE_VM_FLAG_LR_MODE)
xe_pm_runtime_put(xe);
@@ -1817,7 +1696,7 @@ static void xe_vm_close(struct xe_vm *vm)
xe_pt_clear(xe, vm->pt_root[id]);
for_each_gt(gt, xe, id)
- xe_gt_tlb_invalidation_vm(gt, vm);
+ xe_tlb_inval_vm(&gt->tlb_inval, vm);
}
}
@@ -1841,15 +1720,24 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_assert(xe, !vm->preempt.num_exec_queues);
xe_vm_close(vm);
- if (xe_vm_in_preempt_fence_mode(vm))
+ if (xe_vm_in_preempt_fence_mode(vm)) {
+ mutex_lock(&xe->rebind_resume_lock);
+ list_del_init(&vm->preempt.pm_activate_link);
+ mutex_unlock(&xe->rebind_resume_lock);
flush_work(&vm->preempt.rebind_work);
+ }
if (xe_vm_in_fault_mode(vm))
xe_svm_close(vm);
down_write(&vm->lock);
for_each_tile(tile, xe, id) {
- if (vm->q[id])
+ if (vm->q[id]) {
+ int i;
+
xe_exec_queue_last_fence_put(vm->q[id], vm);
+ for_each_tlb_inval(i)
+ xe_exec_queue_tlb_inval_last_fence_put(vm->q[id], vm, i);
+ }
}
up_write(&vm->lock);
@@ -1867,9 +1755,9 @@ void xe_vm_close_and_put(struct xe_vm *vm)
vma = gpuva_to_vma(gpuva);
if (xe_vma_has_no_bo(vma)) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags |= XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
}
xe_vm_remove_vma(vm, vma);
@@ -1893,13 +1781,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
* destroy the pagetables immediately.
*/
xe_vm_free_scratch(vm);
-
- for_each_tile(tile, xe, id) {
- if (vm->pt_root[id]) {
- xe_pt_destroy(vm->pt_root[id], vm->flags, NULL);
- vm->pt_root[id] = NULL;
- }
- }
+ xe_vm_pt_destroy(vm);
xe_vm_unlock(vm);
/*
@@ -1913,8 +1795,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
xe_vma_destroy_unlocked(vma);
}
- if (xe_vm_in_fault_mode(vm))
- xe_svm_fini(vm);
+ xe_svm_fini(vm);
up_write(&vm->lock);
@@ -1991,8 +1872,7 @@ struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id)
u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile)
{
- return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0,
- tile_to_xe(tile)->pat.idx[XE_CACHE_WB]);
+ return vm->pt_ops->pde_encode_bo(vm->pt_root[tile->id]->bo, 0);
}
static struct xe_exec_queue *
@@ -2026,16 +1906,16 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
struct xe_device *xe = to_xe_device(dev);
struct xe_file *xef = to_xe_file(file);
struct drm_xe_vm_create *args = data;
- struct xe_tile *tile;
+ struct xe_gt *wa_gt = xe_root_mmio_gt(xe);
struct xe_vm *vm;
- u32 id, asid;
+ u32 id;
int err;
u32 flags = 0;
if (XE_IOCTL_DBG(xe, args->extensions))
return -EINVAL;
- if (XE_WA(xe_root_mmio_gt(xe), 14016763929))
+ if (wa_gt && XE_GT_WA(wa_gt, 22014953428))
args->flags |= DRM_XE_VM_CREATE_FLAG_SCRATCH_PAGE;
if (XE_IOCTL_DBG(xe, args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE &&
@@ -2064,29 +1944,10 @@ int xe_vm_create_ioctl(struct drm_device *dev, void *data,
if (args->flags & DRM_XE_VM_CREATE_FLAG_FAULT_MODE)
flags |= XE_VM_FLAG_FAULT_MODE;
- vm = xe_vm_create(xe, flags);
+ vm = xe_vm_create(xe, flags, xef);
if (IS_ERR(vm))
return PTR_ERR(vm);
- if (xe->info.has_asid) {
- down_write(&xe->usm.lock);
- err = xa_alloc_cyclic(&xe->usm.asid_to_vm, &asid, vm,
- XA_LIMIT(1, XE_MAX_ASID - 1),
- &xe->usm.next_asid, GFP_KERNEL);
- up_write(&xe->usm.lock);
- if (err < 0)
- goto err_close_and_put;
-
- vm->usm.asid = asid;
- }
-
- vm->xef = xe_file_get(xef);
-
- /* Record BO memory for VM pagetable created against client */
- for_each_tile(tile, xe, id)
- if (vm->pt_root[id])
- xe_drm_client_add_bo(vm->xef->client, vm->pt_root[id]->bo);
-
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_MEM)
/* Warning: Security issue - never enable by default */
args->reserved[0] = xe_bo_main_addr(vm->pt_root[0]->bo, XE_PAGE_SIZE);
@@ -2136,6 +1997,139 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
return err;
}
+static int xe_vm_query_vmas(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpuva *gpuva;
+ u32 num_vmas = 0;
+
+ lockdep_assert_held(&vm->lock);
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
+ num_vmas++;
+
+ return num_vmas;
+}
+
+static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
+ u64 end, struct drm_xe_mem_range_attr *attrs)
+{
+ struct drm_gpuva *gpuva;
+ int i = 0;
+
+ lockdep_assert_held(&vm->lock);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (i == *num_vmas)
+ return -ENOSPC;
+
+ attrs[i].start = xe_vma_start(vma);
+ attrs[i].end = xe_vma_end(vma);
+ attrs[i].atomic.val = vma->attr.atomic_access;
+ attrs[i].pat_index.val = vma->attr.pat_index;
+ attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
+ attrs[i].preferred_mem_loc.migration_policy =
+ vma->attr.preferred_loc.migration_policy;
+
+ i++;
+ }
+
+ *num_vmas = i;
+ return 0;
+}
+
+int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_mem_range_attr *mem_attrs;
+ struct drm_xe_vm_query_mem_range_attr *args = data;
+ u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
+ struct xe_vm *vm;
+ int err = 0;
+
+ if (XE_IOCTL_DBG(xe,
+ ((args->num_mem_ranges == 0 &&
+ (attrs_user || args->sizeof_mem_range_attr != 0)) ||
+ (args->num_mem_ranges > 0 &&
+ (!attrs_user ||
+ args->sizeof_mem_range_attr !=
+ sizeof(struct drm_xe_mem_range_attr))))))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -EINVAL;
+
+ err = down_read_interruptible(&vm->lock);
+ if (err)
+ goto put_vm;
+
+ attrs_user = u64_to_user_ptr(args->vector_of_mem_attr);
+
+ if (args->num_mem_ranges == 0 && !attrs_user) {
+ args->num_mem_ranges = xe_vm_query_vmas(vm, args->start, args->start + args->range);
+ args->sizeof_mem_range_attr = sizeof(struct drm_xe_mem_range_attr);
+ goto unlock_vm;
+ }
+
+ mem_attrs = kvmalloc_array(args->num_mem_ranges, args->sizeof_mem_range_attr,
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+ if (!mem_attrs) {
+ err = args->num_mem_ranges > 1 ? -ENOBUFS : -ENOMEM;
+ goto unlock_vm;
+ }
+
+ memset(mem_attrs, 0, args->num_mem_ranges * args->sizeof_mem_range_attr);
+ err = get_mem_attrs(vm, &args->num_mem_ranges, args->start,
+ args->start + args->range, mem_attrs);
+ if (err)
+ goto free_mem_attrs;
+
+ err = copy_to_user(attrs_user, mem_attrs,
+ args->sizeof_mem_range_attr * args->num_mem_ranges);
+ if (err)
+ err = -EFAULT;
+
+free_mem_attrs:
+ kvfree(mem_attrs);
+unlock_vm:
+ up_read(&vm->lock);
+put_vm:
+ xe_vm_put(vm);
+ return err;
+}
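/*
 * Illustrative userspace sketch (not part of this patch) of the two-call
 * protocol implemented by the ioctl above: a first call with
 * num_mem_ranges == 0 and no vector returns the range count and per-entry
 * size, and a second call fills a caller-allocated vector. The request macro
 * name DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR and the usual uapi/libc includes
 * are assumptions here.
 */
static int query_mem_range_attrs(int fd, uint32_t vm_id, uint64_t start, uint64_t range,
				 struct drm_xe_mem_range_attr **out)
{
	struct drm_xe_vm_query_mem_range_attr q = {
		.vm_id = vm_id, .start = start, .range = range,
	};

	if (ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR, &q))
		return -errno;				/* learn count and entry size */

	*out = calloc(q.num_mem_ranges, q.sizeof_mem_range_attr);
	if (!*out)
		return -ENOMEM;

	q.vector_of_mem_attr = (uintptr_t)*out;
	if (ioctl(fd, DRM_IOCTL_XE_VM_QUERY_MEM_RANGE_ATTR, &q))
		return -errno;				/* fill the vector */

	return q.num_mem_ranges;
}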
+
+static bool vma_matches(struct xe_vma *vma, u64 page_addr)
+{
+ if (page_addr > xe_vma_end(vma) - 1 ||
+ page_addr + SZ_4K - 1 < xe_vma_start(vma))
+ return false;
+
+ return true;
+}
+
+/**
+ * xe_vm_find_vma_by_addr() - Find a VMA by its address
+ *
+ * @vm: the xe_vm the vma belongs to
+ * @page_addr: address to look up
+ *
+ * Return: Pointer to a VMA covering @page_addr, or NULL if none is found.
+ */
+struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr)
+{
+ struct xe_vma *vma = NULL;
+
+ if (vm->usm.last_fault_vma) { /* Fast lookup */
+ if (vma_matches(vm->usm.last_fault_vma, page_addr))
+ vma = vm->usm.last_fault_vma;
+ }
+ if (!vma)
+ vma = xe_vm_find_overlapping_vma(vm, page_addr, SZ_4K);
+
+ return vma;
+}
+
static const u32 region_to_mem_type[] = {
XE_PL_TT,
XE_PL_VRAM0,
@@ -2145,9 +2139,9 @@ static const u32 region_to_mem_type[] = {
static void prep_vma_destroy(struct xe_vm *vm, struct xe_vma *vma,
bool post_commit)
{
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags |= XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_remove_vma(vm, vma);
}
@@ -2216,13 +2210,25 @@ static bool __xe_vm_needs_clear_scratch_pages(struct xe_vm *vm, u32 bind_flags)
return true;
}
+static void xe_svm_prefetch_gpuva_ops_fini(struct drm_gpuva_ops *ops)
+{
+ struct drm_gpuva_op *__op;
+
+ drm_gpuva_for_each_op(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+ xe_vma_svm_prefetch_op_fini(op);
+ }
+}
+
/*
* Create operations list from IOCTL arguments, setup operations fields so parse
* and commit steps are decoupled from IOCTL arguments. This step can fail.
*/
static struct drm_gpuva_ops *
-vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
- u64 bo_offset_or_userptr, u64 addr, u64 range,
+vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
+ struct xe_bo *bo, u64 bo_offset_or_userptr,
+ u64 addr, u64 range,
u32 operation, u32 flags,
u32 prefetch_region, u16 pat_index)
{
@@ -2230,6 +2236,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
struct drm_gpuva_ops *ops;
struct drm_gpuva_op *__op;
struct drm_gpuvm_bo *vm_bo;
+ u64 range_end = addr + range;
int err;
lockdep_assert_held_write(&vm->lock);
@@ -2241,10 +2248,17 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
switch (operation) {
case DRM_XE_VM_BIND_OP_MAP:
- case DRM_XE_VM_BIND_OP_MAP_USERPTR:
- ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
- obj, bo_offset_or_userptr);
+ case DRM_XE_VM_BIND_OP_MAP_USERPTR: {
+ struct drm_gpuvm_map_req map_req = {
+ .map.va.addr = addr,
+ .map.va.range = range,
+ .map.gem.obj = obj,
+ .map.gem.offset = bo_offset_or_userptr,
+ };
+
+ ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, &map_req);
break;
+ }
case DRM_XE_VM_BIND_OP_UNMAP:
ops = drm_gpuvm_sm_unmap_ops_create(&vm->gpuvm, addr, range);
break;
@@ -2281,30 +2295,118 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
if (__op->op == DRM_GPUVA_OP_MAP) {
op->map.immediate =
flags & DRM_XE_VM_BIND_FLAG_IMMEDIATE;
- op->map.read_only =
- flags & DRM_XE_VM_BIND_FLAG_READONLY;
- op->map.is_null = flags & DRM_XE_VM_BIND_FLAG_NULL;
- op->map.is_cpu_addr_mirror = flags &
- DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR;
- op->map.dumpable = flags & DRM_XE_VM_BIND_FLAG_DUMPABLE;
+ if (flags & DRM_XE_VM_BIND_FLAG_READONLY)
+ op->map.vma_flags |= XE_VMA_READ_ONLY;
+ if (flags & DRM_XE_VM_BIND_FLAG_NULL)
+ op->map.vma_flags |= DRM_GPUVA_SPARSE;
+ if (flags & DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
+ op->map.vma_flags |= XE_VMA_SYSTEM_ALLOCATOR;
+ if (flags & DRM_XE_VM_BIND_FLAG_DUMPABLE)
+ op->map.vma_flags |= XE_VMA_DUMPABLE;
+ if (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
+ op->map.vma_flags |= XE_VMA_MADV_AUTORESET;
op->map.pat_index = pat_index;
op->map.invalidate_on_bind =
__xe_vm_needs_clear_scratch_pages(vm, flags);
} else if (__op->op == DRM_GPUVA_OP_PREFETCH) {
- op->prefetch.region = prefetch_region;
- }
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ struct xe_tile *tile;
+ struct xe_svm_range *svm_range;
+ struct drm_gpusvm_ctx ctx = {};
+ struct drm_pagemap *dpagemap;
+ u8 id, tile_mask = 0;
+ u32 i;
+
+ if (!xe_vma_is_cpu_addr_mirror(vma)) {
+ op->prefetch.region = prefetch_region;
+ break;
+ }
+
+ ctx.read_only = xe_vma_read_only(vma);
+ ctx.devmem_possible = IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
+
+ for_each_tile(tile, vm->xe, id)
+ tile_mask |= 0x1 << id;
+
+ xa_init_flags(&op->prefetch_range.range, XA_FLAGS_ALLOC);
+ op->prefetch_range.ranges_count = 0;
+ tile = NULL;
+
+ if (prefetch_region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
+ dpagemap = xe_vma_resolve_pagemap(vma,
+ xe_device_get_root_tile(vm->xe));
+ /*
+ * TODO: Once multi-GPU support is enabled, we will need a way
+ * to derive the tile from the dpagemap.
+ */
+ if (dpagemap)
+ tile = xe_device_get_root_tile(vm->xe);
+ } else if (prefetch_region) {
+ tile = &vm->xe->tiles[region_to_mem_type[prefetch_region] -
+ XE_PL_VRAM0];
+ }
+
+ op->prefetch_range.tile = tile;
+alloc_next_range:
+ svm_range = xe_svm_range_find_or_insert(vm, addr, vma, &ctx);
+
+ if (PTR_ERR(svm_range) == -ENOENT) {
+ u64 ret = xe_svm_find_vma_start(vm, addr, range_end, vma);
+ addr = ret == ULONG_MAX ? 0 : ret;
+ if (addr)
+ goto alloc_next_range;
+ else
+ goto print_op_label;
+ }
+
+ if (IS_ERR(svm_range)) {
+ err = PTR_ERR(svm_range);
+ goto unwind_prefetch_ops;
+ }
+
+ if (xe_svm_range_validate(vm, svm_range, tile_mask, !!tile)) {
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE IS VALID");
+ goto check_next_range;
+ }
+
+ err = xa_alloc(&op->prefetch_range.range,
+ &i, svm_range, xa_limit_32b,
+ GFP_KERNEL);
+
+ if (err)
+ goto unwind_prefetch_ops;
+
+ op->prefetch_range.ranges_count++;
+ vops->flags |= XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH;
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE CREATED");
+check_next_range:
+ if (range_end > xe_svm_range_end(svm_range) &&
+ xe_svm_range_end(svm_range) < xe_vma_end(vma)) {
+ addr = xe_svm_range_end(svm_range);
+ goto alloc_next_range;
+ }
+ }
+print_op_label:
print_op(vm->xe, __op);
}
return ops;
+
+unwind_prefetch_ops:
+ xe_svm_prefetch_gpuva_ops_fini(ops);
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+ return ERR_PTR(err);
}
+
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
- u16 pat_index, unsigned int flags)
+ struct xe_vma_mem_attr *attr, unsigned int flags)
{
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct xe_vma *vma;
int err = 0;
@@ -2312,9 +2414,9 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
lockdep_assert_held_write(&vm->lock);
if (bo) {
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
- drm_exec_until_all_locked(&exec) {
- err = 0;
+ err = 0;
+ xe_validation_guard(&ctx, &vm->xe->val, &exec,
+ (struct xe_val_flags) {.interruptible = true}, err) {
if (!bo->vm) {
err = drm_exec_lock_obj(&exec, xe_vm_obj(vm));
drm_exec_retry_on_contention(&exec);
@@ -2323,27 +2425,35 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
err = drm_exec_lock_obj(&exec, &bo->ttm.base);
drm_exec_retry_on_contention(&exec);
}
- if (err) {
- drm_exec_fini(&exec);
+ if (err)
return ERR_PTR(err);
+
+ vma = xe_vma_create(vm, bo, op->gem.offset,
+ op->va.addr, op->va.addr +
+ op->va.range - 1, attr, flags);
+ if (IS_ERR(vma))
+ return vma;
+
+ if (!bo->vm) {
+ err = add_preempt_fences(vm, bo);
+ if (err) {
+ prep_vma_destroy(vm, vma, false);
+ xe_vma_destroy(vma, NULL);
+ }
}
}
+ if (err)
+ return ERR_PTR(err);
+ } else {
+ vma = xe_vma_create(vm, NULL, op->gem.offset,
+ op->va.addr, op->va.addr +
+ op->va.range - 1, attr, flags);
+ if (IS_ERR(vma))
+ return vma;
+
+ if (xe_vma_is_userptr(vma))
+ err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
}
- vma = xe_vma_create(vm, bo, op->gem.offset,
- op->va.addr, op->va.addr +
- op->va.range - 1, pat_index, flags);
- if (IS_ERR(vma))
- goto err_unlock;
-
- if (xe_vma_is_userptr(vma))
- err = xe_vma_userptr_pin_pages(to_userptr_vma(vma));
- else if (!xe_vma_has_no_bo(vma) && !bo->vm)
- err = add_preempt_fences(vm, bo);
-
-err_unlock:
- if (bo)
- drm_exec_fini(&exec);
-
if (err) {
prep_vma_destroy(vm, vma, false);
xe_vma_destroy_unlocked(vma);
@@ -2448,6 +2558,29 @@ static int xe_vma_op_commit(struct xe_vm *vm, struct xe_vma_op *op)
return err;
}
+/**
+ * xe_vma_has_default_mem_attrs - Check if a VMA has default memory attributes
+ * @vma: Pointer to the xe_vma structure to check
+ *
+ * This function determines whether the given VMA (Virtual Memory Area)
+ * has its memory attributes set to their default values. Specifically,
+ * it checks the following conditions:
+ *
+ * - `atomic_access` is `DRM_XE_ATOMIC_UNDEFINED`
+ * - `pat_index` is equal to `default_pat_index`
+ * - `preferred_loc.devmem_fd` is `DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE`
+ * - `preferred_loc.migration_policy` is `DRM_XE_MIGRATE_ALL_PAGES`
+ *
+ * Return: true if all attributes are at their default values, false otherwise.
+ */
+bool xe_vma_has_default_mem_attrs(struct xe_vma *vma)
+{
+ return (vma->attr.atomic_access == DRM_XE_ATOMIC_UNDEFINED &&
+ vma->attr.pat_index == vma->attr.default_pat_index &&
+ vma->attr.preferred_loc.devmem_fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE &&
+ vma->attr.preferred_loc.migration_policy == DRM_XE_MIGRATE_ALL_PAGES);
+}
+
static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
struct xe_vma_ops *vops)
{
@@ -2474,26 +2607,29 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
{
- flags |= op->map.read_only ?
- VMA_CREATE_FLAG_READ_ONLY : 0;
- flags |= op->map.is_null ?
- VMA_CREATE_FLAG_IS_NULL : 0;
- flags |= op->map.dumpable ?
- VMA_CREATE_FLAG_DUMPABLE : 0;
- flags |= op->map.is_cpu_addr_mirror ?
- VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
-
- vma = new_vma(vm, &op->base.map, op->map.pat_index,
+ struct xe_vma_mem_attr default_attr = {
+ .preferred_loc = {
+ .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
+ .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
+ },
+ .atomic_access = DRM_XE_ATOMIC_UNDEFINED,
+ .default_pat_index = op->map.pat_index,
+ .pat_index = op->map.pat_index,
+ };
+
+ flags |= op->map.vma_flags & XE_VMA_CREATE_MASK;
+
+ vma = new_vma(vm, &op->base.map, &default_attr,
flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
op->map.vma = vma;
if (((op->map.immediate || !xe_vm_in_fault_mode(vm)) &&
- !op->map.is_cpu_addr_mirror) ||
+ !(op->map.vma_flags & XE_VMA_SYSTEM_ALLOCATOR)) ||
op->map.invalidate_on_bind)
xe_vma_ops_incr_pt_update_ops(vops,
- op->tile_mask);
+ op->tile_mask, 1);
break;
}
case DRM_GPUVA_OP_REMAP:
@@ -2502,6 +2638,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
gpuva_to_vma(op->base.remap.unmap->va);
bool skip = xe_vma_is_cpu_addr_mirror(old);
u64 start = xe_vma_start(old), end = xe_vma_end(old);
+ int num_remap_ops = 0;
if (op->base.remap.prev)
start = op->base.remap.prev->va.addr +
@@ -2510,27 +2647,20 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
end = op->base.remap.next->va.addr;
if (xe_vma_is_cpu_addr_mirror(old) &&
- xe_svm_has_mapping(vm, start, end))
- return -EBUSY;
+ xe_svm_has_mapping(vm, start, end)) {
+ if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
+ xe_svm_unmap_address_range(vm, start, end);
+ else
+ return -EBUSY;
+ }
op->remap.start = xe_vma_start(old);
op->remap.range = xe_vma_size(old);
- flags |= op->base.remap.unmap->va->flags &
- XE_VMA_READ_ONLY ?
- VMA_CREATE_FLAG_READ_ONLY : 0;
- flags |= op->base.remap.unmap->va->flags &
- DRM_GPUVA_SPARSE ?
- VMA_CREATE_FLAG_IS_NULL : 0;
- flags |= op->base.remap.unmap->va->flags &
- XE_VMA_DUMPABLE ?
- VMA_CREATE_FLAG_DUMPABLE : 0;
- flags |= xe_vma_is_cpu_addr_mirror(old) ?
- VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
-
+ flags |= op->base.remap.unmap->va->flags & XE_VMA_CREATE_MASK;
if (op->base.remap.prev) {
vma = new_vma(vm, op->base.remap.prev,
- old->pat_index, flags);
+ &old->attr, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2554,13 +2684,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
(ULL)op->remap.start,
(ULL)op->remap.range);
} else {
- xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+ num_remap_ops++;
}
}
if (op->base.remap.next) {
vma = new_vma(vm, op->base.remap.next,
- old->pat_index, flags);
+ &old->attr, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2583,11 +2713,13 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
(ULL)op->remap.start,
(ULL)op->remap.range);
} else {
- xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+ num_remap_ops++;
}
}
if (!skip)
- xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+ num_remap_ops++;
+
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, num_remap_ops);
break;
}
case DRM_GPUVA_OP_UNMAP:
@@ -2599,7 +2731,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
return -EBUSY;
if (!xe_vma_is_cpu_addr_mirror(vma))
- xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
break;
case DRM_GPUVA_OP_PREFETCH:
vma = gpuva_to_vma(op->base.prefetch.va);
@@ -2610,8 +2742,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
return err;
}
- if (!xe_vma_is_cpu_addr_mirror(vma))
- xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask);
+ if (xe_vma_is_cpu_addr_mirror(vma))
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask,
+ op->prefetch_range.ranges_count);
+ else
+ xe_vma_ops_incr_pt_update_ops(vops, op->tile_mask, 1);
+
break;
default:
drm_warn(&vm->xe->drm, "NOT POSSIBLE");
@@ -2643,9 +2779,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
struct xe_vma *vma = gpuva_to_vma(op->base.unmap.va);
if (vma) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_insert_vma(vm, vma);
}
@@ -2664,9 +2800,9 @@ static void xe_vma_op_unwind(struct xe_vm *vm, struct xe_vma_op *op,
xe_vma_destroy_unlocked(op->remap.next);
}
if (vma) {
- down_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_lock(vm);
vma->gpuva.flags &= ~XE_VMA_DESTROYED;
- up_read(&vm->userptr.notifier_lock);
+ xe_svm_notifier_unlock(vm);
if (post_commit)
xe_vm_insert_vma(vm, vma);
}
@@ -2705,7 +2841,7 @@ static void vm_bind_ioctl_ops_unwind(struct xe_vm *vm,
}
static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
- bool validate)
+ bool res_evict, bool validate)
{
struct xe_bo *bo = xe_vma_bo(vma);
struct xe_vm *vm = xe_vma_vm(vma);
@@ -2716,7 +2852,8 @@ static int vma_lock_and_validate(struct drm_exec *exec, struct xe_vma *vma,
err = drm_exec_lock_obj(exec, &bo->ttm.base);
if (!err && validate)
err = xe_bo_validate(bo, vm,
- !xe_vm_in_preempt_fence_mode(vm));
+ !xe_vm_in_preempt_fence_mode(vm) &&
+ res_evict, exec);
}
return err;
@@ -2737,15 +2874,72 @@ static int check_ufence(struct xe_vma *vma)
return 0;
}
+static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
+{
+ bool devmem_possible = IS_DGFX(vm->xe) && IS_ENABLED(CONFIG_DRM_XE_PAGEMAP);
+ struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
+ struct xe_tile *tile = op->prefetch_range.tile;
+ int err = 0;
+
+ struct xe_svm_range *svm_range;
+ struct drm_gpusvm_ctx ctx = {};
+ unsigned long i;
+
+ if (!xe_vma_is_cpu_addr_mirror(vma))
+ return 0;
+
+ ctx.read_only = xe_vma_read_only(vma);
+ ctx.devmem_possible = devmem_possible;
+ ctx.check_pages_threshold = devmem_possible ? SZ_64K : 0;
+ ctx.device_private_page_owner = xe_svm_devm_owner(vm->xe);
+
+ /* TODO: Threading the migration */
+ xa_for_each(&op->prefetch_range.range, i, svm_range) {
+ if (!tile)
+ xe_svm_range_migrate_to_smem(vm, svm_range);
+
+ if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
+ err = xe_svm_alloc_vram(tile, svm_range, &ctx);
+ if (err) {
+ drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ return -ENODATA;
+ }
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE MIGRATED TO VRAM");
+ }
+
+ err = xe_svm_range_get_pages(vm, svm_range, &ctx);
+ if (err) {
+ drm_dbg(&vm->xe->drm, "Get pages failed, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM)
+ err = -ENODATA;
+ return err;
+ }
+ xe_svm_range_debug(svm_range, "PREFETCH - RANGE GET PAGES DONE");
+ }
+
+ return err;
+}
+
static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
- struct xe_vma_op *op)
+ struct xe_vma_ops *vops, struct xe_vma_op *op)
{
int err = 0;
+ bool res_evict;
+
+ /*
+ * We only allow evicting a BO within the VM if it is not part of an
+ * array of binds, as an array of binds can evict another BO within the
+ * bind.
+ */
+ res_evict = !(vops->flags & XE_VMA_OPS_ARRAY_OF_BINDS);
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
if (!op->map.invalidate_on_bind)
err = vma_lock_and_validate(exec, op->map.vma,
+ res_evict,
!xe_vm_in_fault_mode(vm) ||
op->map.immediate);
break;
@@ -2756,11 +2950,13 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.remap.unmap->va),
- false);
+ res_evict, false);
if (!err && op->remap.prev)
- err = vma_lock_and_validate(exec, op->remap.prev, true);
+ err = vma_lock_and_validate(exec, op->remap.prev,
+ res_evict, true);
if (!err && op->remap.next)
- err = vma_lock_and_validate(exec, op->remap.next, true);
+ err = vma_lock_and_validate(exec, op->remap.next,
+ res_evict, true);
break;
case DRM_GPUVA_OP_UNMAP:
err = check_ufence(gpuva_to_vma(op->base.unmap.va));
@@ -2769,21 +2965,27 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.unmap.va),
- false);
+ res_evict, false);
break;
case DRM_GPUVA_OP_PREFETCH:
{
struct xe_vma *vma = gpuva_to_vma(op->base.prefetch.va);
- u32 region = op->prefetch.region;
+ u32 region;
- xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
+ if (!xe_vma_is_cpu_addr_mirror(vma)) {
+ region = op->prefetch.region;
+ xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
+ region <= ARRAY_SIZE(region_to_mem_type));
+ }
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
- false);
+ res_evict, false);
if (!err && !xe_vma_has_no_bo(vma))
err = xe_bo_migrate(xe_vma_bo(vma),
- region_to_mem_type[region]);
+ region_to_mem_type[region],
+ NULL,
+ exec);
break;
}
default:
@@ -2793,6 +2995,25 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
return err;
}
+static int vm_bind_ioctl_ops_prefetch_ranges(struct xe_vm *vm, struct xe_vma_ops *vops)
+{
+ struct xe_vma_op *op;
+ int err;
+
+ if (!(vops->flags & XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH))
+ return 0;
+
+ list_for_each_entry(op, &vops->list, link) {
+ if (op->base.op == DRM_GPUVA_OP_PREFETCH) {
+ err = prefetch_ranges(vm, op);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
struct xe_vm *vm,
struct xe_vma_ops *vops)
@@ -2805,7 +3026,7 @@ static int vm_bind_ioctl_ops_lock_and_prep(struct drm_exec *exec,
return err;
list_for_each_entry(op, &vops->list, link) {
- err = op_lock_and_prep(exec, vm, op);
+ err = op_lock_and_prep(exec, vm, vops, op);
if (err)
return err;
}
@@ -2886,20 +3107,31 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
struct dma_fence *fence = NULL;
struct dma_fence **fences = NULL;
struct dma_fence_array *cf = NULL;
- int number_tiles = 0, current_fence = 0, err;
+ int number_tiles = 0, current_fence = 0, n_fence = 0, err;
u8 id;
number_tiles = vm_ops_setup_tile_args(vm, vops);
if (number_tiles == 0)
return ERR_PTR(-ENODATA);
- if (number_tiles > 1) {
- fences = kmalloc_array(number_tiles, sizeof(*fences),
- GFP_KERNEL);
- if (!fences) {
- fence = ERR_PTR(-ENOMEM);
- goto err_trace;
- }
+ if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT) {
+ for_each_tile(tile, vm->xe, id)
+ ++n_fence;
+ } else {
+ for_each_tile(tile, vm->xe, id)
+ n_fence += (1 + XE_MAX_GT_PER_TILE);
+ }
+
+ fences = kmalloc_array(n_fence, sizeof(*fences), GFP_KERNEL);
+ if (!fences) {
+ fence = ERR_PTR(-ENOMEM);
+ goto err_trace;
+ }
+
+ cf = dma_fence_array_alloc(n_fence);
+ if (!cf) {
+ fence = ERR_PTR(-ENOMEM);
+ goto err_out;
}
for_each_tile(tile, vm->xe, id) {
@@ -2916,30 +3148,34 @@ static struct dma_fence *ops_execute(struct xe_vm *vm,
trace_xe_vm_ops_execute(vops);
for_each_tile(tile, vm->xe, id) {
+ struct xe_exec_queue *q = vops->pt_update_ops[tile->id].q;
+ int i;
+
+ fence = NULL;
if (!vops->pt_update_ops[id].num_ops)
- continue;
+ goto collect_fences;
fence = xe_pt_update_ops_run(tile, vops);
if (IS_ERR(fence))
goto err_out;
- if (fences)
- fences[current_fence++] = fence;
- }
+collect_fences:
+ fences[current_fence++] = fence ?: dma_fence_get_stub();
+ if (vops->flags & XE_VMA_OPS_FLAG_SKIP_TLB_WAIT)
+ continue;
- if (fences) {
- cf = dma_fence_array_create(number_tiles, fences,
- vm->composite_fence_ctx,
- vm->composite_fence_seqno++,
- false);
- if (!cf) {
- --vm->composite_fence_seqno;
- fence = ERR_PTR(-ENOMEM);
- goto err_out;
- }
- fence = &cf->base;
+ xe_migrate_job_lock(tile->migrate, q);
+ for_each_tlb_inval(i)
+ fences[current_fence++] =
+ xe_exec_queue_tlb_inval_last_fence_get(q, vm, i);
+ xe_migrate_job_unlock(tile->migrate, q);
}
+ xe_assert(vm->xe, current_fence == n_fence);
+ dma_fence_array_init(cf, n_fence, fences, dma_fence_context_alloc(1),
+ 1, false);
+ fence = &cf->base;
+
for_each_tile(tile, vm->xe, id) {
if (!vops->pt_update_ops[id].num_ops)
continue;
@@ -2999,7 +3235,6 @@ static void op_add_ufence(struct xe_vm *vm, struct xe_vma_op *op,
static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
struct dma_fence *fence)
{
- struct xe_exec_queue *wait_exec_queue = to_wait_exec_queue(vm, vops->q);
struct xe_user_fence *ufence;
struct xe_vma_op *op;
int i;
@@ -3020,42 +3255,43 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
if (fence) {
for (i = 0; i < vops->num_syncs; i++)
xe_sync_entry_signal(vops->syncs + i, fence);
- xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
}
}
static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
struct xe_vma_ops *vops)
{
+ struct xe_validation_ctx ctx;
struct drm_exec exec;
struct dma_fence *fence;
- int err;
+ int err = 0;
lockdep_assert_held_write(&vm->lock);
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT |
- DRM_EXEC_IGNORE_DUPLICATES, 0);
- drm_exec_until_all_locked(&exec) {
+ xe_validation_guard(&ctx, &vm->xe->val, &exec,
+ ((struct xe_val_flags) {
+ .interruptible = true,
+ .exec_ignore_duplicates = true,
+ }), err) {
err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
drm_exec_retry_on_contention(&exec);
- if (err) {
- fence = ERR_PTR(err);
- goto unlock;
- }
+ xe_validation_retry_on_oom(&ctx, &err);
+ if (err)
+ return ERR_PTR(err);
+ xe_vm_set_validation_exec(vm, &exec);
fence = ops_execute(vm, vops);
+ xe_vm_set_validation_exec(vm, NULL);
if (IS_ERR(fence)) {
if (PTR_ERR(fence) == -ENODATA)
vm_bind_ioctl_ops_fini(vm, vops, NULL);
- goto unlock;
+ return fence;
}
vm_bind_ioctl_ops_fini(vm, vops, fence);
}
-unlock:
- drm_exec_fini(&exec);
- return fence;
+ return err ? ERR_PTR(err) : fence;
}
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
@@ -3065,7 +3301,8 @@ ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
DRM_XE_VM_BIND_FLAG_NULL | \
DRM_XE_VM_BIND_FLAG_DUMPABLE | \
DRM_XE_VM_BIND_FLAG_CHECK_PXP | \
- DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR)
+ DRM_XE_VM_BIND_FLAG_CPU_ADDR_MIRROR | \
+ DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET)
#ifdef TEST_VM_OPS_ERROR
#define SUPPORTED_FLAGS (SUPPORTED_FLAGS_STUB | FORCE_OP_ERROR)
@@ -3090,6 +3327,9 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
if (XE_IOCTL_DBG(xe, args->extensions))
return -EINVAL;
+ if (XE_IOCTL_DBG(xe, args->num_syncs > DRM_XE_MAX_SYNCS))
+ return -EINVAL;
+
if (args->num_binds > 1) {
u64 __user *bind_user =
u64_to_user_ptr(args->vector_of_binds);
@@ -3171,14 +3411,20 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
XE_IOCTL_DBG(xe, coh_mode == XE_COH_NONE &&
op == DRM_XE_VM_BIND_OP_MAP_USERPTR) ||
+ XE_IOCTL_DBG(xe, op == DRM_XE_VM_BIND_OP_MAP_USERPTR &&
+ !IS_ENABLED(CONFIG_DRM_GPUSVM)) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
op != DRM_XE_VM_BIND_OP_PREFETCH) ||
- XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
- xe->info.mem_region_mask)) ||
+ XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
+ /* Guard against undefined shift in BIT(prefetch_region) */
+ (prefetch_region >= (sizeof(xe->info.mem_region_mask) * 8) ||
+ !(BIT(prefetch_region) & xe->info.mem_region_mask)))) ||
XE_IOCTL_DBG(xe, obj &&
- op == DRM_XE_VM_BIND_OP_UNMAP)) {
+ op == DRM_XE_VM_BIND_OP_UNMAP) ||
+ XE_IOCTL_DBG(xe, (flags & DRM_XE_VM_BIND_FLAG_MADVISE_AUTORESET) &&
+ (!is_cpu_addr_mirror || op != DRM_XE_VM_BIND_OP_MAP))) {
err = -EINVAL;
goto free_bind_ops;
}
@@ -3198,6 +3444,7 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
free_bind_ops:
if (args->num_binds > 1)
kvfree(*bind_ops);
+ *bind_ops = NULL;
return err;
}
@@ -3206,19 +3453,19 @@ static int vm_bind_ioctl_signal_fences(struct xe_vm *vm,
struct xe_sync_entry *syncs,
int num_syncs)
{
- struct dma_fence *fence;
+ struct dma_fence *fence = NULL;
int i, err = 0;
- fence = xe_sync_in_fence_get(syncs, num_syncs,
- to_wait_exec_queue(vm, q), vm);
- if (IS_ERR(fence))
- return PTR_ERR(fence);
+ if (num_syncs) {
+ fence = xe_sync_in_fence_get(syncs, num_syncs,
+ to_wait_exec_queue(vm, q), vm);
+ if (IS_ERR(fence))
+ return PTR_ERR(fence);
- for (i = 0; i < num_syncs; i++)
- xe_sync_entry_signal(&syncs[i], fence);
+ for (i = 0; i < num_syncs; i++)
+ xe_sync_entry_signal(&syncs[i], fence);
+ }
- xe_exec_queue_last_fence_set(to_wait_exec_queue(vm, q), vm,
- fence);
dma_fence_put(fence);
return err;
@@ -3234,6 +3481,7 @@ static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm *vm,
vops->q = q;
vops->syncs = syncs;
vops->num_syncs = num_syncs;
+ vops->flags = 0;
}
static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
@@ -3242,9 +3490,9 @@ static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct xe_bo *bo,
{
u16 coh_mode;
- if (XE_IOCTL_DBG(xe, range > bo->size) ||
+ if (XE_IOCTL_DBG(xe, range > xe_bo_size(bo)) ||
XE_IOCTL_DBG(xe, obj_offset >
- bo->size - range)) {
+ xe_bo_size(bo) - range)) {
return -EINVAL;
}
@@ -3303,7 +3551,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
struct xe_exec_queue *q = NULL;
u32 num_syncs, num_ufence = 0;
struct xe_sync_entry *syncs = NULL;
- struct drm_xe_vm_bind_op *bind_ops;
+ struct drm_xe_vm_bind_op *bind_ops = NULL;
struct xe_vma_ops vops;
struct dma_fence *fence;
int err;
@@ -3321,7 +3569,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
q = xe_exec_queue_lookup(xef, args->exec_queue_id);
if (XE_IOCTL_DBG(xe, !q)) {
err = -ENOENT;
- goto put_vm;
+ goto free_bind_ops;
}
if (XE_IOCTL_DBG(xe, !(q->flags & EXEC_QUEUE_FLAG_VM))) {
@@ -3367,7 +3615,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
__GFP_RETRY_MAYFAIL | __GFP_NOWARN);
if (!ops) {
err = -ENOMEM;
- goto release_vm_lock;
+ goto free_bos;
}
}
@@ -3408,8 +3656,12 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
syncs_user = u64_to_user_ptr(args->syncs);
for (num_syncs = 0; num_syncs < args->num_syncs; num_syncs++) {
+ struct xe_exec_queue *__q = q ?: vm->q[0];
+
err = xe_sync_entry_parse(xe, xef, &syncs[num_syncs],
&syncs_user[num_syncs],
+ __q->ufence_syncobj,
+ ++__q->ufence_timeline_value,
(xe_vm_in_lr_mode(vm) ?
SYNC_PARSE_FLAG_LR_MODE : 0) |
(!args->num_binds ?
@@ -3432,6 +3684,8 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
}
xe_vma_ops_init(&vops, vm, q, syncs, num_syncs);
+ if (args->num_binds > 1)
+ vops.flags |= XE_VMA_OPS_ARRAY_OF_BINDS;
for (i = 0; i < args->num_binds; ++i) {
u64 range = bind_ops[i].range;
u64 addr = bind_ops[i].addr;
@@ -3441,7 +3695,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
u32 prefetch_region = bind_ops[i].prefetch_mem_region_instance;
u16 pat_index = bind_ops[i].pat_index;
- ops[i] = vm_bind_ioctl_ops_create(vm, bos[i], obj_offset,
+ ops[i] = vm_bind_ioctl_ops_create(vm, &vops, bos[i], obj_offset,
addr, range, op, flags,
prefetch_region, pat_index);
if (IS_ERR(ops[i])) {
@@ -3474,6 +3728,10 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
if (err)
goto unwind_ops;
+ err = vm_bind_ioctl_ops_prefetch_ranges(vm, &vops);
+ if (err)
+ goto unwind_ops;
+
fence = vm_bind_ioctl_ops_execute(vm, &vops);
if (IS_ERR(fence))
err = PTR_ERR(fence);
@@ -3497,17 +3755,20 @@ free_syncs:
put_obj:
for (i = 0; i < args->num_binds; ++i)
xe_bo_put(bos[i]);
+
+ kvfree(ops);
+free_bos:
+ kvfree(bos);
release_vm_lock:
up_write(&vm->lock);
put_exec_queue:
if (q)
xe_exec_queue_put(q);
-put_vm:
- xe_vm_put(vm);
- kvfree(bos);
- kvfree(ops);
+free_bind_ops:
if (args->num_binds > 1)
kvfree(bind_ops);
+put_vm:
+ xe_vm_put(vm);
return err;
}
@@ -3543,7 +3804,7 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
xe_vma_ops_init(&vops, vm, q, NULL, 0);
- ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
+ ops = vm_bind_ioctl_ops_create(vm, &vops, bo, 0, addr, xe_bo_size(bo),
DRM_XE_VM_BIND_OP_MAP, 0, 0,
vm->xe->pat.idx[cache_lvl]);
if (IS_ERR(ops)) {
@@ -3597,10 +3858,14 @@ release_vm_lock:
*/
int xe_vm_lock(struct xe_vm *vm, bool intr)
{
+ int ret;
+
if (intr)
- return dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ ret = dma_resv_lock_interruptible(xe_vm_resv(vm), NULL);
+ else
+ ret = dma_resv_lock(xe_vm_resv(vm), NULL);
- return dma_resv_lock(xe_vm_resv(vm), NULL);
+ return ret;
}
/**
@@ -3615,6 +3880,66 @@ void xe_vm_unlock(struct xe_vm *vm)
}
/**
+ * xe_vm_range_tilemask_tlb_inval - Issue a TLB invalidation on this tilemask for an
+ * address range
+ * @vm: The VM
+ * @start: start address
+ * @end: end address
+ * @tile_mask: mask of tiles whose GTs should receive the TLB invalidation
+ *
+ * Issue a range-based TLB invalidation for the GTs of the tiles selected by
+ * @tile_mask.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
+ u64 end, u8 tile_mask)
+{
+ struct xe_tlb_inval_fence
+ fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+ struct xe_tile *tile;
+ u32 fence_id = 0;
+ u8 id;
+ int err;
+
+ if (!tile_mask)
+ return 0;
+
+ for_each_tile(tile, vm->xe, id) {
+ if (!(tile_mask & BIT(id)))
+ continue;
+
+ xe_tlb_inval_fence_init(&tile->primary_gt->tlb_inval,
+ &fence[fence_id], true);
+
+ err = xe_tlb_inval_range(&tile->primary_gt->tlb_inval,
+ &fence[fence_id], start, end,
+ vm->usm.asid);
+ if (err)
+ goto wait;
+ ++fence_id;
+
+ if (!tile->media_gt)
+ continue;
+
+ xe_tlb_inval_fence_init(&tile->media_gt->tlb_inval,
+ &fence[fence_id], true);
+
+ err = xe_tlb_inval_range(&tile->media_gt->tlb_inval,
+ &fence[fence_id], start, end,
+ vm->usm.asid);
+ if (err)
+ goto wait;
+ ++fence_id;
+ }
+
+wait:
+ for (id = 0; id < fence_id; ++id)
+ xe_tlb_inval_fence_wait(&fence[id]);
+
+ return err;
+}
+
+/**
* xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
* @vma: VMA to invalidate
*
@@ -3627,28 +3952,34 @@ void xe_vm_unlock(struct xe_vm *vm)
int xe_vm_invalidate_vma(struct xe_vma *vma)
{
struct xe_device *xe = xe_vma_vm(vma)->xe;
+ struct xe_vm *vm = xe_vma_vm(vma);
struct xe_tile *tile;
- struct xe_gt_tlb_invalidation_fence
- fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
- u8 id;
- u32 fence_id = 0;
+ u8 tile_mask = 0;
int ret = 0;
+ u8 id;
xe_assert(xe, !xe_vma_is_null(vma));
xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
trace_xe_vma_invalidate(vma);
- vm_dbg(&xe_vma_vm(vma)->xe->drm,
+ vm_dbg(&vm->xe->drm,
"INVALIDATE: addr=0x%016llx, range=0x%016llx",
xe_vma_start(vma), xe_vma_size(vma));
- /* Check that we don't race with page-table updates */
+ /*
+ * Check that we don't race with page-table updates; the tile_invalidated
+ * update is safe.
+ */
if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
if (xe_vma_is_userptr(vma)) {
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
+ (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
+ lockdep_is_held(&xe_vm_resv(vm)->lock.base)));
+
WARN_ON_ONCE(!mmu_interval_check_retry
(&to_userptr_vma(vma)->userptr.notifier,
- to_userptr_vma(vma)->userptr.notifier_seq));
- WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+ to_userptr_vma(vma)->userptr.pages.notifier_seq));
+ WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(vm),
DMA_RESV_USAGE_BOOKKEEP));
} else {
@@ -3656,39 +3987,17 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
}
}
- for_each_tile(tile, xe, id) {
- if (xe_pt_zap_ptes(tile, vma)) {
- xe_device_wmb(xe);
- xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
- &fence[fence_id],
- true);
-
- ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
- &fence[fence_id], vma);
- if (ret)
- goto wait;
- ++fence_id;
-
- if (!tile->media_gt)
- continue;
-
- xe_gt_tlb_invalidation_fence_init(tile->media_gt,
- &fence[fence_id],
- true);
+ for_each_tile(tile, xe, id)
+ if (xe_pt_zap_ptes(tile, vma))
+ tile_mask |= BIT(id);
- ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
- &fence[fence_id], vma);
- if (ret)
- goto wait;
- ++fence_id;
- }
- }
+ xe_device_wmb(xe);
-wait:
- for (id = 0; id < fence_id; ++id)
- xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+ ret = xe_vm_range_tilemask_tlb_inval(xe_vma_vm(vma), xe_vma_start(vma),
+ xe_vma_end(vma), tile_mask);
- vma->tile_invalidated = vma->tile_mask;
+ /* WRITE_ONCE pairs with READ_ONCE in xe_vm_has_valid_gpu_mapping() */
+ WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
return ret;
}
@@ -3887,3 +4196,221 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
}
kvfree(snap);
}
+
+/**
+ * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
+ * @xe: Pointer to the Xe device structure
+ * @vma: Pointer to the virtual memory area (VMA) structure
+ * @is_atomic: In pagefault path and atomic operation
+ *
+ * This function determines whether the given VMA needs to be migrated to
+ * VRAM in order to do atomic GPU operation.
+ *
+ * Return:
+ * 1 - Migration to VRAM is required
+ * 0 - Migration is not required
+ * -EACCES - Atomic access is not allowed by the VMA's memory attributes
+ *
+ */
+int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
+{
+ u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
+ vma->attr.atomic_access;
+
+ if (!IS_DGFX(xe) || !is_atomic)
+ return false;
+
+ /*
+ * NOTE: The checks implemented here are platform-specific. For
+ * instance, on a device supporting CXL atomics, these would ideally
+ * work universally without additional handling.
+ */
+ switch (atomic_access) {
+ case DRM_XE_ATOMIC_DEVICE:
+ return !xe->info.has_device_atomics_on_smem;
+
+ case DRM_XE_ATOMIC_CPU:
+ return -EACCES;
+
+ case DRM_XE_ATOMIC_UNDEFINED:
+ case DRM_XE_ATOMIC_GLOBAL:
+ default:
+ return 1;
+ }
+}
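/*
 * Hedged caller sketch, not part of this patch: a page-fault path consuming
 * the tri-state return above. handle_vram_migration() is a hypothetical
 * placeholder for the real migration helper.
 */
static int handle_atomic_fault(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
{
	int need_vram = xe_vma_need_vram_for_atomic(xe, vma, is_atomic);

	if (need_vram < 0)
		return need_vram;	/* -EACCES: CPU-only atomics, fail the fault */

	return need_vram ? handle_vram_migration(xe, vma) : 0;	/* hypothetical helper */
}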
+
+static int xe_vm_alloc_vma(struct xe_vm *vm,
+ struct drm_gpuvm_map_req *map_req,
+ bool is_madvise)
+{
+ struct xe_vma_ops vops;
+ struct drm_gpuva_ops *ops = NULL;
+ struct drm_gpuva_op *__op;
+ unsigned int vma_flags = 0;
+ bool remap_op = false;
+ struct xe_vma_mem_attr tmp_attr;
+ u16 default_pat;
+ int err;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ if (is_madvise)
+ ops = drm_gpuvm_madvise_ops_create(&vm->gpuvm, map_req);
+ else
+ ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, map_req);
+
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ if (list_empty(&ops->list)) {
+ err = 0;
+ goto free_ops;
+ }
+
+ drm_gpuva_for_each_op(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+ struct xe_vma *vma = NULL;
+
+ if (!is_madvise) {
+ if (__op->op == DRM_GPUVA_OP_UNMAP) {
+ vma = gpuva_to_vma(op->base.unmap.va);
+ XE_WARN_ON(!xe_vma_has_default_mem_attrs(vma));
+ default_pat = vma->attr.default_pat_index;
+ vma_flags = vma->gpuva.flags;
+ }
+
+ if (__op->op == DRM_GPUVA_OP_REMAP) {
+ vma = gpuva_to_vma(op->base.remap.unmap->va);
+ default_pat = vma->attr.default_pat_index;
+ vma_flags = vma->gpuva.flags;
+ }
+
+ if (__op->op == DRM_GPUVA_OP_MAP) {
+ op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
+ op->map.pat_index = default_pat;
+ }
+ } else {
+ if (__op->op == DRM_GPUVA_OP_REMAP) {
+ vma = gpuva_to_vma(op->base.remap.unmap->va);
+ xe_assert(vm->xe, !remap_op);
+ xe_assert(vm->xe, xe_vma_has_no_bo(vma));
+ remap_op = true;
+ vma_flags = vma->gpuva.flags;
+ }
+
+ if (__op->op == DRM_GPUVA_OP_MAP) {
+ xe_assert(vm->xe, remap_op);
+ remap_op = false;
+ /*
+ * In case of madvise ops DRM_GPUVA_OP_MAP is
+ * always after DRM_GPUVA_OP_REMAP, so ensure
+ * to propagate the flags from the vma we're
+ * unmapping.
+ */
+ op->map.vma_flags |= vma_flags & XE_VMA_CREATE_MASK;
+ }
+ }
+ print_op(vm->xe, __op);
+ }
+
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+
+ if (is_madvise)
+ vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
+
+ err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
+ if (err)
+ goto unwind_ops;
+
+ xe_vm_lock(vm, false);
+
+ drm_gpuva_for_each_op(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+ struct xe_vma *vma;
+
+ if (__op->op == DRM_GPUVA_OP_UNMAP) {
+ vma = gpuva_to_vma(op->base.unmap.va);
+ /* There should be no unmap for madvise */
+ if (is_madvise)
+ XE_WARN_ON("UNEXPECTED UNMAP");
+
+ xe_vma_destroy(vma, NULL);
+ } else if (__op->op == DRM_GPUVA_OP_REMAP) {
+ vma = gpuva_to_vma(op->base.remap.unmap->va);
+ /* For madvise ops, store the attributes of the VMA unmapped by
+ * the REMAP so they can be assigned to the newly created MAP vma.
+ */
+ if (is_madvise)
+ tmp_attr = vma->attr;
+
+ xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
+ } else if (__op->op == DRM_GPUVA_OP_MAP) {
+ vma = op->map.vma;
+ /* For madvise ops, a MAP always follows a REMAP, so tmp_attr has
+ * already been populated above and it is safe to copy it to the
+ * new vma.
+ */
+ if (is_madvise)
+ vma->attr = tmp_attr;
+ }
+ }
+
+ xe_vm_unlock(vm);
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+ return 0;
+
+unwind_ops:
+ vm_bind_ioctl_ops_unwind(vm, &ops, 1);
+free_ops:
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+ return err;
+}
+
+/**
+ * xe_vm_alloc_madvise_vma - Allocate VMA's with madvise ops
+ * @vm: Pointer to the xe_vm structure
+ * @start: Starting input address
+ * @range: Size of the input range
+ *
+ * This function splits existing VMAs to create new VMAs for the user-provided
+ * input range.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
+{
+ struct drm_gpuvm_map_req map_req = {
+ .map.va.addr = start,
+ .map.va.range = range,
+ };
+
+ lockdep_assert_held_write(&vm->lock);
+
+ vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
+
+ return xe_vm_alloc_vma(vm, &map_req, true);
+}
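/*
 * Hedged usage sketch, not part of this patch: a madvise-style caller is
 * expected to split VMAs to the user range first and then update attributes
 * on the resulting VMAs while holding vm->lock for write. apply_attr() is a
 * hypothetical placeholder for the per-attribute update.
 */
static int madvise_range_sketch(struct xe_vm *vm, u64 start, u64 range,
				struct drm_xe_madvise *op)
{
	struct drm_gpuva *gpuva;
	int err;

	lockdep_assert_held_write(&vm->lock);

	err = xe_vm_alloc_madvise_vma(vm, start, range);
	if (err)
		return err;

	drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, start + range)
		apply_attr(gpuva_to_vma(gpuva), op);	/* hypothetical */

	return 0;
}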
+
+/**
+ * xe_vm_alloc_cpu_addr_mirror_vma - Allocate CPU addr mirror vma
+ * @vm: Pointer to the xe_vm structure
+ * @start: Starting input address
+ * @range: Size of the input range
+ *
+ * This function splits/merges existing VMAs to create new VMAs for the
+ * user-provided input range.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
+{
+ struct drm_gpuvm_map_req map_req = {
+ .map.va.addr = start,
+ .map.va.range = range,
+ };
+
+ lockdep_assert_held_write(&vm->lock);
+
+ vm_dbg(&vm->xe->drm, "CPU_ADDR_MIRROR_VMA_OPS_CREATE: addr=0x%016llx, size=0x%016llx",
+ start, range);
+
+ return xe_vm_alloc_vma(vm, &map_req, false);
+}
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 0ef811fc2bde..ef8a5019574e 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -26,7 +26,7 @@ struct xe_sync_entry;
struct xe_svm_range;
struct drm_exec;
-struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags);
+struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags, struct xe_file *xef);
struct xe_vm *xe_vm_lookup(struct xe_file *xef, u32 id);
int xe_vma_cmp_vma_cb(const void *key, const struct rb_node *node);
@@ -66,6 +66,8 @@ static inline bool xe_vm_is_closed_or_banned(struct xe_vm *vm)
struct xe_vma *
xe_vm_find_overlapping_vma(struct xe_vm *vm, u64 start, u64 range);
+bool xe_vma_has_default_mem_attrs(struct xe_vma *vma);
+
/**
* xe_vm_has_scratch() - Whether the vm is configured for scratch PTEs
* @vm: The vm
@@ -169,6 +171,14 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
!xe_vma_is_cpu_addr_mirror(vma);
}
+struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr);
+
+int xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic);
+
+int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
+
+int xe_vm_alloc_cpu_addr_mirror_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
+
/**
* to_userptr_vma() - Return a pointer to an embedding userptr vma
* @vma: Pointer to the embedded struct xe_vma
@@ -189,7 +199,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-
+int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
@@ -210,12 +220,6 @@ static inline bool xe_vm_in_preempt_fence_mode(struct xe_vm *vm)
int xe_vm_add_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
void xe_vm_remove_compute_exec_queue(struct xe_vm *vm, struct xe_exec_queue *q);
-int xe_vm_userptr_pin(struct xe_vm *vm);
-
-int __xe_vm_userptr_needs_repin(struct xe_vm *vm);
-
-int xe_vm_userptr_check_repin(struct xe_vm *vm);
-
int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker);
struct dma_fence *xe_vma_rebind(struct xe_vm *vm, struct xe_vma *vma,
u8 tile_mask);
@@ -226,6 +230,9 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
struct xe_svm_range *range);
+int xe_vm_range_tilemask_tlb_inval(struct xe_vm *vm, u64 start,
+ u64 end, u8 tile_mask);
+
int xe_vm_invalidate_vma(struct xe_vma *vma);
int xe_vm_validate_protected(struct xe_vm *vm);
@@ -253,12 +260,6 @@ static inline void xe_vm_reactivate_rebind(struct xe_vm *vm)
}
}
-int xe_vma_userptr_pin_pages(struct xe_userptr_vma *uvma);
-
-int xe_vma_userptr_check_repin(struct xe_userptr_vma *uvma);
-
-bool xe_vm_validate_should_retry(struct drm_exec *exec, int err, ktime_t *end);
-
int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
@@ -268,6 +269,8 @@ struct dma_fence *xe_vm_bind_kernel_bo(struct xe_vm *vm, struct xe_bo *bo,
struct xe_exec_queue *q, u64 addr,
enum xe_cache_level cache_lvl);
+void xe_vm_resume_rebind_worker(struct xe_vm *vm);
+
/**
* xe_vm_resv() - Return's the vm's reservation object
* @vm: The vm
@@ -287,6 +290,8 @@ void xe_vm_kill(struct xe_vm *vm, bool unlocked);
*/
#define xe_vm_assert_held(vm) dma_resv_assert_held(xe_vm_resv(vm))
+int xe_vm_drm_exec_lock(struct xe_vm *vm, struct drm_exec *exec);
+
#if IS_ENABLED(CONFIG_DRM_XE_DEBUG_VM)
#define vm_dbg drm_dbg
#else
@@ -301,11 +306,109 @@ void xe_vm_snapshot_capture_delayed(struct xe_vm_snapshot *snap);
void xe_vm_snapshot_print(struct xe_vm_snapshot *snap, struct drm_printer *p);
void xe_vm_snapshot_free(struct xe_vm_snapshot *snap);
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
-void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma);
-#else
-static inline void xe_vma_userptr_force_invalidate(struct xe_userptr_vma *uvma)
+/**
+ * xe_vm_set_validating() - Register this task as currently making bos resident
+ * @vm: Pointer to the vm or NULL.
+ * @allow_res_evict: Allow eviction of buffer objects bound to @vm when
+ * validating.
+ *
+ * Register this task as currently making bos resident for the vm. Intended
+ * to avoid eviction by the same task of shared bos bound to the vm.
+ * Call with the vm's resv lock held.
+ */
+static inline void xe_vm_set_validating(struct xe_vm *vm, bool allow_res_evict)
{
+ if (vm && !allow_res_evict) {
+ xe_vm_assert_held(vm);
+ /* Pairs with READ_ONCE in xe_vm_is_validating() */
+ WRITE_ONCE(vm->validation.validating, current);
+ }
}
-#endif
+
+/**
+ * xe_vm_clear_validating() - Unregister this task as currently making bos resident
+ * @vm: Pointer to the vm or NULL
+ * @allow_res_evict: Eviction from @vm was allowed. Must be set to the same
+ * value as for xe_vm_set_validating().
+ *
+ * Unregister this task as currently making bos resident for the vm. Intended
+ * to avoid eviction by the same task of shared bos bound to the vm.
+ * Call with the vm's resv lock held.
+ */
+static inline void xe_vm_clear_validating(struct xe_vm *vm, bool allow_res_evict)
+{
+ if (vm && !allow_res_evict) {
+ /* Pairs with READ_ONCE in xe_vm_is_validating() */
+ WRITE_ONCE(vm->validation.validating, NULL);
+ }
+}
+
+/**
+ * xe_vm_is_validating() - Whether bos bound to the vm are currently being made resident
+ * by the current task.
+ * @vm: Pointer to the vm.
+ *
+ * If this function returns %true, we should be in a vm resv locked region, since
+ * the current process is the same task that called xe_vm_set_validating().
+ * The function asserts that that's indeed the case.
+ *
+ * Return: %true if the task is currently making bos resident, %false otherwise.
+ */
+static inline bool xe_vm_is_validating(struct xe_vm *vm)
+{
+ /* Pairs with WRITE_ONCE in xe_vm_set_validating() and xe_vm_clear_validating() */
+ if (READ_ONCE(vm->validation.validating) == current) {
+ xe_vm_assert_held(vm);
+ return true;
+ }
+ return false;
+}
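/*
 * Minimal pairing sketch (assumption, not patch content): a validation path
 * registers itself before making bos resident and unregisters afterwards so
 * that shared bos bound to @vm are not evicted by the same task. The
 * make_resident() step is a hypothetical placeholder for the actual ttm
 * validation.
 */
static inline int validate_in_vm_sketch(struct xe_vm *vm, bool allow_res_evict)
{
	int err;

	xe_vm_set_validating(vm, allow_res_evict);
	err = make_resident();			/* hypothetical placeholder */
	xe_vm_clear_validating(vm, allow_res_evict);

	return err;
}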
+
+/**
+ * xe_vm_set_validation_exec() - Accessor to set the drm_exec object
+ * @vm: The vm we want to register a drm_exec object with.
+ * @exec: The exec object we want to register.
+ *
+ * Set the drm_exec object used to lock the vm's resv.
+ */
+static inline void xe_vm_set_validation_exec(struct xe_vm *vm, struct drm_exec *exec)
+{
+ xe_vm_assert_held(vm);
+ xe_assert(vm->xe, !!exec ^ !!vm->validation._exec);
+ vm->validation._exec = exec;
+}
+
+/**
+ * xe_vm_validation_exec() - Accessor to read the drm_exec object
+ * @vm: The vm whose registered drm_exec object we want to read.
+ *
+ * Return: The drm_exec object used to lock the vm's resv. The value
+ * is a valid pointer, %NULL, or one of the special values defined in
+ * xe_validation.h.
+ */
+static inline struct drm_exec *xe_vm_validation_exec(struct xe_vm *vm)
+{
+ xe_vm_assert_held(vm);
+ return vm->validation._exec;
+}
+
+/**
+ * xe_vm_has_valid_gpu_mapping() - Advisory helper to check if VMA or SVM range has
+ * a valid GPU mapping
+ * @tile: The tile which the GPU mapping belongs to
+ * @tile_present: Tile present mask
+ * @tile_invalidated: Tile invalidated mask
+ *
+ * The READ_ONCEs pair with WRITE_ONCEs in either the TLB invalidation paths
+ * (xe_vm.c, xe_svm.c) or the binding paths (xe_pt.c). These are not reliable
+ * without the notifier lock in userptr or SVM cases, and not reliable without
+ * the BO dma-resv lock in the BO case. As such, they should only be used in
+ * opportunistic cases (e.g., skipping a page fault fix or not skipping a TLB
+ * invalidation) where it is harmless.
+ *
+ * Return: True if there are valid GPU pages, false otherwise
+ */
+#define xe_vm_has_valid_gpu_mapping(tile, tile_present, tile_invalidated) \
+ ((READ_ONCE(tile_present) & ~READ_ONCE(tile_invalidated)) & BIT((tile)->id))
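/*
 * Opportunistic-use sketch, not part of this patch: a fault handler may skip
 * re-binding when the mapping is already valid for the faulting tile. The
 * tile_invalidated field matches the invalidation path above; tile_present
 * is an assumed field name here.
 */
static inline bool vma_fault_already_fixed(struct xe_tile *tile, struct xe_vma *vma)
{
	return xe_vm_has_valid_gpu_mapping(tile, vma->tile_present,
					   vma->tile_invalidated);
}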
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_doc.h b/drivers/gpu/drm/xe/xe_vm_doc.h
index 1030ce214032..02e5288373c9 100644
--- a/drivers/gpu/drm/xe/xe_vm_doc.h
+++ b/drivers/gpu/drm/xe/xe_vm_doc.h
@@ -7,7 +7,7 @@
#define _XE_VM_DOC_H_
/**
- * DOC: XE VM (user address space)
+ * DOC: Xe VM (user address space)
*
* VM creation
* ===========
@@ -202,13 +202,13 @@
* User pointers are user allocated memory (malloc'd, mmap'd, etc..) for which the
* user wants to create a GPU mapping. Typically in other DRM drivers a dummy BO
* was created and then a binding was created. We bypass creating a dummy BO in
- * XE and simply create a binding directly from the userptr.
+ * Xe and simply create a binding directly from the userptr.
*
* Invalidation
* ------------
*
* Since this a core kernel managed memory the kernel can move this memory
- * whenever it wants. We register an invalidation MMU notifier to alert XE when
+ * whenever it wants. We register an invalidation MMU notifier to alert Xe when
* a user pointer is about to move. The invalidation notifier needs to block
* until all pending users (jobs or compute mode engines) of the userptr are
* idle to ensure no faults. This done by waiting on all of VM's dma-resv slots.
@@ -419,7 +419,7 @@
* =======
*
* VM locking protects all of the core data paths (bind operations, execs,
- * evictions, and compute mode rebind worker) in XE.
+ * evictions, and compute mode rebind worker) in Xe.
*
* Locks
* -----
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
new file mode 100644
index 000000000000..cad3cf627c3f
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -0,0 +1,431 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_vm_madvise.h"
+
+#include <linux/nospec.h>
+#include <drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_pat.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
+
+struct xe_vmas_in_madvise_range {
+ u64 addr;
+ u64 range;
+ struct xe_vma **vmas;
+ int num_vmas;
+ bool has_bo_vmas;
+ bool has_svm_userptr_vmas;
+};
+
+static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
+{
+ u64 addr = madvise_range->addr;
+ u64 range = madvise_range->range;
+
+ struct xe_vma **__vmas;
+ struct drm_gpuva *gpuva;
+ int max_vmas = 8;
+
+ lockdep_assert_held(&vm->lock);
+
+ madvise_range->num_vmas = 0;
+ madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
+ if (!madvise_range->vmas)
+ return -ENOMEM;
+
+ vm_dbg(&vm->xe->drm, "VMAs in range: start=0x%016llx, end=0x%016llx", addr, addr + range);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (xe_vma_bo(vma))
+ madvise_range->has_bo_vmas = true;
+ else if (xe_vma_is_cpu_addr_mirror(vma) || xe_vma_is_userptr(vma))
+ madvise_range->has_svm_userptr_vmas = true;
+
+ if (madvise_range->num_vmas == max_vmas) {
+ max_vmas <<= 1;
+ __vmas = krealloc(madvise_range->vmas,
+ max_vmas * sizeof(*madvise_range->vmas),
+ GFP_KERNEL);
+ if (!__vmas) {
+ kfree(madvise_range->vmas);
+ return -ENOMEM;
+ }
+ madvise_range->vmas = __vmas;
+ }
+
+ madvise_range->vmas[madvise_range->num_vmas] = vma;
+ (madvise_range->num_vmas)++;
+ }
+
+ if (!madvise_range->num_vmas)
+ kfree(madvise_range->vmas);
+
+ vm_dbg(&vm->xe->drm, "madvise_range->num_vmas = %d\n", madvise_range->num_vmas);
+
+ return 0;
+}
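get_vmas() grows its VMA array by doubling the capacity with krealloc() whenever it fills up. A userspace sketch of the same growth pattern with malloc()/realloc(), shown only to illustrate the idiom:

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int max = 8, count = 0;
	int *items = malloc(max * sizeof(*items));

	if (!items)
		return 1;

	for (int v = 0; v < 100; v++) {
		if (count == max) {
			/* Grow by doubling, like max_vmas <<= 1 followed by krealloc() */
			int *tmp = realloc(items, 2 * max * sizeof(*items));

			if (!tmp) {
				free(items);
				return 1;
			}
			items = tmp;
			max *= 2;
		}
		items[count++] = v;
	}

	printf("stored %d items, capacity %d\n", count, max);
	free(items);
	return 0;
}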
+
+static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC);
+
+ for (i = 0; i < num_vmas; i++) {
+ /* TODO: Extend attributes to BO-based vmas */
+ if ((vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
+ vmas[i]->attr.preferred_loc.migration_policy ==
+ op->preferred_mem_loc.migration_policy) ||
+ !xe_vma_is_cpu_addr_mirror(vmas[i])) {
+ vmas[i]->skip_invalidation = true;
+ } else {
+ vmas[i]->skip_invalidation = false;
+ vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
+ /* Until multi-device support is added, migration_policy
+ * has no effect and can be ignored.
+ */
+ vmas[i]->attr.preferred_loc.migration_policy =
+ op->preferred_mem_loc.migration_policy;
+ }
+ }
+}
+
+static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ struct xe_bo *bo;
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC);
+ xe_assert(vm->xe, op->atomic.val <= DRM_XE_ATOMIC_CPU);
+
+ for (i = 0; i < num_vmas; i++) {
+ if (xe_vma_is_userptr(vmas[i]) &&
+ !(op->atomic.val == DRM_XE_ATOMIC_DEVICE &&
+ xe->info.has_device_atomics_on_smem)) {
+ vmas[i]->skip_invalidation = true;
+ continue;
+ }
+
+ if (vmas[i]->attr.atomic_access == op->atomic.val) {
+ vmas[i]->skip_invalidation = true;
+ } else {
+ vmas[i]->skip_invalidation = false;
+ vmas[i]->attr.atomic_access = op->atomic.val;
+ }
+
+ bo = xe_vma_bo(vmas[i]);
+ if (!bo || bo->attr.atomic_access == op->atomic.val)
+ continue;
+
+ vmas[i]->skip_invalidation = false;
+ xe_bo_assert_held(bo);
+ bo->attr.atomic_access = op->atomic.val;
+
+ /* Invalidate cpu page table, so bo can migrate to smem in next access */
+ if (xe_bo_is_vram(bo) &&
+ (bo->attr.atomic_access == DRM_XE_ATOMIC_CPU ||
+ bo->attr.atomic_access == DRM_XE_ATOMIC_GLOBAL))
+ ttm_bo_unmap_virtual(&bo->ttm);
+ }
+}
+
+static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_MEM_RANGE_ATTR_PAT);
+
+ for (i = 0; i < num_vmas; i++) {
+ if (vmas[i]->attr.pat_index == op->pat_index.val) {
+ vmas[i]->skip_invalidation = true;
+ } else {
+ vmas[i]->skip_invalidation = false;
+ vmas[i]->attr.pat_index = op->pat_index.val;
+ }
+ }
+}
+
+typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op);
+
+static const madvise_func madvise_funcs[] = {
+ [DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
+ [DRM_XE_MEM_RANGE_ATTR_ATOMIC] = madvise_atomic,
+ [DRM_XE_MEM_RANGE_ATTR_PAT] = madvise_pat_index,
+};
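madvise_funcs[] dispatches on the attribute type through a function-pointer table indexed by the uapi enum. A minimal standalone sketch of that dispatch style, with invented attribute names standing in for the DRM_XE_MEM_RANGE_ATTR_* values:

#include <stdio.h>

enum attr_type { ATTR_PREFERRED_LOC, ATTR_ATOMIC, ATTR_PAT, ATTR_COUNT };

typedef void (*attr_func)(int val);

static void do_preferred_loc(int val) { printf("preferred_loc = %d\n", val); }
static void do_atomic(int val)        { printf("atomic = %d\n", val); }
static void do_pat(int val)           { printf("pat_index = %d\n", val); }

static const attr_func attr_funcs[ATTR_COUNT] = {
	[ATTR_PREFERRED_LOC] = do_preferred_loc,
	[ATTR_ATOMIC]        = do_atomic,
	[ATTR_PAT]           = do_pat,
};

int main(void)
{
	enum attr_type type = ATTR_ATOMIC;

	/* The driver additionally clamps the index with array_index_nospec()
	 * before using it, to avoid speculative out-of-bounds access. */
	if (type < ATTR_COUNT)
		attr_funcs[type](1);
	return 0;
}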
+
+static u8 xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpuva *gpuva;
+ struct xe_tile *tile;
+ u8 id, tile_mask = 0;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ /* Wait for pending binds */
+ if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT) <= 0)
+ XE_WARN_ON(1);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (vma->skip_invalidation || xe_vma_is_null(vma))
+ continue;
+
+ if (xe_vma_is_cpu_addr_mirror(vma)) {
+ tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
+ xe_vma_start(vma),
+ xe_vma_end(vma));
+ } else {
+ for_each_tile(tile, vm->xe, id) {
+ if (xe_pt_zap_ptes(tile, vma)) {
+ tile_mask |= BIT(id);
+
+ /*
+ * WRITE_ONCE pairs with READ_ONCE
+ * in xe_vm_has_valid_gpu_mapping()
+ */
+ WRITE_ONCE(vma->tile_invalidated,
+ vma->tile_invalidated | BIT(id));
+ }
+ }
+ }
+ }
+
+ return tile_mask;
+}
+
+static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ u8 tile_mask = xe_zap_ptes_in_madvise_range(vm, start, end);
+
+ if (!tile_mask)
+ return 0;
+
+ xe_device_wmb(vm->xe);
+
+ return xe_vm_range_tilemask_tlb_inval(vm, start, end, tile_mask);
+}
+
+static bool madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
+{
+ if (XE_IOCTL_DBG(xe, !args))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
+ return false;
+
+ switch (args->type) {
+ case DRM_XE_MEM_RANGE_ATTR_PREFERRED_LOC:
+ {
+ s32 fd = (s32)args->preferred_mem_loc.devmem_fd;
+
+ if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
+ DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.pad))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.reserved))
+ return false;
+ break;
+ }
+ case DRM_XE_MEM_RANGE_ATTR_ATOMIC:
+ if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_ATOMIC_CPU))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->atomic.pad))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->atomic.reserved))
+ return false;
+
+ break;
+ case DRM_XE_MEM_RANGE_ATTR_PAT:
+ {
+ u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);
+
+ if (XE_IOCTL_DBG(xe, !coh_mode))
+ return false;
+
+ if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->pat_index.pad))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, args->pat_index.reserved))
+ return false;
+ break;
+ }
+ default:
+ if (XE_IOCTL_DBG(xe, 1))
+ return false;
+ }
+
+ if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+ return false;
+
+ return true;
+}
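The common part of madvise_args_are_sane() is the 4 KiB alignment and minimum-range validation. A small userspace sketch of those checks, with IS_ALIGNED reimplemented locally for illustration:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define SZ_4K 0x1000ull
#define IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

static bool range_is_sane(uint64_t start, uint64_t range)
{
	return IS_ALIGNED(start, SZ_4K) &&
	       IS_ALIGNED(range, SZ_4K) &&
	       range >= SZ_4K;
}

int main(void)
{
	printf("%d\n", range_is_sane(0x10000, 0x2000));	/* 1: aligned, >= 4K */
	printf("%d\n", range_is_sane(0x10800, 0x2000));	/* 0: start unaligned */
	printf("%d\n", range_is_sane(0x10000, 0x800));	/* 0: range too small */
	return 0;
}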
+
+static bool check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
+ int num_vmas, u32 atomic_val)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_bo *bo;
+ int i;
+
+ for (i = 0; i < num_vmas; i++) {
+ bo = xe_vma_bo(vmas[i]);
+ if (!bo)
+ continue;
+ /*
+ * NOTE: The following atomic checks are platform-specific. For example,
+ * if a device supports CXL atomics, these may not be necessary or
+ * may behave differently.
+ */
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_CPU &&
+ !(bo->flags & XE_BO_FLAG_SYSTEM)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_DEVICE &&
+ !(bo->flags & XE_BO_FLAG_VRAM0) &&
+ !(bo->flags & XE_BO_FLAG_VRAM1) &&
+ !(bo->flags & XE_BO_FLAG_SYSTEM &&
+ xe->info.has_device_atomics_on_smem)))
+ return false;
+
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_ATOMIC_GLOBAL &&
+ (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
+ (!(bo->flags & XE_BO_FLAG_VRAM0) &&
+ !(bo->flags & XE_BO_FLAG_VRAM1)))))
+ return false;
+ }
+ return true;
+}
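check_bo_args_are_sane() encodes which BO placements are compatible with each atomic mode. A standalone sketch of the same decision table, using invented flag names in place of the XE_BO_FLAG_* bits:

#include <stdbool.h>
#include <stdio.h>

#define FLAG_SYSTEM (1u << 0)
#define FLAG_VRAM0  (1u << 1)
#define FLAG_VRAM1  (1u << 2)

enum atomic_mode { ATOMIC_DEVICE = 1, ATOMIC_GLOBAL, ATOMIC_CPU };

static bool atomic_mode_allowed(unsigned int bo_flags, enum atomic_mode mode,
				bool device_atomics_on_smem)
{
	switch (mode) {
	case ATOMIC_CPU:
		/* CPU atomics need a system-memory placement. */
		return bo_flags & FLAG_SYSTEM;
	case ATOMIC_DEVICE:
		/* Device atomics need VRAM, or system memory when the
		 * device supports device atomics on system memory. */
		return (bo_flags & (FLAG_VRAM0 | FLAG_VRAM1)) ||
		       ((bo_flags & FLAG_SYSTEM) && device_atomics_on_smem);
	case ATOMIC_GLOBAL:
		/* Global atomics need both system and VRAM placements. */
		return (bo_flags & FLAG_SYSTEM) &&
		       (bo_flags & (FLAG_VRAM0 | FLAG_VRAM1));
	}
	return false;
}

int main(void)
{
	printf("%d\n", atomic_mode_allowed(FLAG_SYSTEM, ATOMIC_CPU, false));			/* 1 */
	printf("%d\n", atomic_mode_allowed(FLAG_VRAM0, ATOMIC_GLOBAL, false));			/* 0 */
	printf("%d\n", atomic_mode_allowed(FLAG_SYSTEM | FLAG_VRAM0, ATOMIC_GLOBAL, false));	/* 1 */
	return 0;
}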
+/**
+ * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
+ * @dev: DRM device pointer
+ * @data: Pointer to ioctl data (drm_xe_madvise*)
+ * @file: DRM file pointer
+ *
+ * Handles the MADVISE ioctl to provide memory advice for VMAs within the
+ * input range.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_madvise *args = data;
+ struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
+ .range = args->range, };
+ struct xe_vm *vm;
+ struct drm_exec exec;
+ int err, attr_type;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -EINVAL;
+
+ if (!madvise_args_are_sane(vm->xe, args)) {
+ err = -EINVAL;
+ goto put_vm;
+ }
+
+ xe_svm_flush(vm);
+
+ err = down_write_killable(&vm->lock);
+ if (err)
+ goto put_vm;
+
+ if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+ err = -ENOENT;
+ goto unlock_vm;
+ }
+
+ err = xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+ if (err)
+ goto unlock_vm;
+
+ err = get_vmas(vm, &madvise_range);
+ if (err || !madvise_range.num_vmas)
+ goto unlock_vm;
+
+ if (madvise_range.has_bo_vmas) {
+ if (args->type == DRM_XE_MEM_RANGE_ATTR_ATOMIC) {
+ if (!check_bo_args_are_sane(vm, madvise_range.vmas,
+ madvise_range.num_vmas,
+ args->atomic.val)) {
+ err = -EINVAL;
+ goto unlock_vm;
+ }
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES | DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_until_all_locked(&exec) {
+ for (int i = 0; i < madvise_range.num_vmas; i++) {
+ struct xe_bo *bo = xe_vma_bo(madvise_range.vmas[i]);
+
+ if (!bo)
+ continue;
+ err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ goto err_fini;
+ }
+ }
+ }
+
+ if (madvise_range.has_svm_userptr_vmas) {
+ err = xe_svm_notifier_lock_interruptible(vm);
+ if (err)
+ goto err_fini;
+ }
+
+ attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
+ madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
+
+ err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
+
+ if (madvise_range.has_svm_userptr_vmas)
+ xe_svm_notifier_unlock(vm);
+
+err_fini:
+ if (madvise_range.has_bo_vmas)
+ drm_exec_fini(&exec);
+ kfree(madvise_range.vmas);
+ madvise_range.vmas = NULL;
+unlock_vm:
+ up_write(&vm->lock);
+put_vm:
+ xe_vm_put(vm);
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
new file mode 100644
index 000000000000..b0e1fc445f23
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_VM_MADVISE_H_
+#define _XE_VM_MADVISE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 1662604c4486..2168ef052499 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -17,6 +17,7 @@
#include "xe_device_types.h"
#include "xe_pt_types.h"
#include "xe_range_fence.h"
+#include "xe_userptr.h"
struct xe_bo;
struct xe_svm_range;
@@ -45,36 +46,44 @@ struct xe_vm_pgtable_update_op;
#define XE_VMA_PTE_COMPACT (DRM_GPUVA_USERBITS << 7)
#define XE_VMA_DUMPABLE (DRM_GPUVA_USERBITS << 8)
#define XE_VMA_SYSTEM_ALLOCATOR (DRM_GPUVA_USERBITS << 9)
+#define XE_VMA_MADV_AUTORESET (DRM_GPUVA_USERBITS << 10)
+
+/**
+ * struct xe_vma_mem_attr - memory attributes associated with vma
+ */
+struct xe_vma_mem_attr {
+ /** @preferred_loc: preferred memory location */
+ struct {
+ /** @preferred_loc.migration_policy: Pages migration policy */
+ u32 migration_policy;
+
+ /**
+ * @preferred_loc.devmem_fd: used for determining the pagemap_fd
+ * requested by the user. DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM and
+ * DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE mean system memory and the
+ * closest device memory, respectively.
+ */
+ u32 devmem_fd;
+ } preferred_loc;
-/** struct xe_userptr - User pointer */
-struct xe_userptr {
- /** @invalidate_link: Link for the vm::userptr.invalidated list */
- struct list_head invalidate_link;
- /** @userptr: link into VM repin list if userptr. */
- struct list_head repin_link;
/**
- * @notifier: MMU notifier for user pointer (invalidation call back)
+ * @atomic_access: The atomic access type for the vma
+ * See %DRM_XE_VMA_ATOMIC_UNDEFINED, %DRM_XE_VMA_ATOMIC_DEVICE,
+ * %DRM_XE_VMA_ATOMIC_GLOBAL, and %DRM_XE_VMA_ATOMIC_CPU for possible
+ * values. These are defined in uapi/drm/xe_drm.h.
*/
- struct mmu_interval_notifier notifier;
- /** @sgt: storage for a scatter gather table */
- struct sg_table sgt;
- /** @sg: allocated scatter gather table */
- struct sg_table *sg;
- /** @notifier_seq: notifier sequence number */
- unsigned long notifier_seq;
- /** @unmap_mutex: Mutex protecting dma-unmapping */
- struct mutex unmap_mutex;
+ u32 atomic_access;
+
/**
- * @initial_bind: user pointer has been bound at least once.
- * write: vm->userptr.notifier_lock in read mode and vm->resv held.
- * read: vm->userptr.notifier_lock in write mode or vm->resv held.
+ * @default_pat_index: The pat index for the VMA, set by the user during the first bind.
*/
- bool initial_bind;
- /** @mapped: Whether the @sgt sg-table is dma-mapped. Protected by @unmap_mutex. */
- bool mapped;
-#if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
- u32 divisor;
-#endif
+ u16 default_pat_index;
+
+ /**
+ * @pat_index: The pat index to use when encoding the PTEs for this vma.
+ * Same as @default_pat_index unless overwritten by madvise.
+ */
+ u16 pat_index;
};
struct xe_vma {
@@ -100,16 +109,23 @@ struct xe_vma {
struct work_struct destroy_work;
};
- /** @tile_invalidated: VMA has been invalidated */
+ /**
+ * @tile_invalidated: Tile mask of bindings invalidated for this VMA.
+ * Protected by the BO's resv and, for userptrs, by vm->svm.gpusvm.notifier_lock
+ * in write mode for writing, or in read mode together with the vm->resv.
+ * For stable reading, the BO's resv or, for userptrs,
+ * vm->svm.gpusvm.notifier_lock in read mode is required. Can be
+ * opportunistically read with READ_ONCE outside of locks.
+ */
u8 tile_invalidated;
/** @tile_mask: Tile mask of where to create binding for this VMA */
u8 tile_mask;
/**
- * @tile_present: GT mask of binding are present for this VMA.
+ * @tile_present: Tile mask of bindings present for this VMA.
* protected by vm->lock, vm->resv and for userptrs,
- * vm->userptr.notifier_lock for writing. Needs either for reading,
+ * vm->svm.gpusvm.notifier_lock for writing. Needs either for reading,
* but if reading is done under the vm->lock only, it needs to be held
* in write mode.
*/
@@ -119,15 +135,22 @@ struct xe_vma {
u8 tile_staged;
/**
- * @pat_index: The pat index to use when encoding the PTEs for this vma.
+ * @skip_invalidation: Used in madvise to avoid invalidation
+ * if the memory attributes don't change
*/
- u16 pat_index;
+ bool skip_invalidation;
/**
* @ufence: The user fence that was provided with MAP.
* Needs to be signalled before UNMAP can be processed.
*/
struct xe_user_fence *ufence;
+
+ /**
+ * @attr: The attributes of vma which determines the migration policy
+ * and encoding of the PTEs for this vma.
+ */
+ struct xe_vma_mem_attr attr;
};
/**
@@ -198,11 +221,6 @@ struct xe_vm {
#define XE_VM_FLAG_GSC BIT(8)
unsigned long flags;
- /** @composite_fence_ctx: context composite fence */
- u64 composite_fence_ctx;
- /** @composite_fence_seqno: seqno for composite fence */
- u32 composite_fence_seqno;
-
/**
* @lock: outer most lock, protects objects of anything attached to this
* VM
@@ -237,33 +255,7 @@ struct xe_vm {
const struct xe_pt_ops *pt_ops;
/** @userptr: user pointer state */
- struct {
- /**
- * @userptr.repin_list: list of VMAs which are user pointers,
- * and needs repinning. Protected by @lock.
- */
- struct list_head repin_list;
- /**
- * @notifier_lock: protects notifier in write mode and
- * submission in read mode.
- */
- struct rw_semaphore notifier_lock;
- /**
- * @userptr.invalidated_lock: Protects the
- * @userptr.invalidated list.
- */
- spinlock_t invalidated_lock;
- /**
- * @userptr.invalidated: List of invalidated userptrs, not yet
- * picked
- * up for revalidation. Protected from access with the
- * @invalidated_lock. Removing items from the list
- * additionally requires @lock in write mode, and adding
- * items to the list requires either the @userptr.notifer_lock in
- * write mode, OR @lock in write mode.
- */
- struct list_head invalidated;
- } userptr;
+ struct xe_userptr_vm userptr;
/** @preempt: preempt state */
struct {
@@ -271,7 +263,7 @@ struct xe_vm {
* @min_run_period_ms: The minimum run period before preempting
* an engine again
*/
- s64 min_run_period_ms;
+ unsigned int min_run_period_ms;
/** @exec_queues: list of exec queues attached to this VM */
struct list_head exec_queues;
/** @num_exec_queues: number exec queues attached to this VM */
@@ -286,6 +278,11 @@ struct xe_vm {
* BOs
*/
struct work_struct rebind_work;
+ /**
+ * @preempt.pm_activate_link: Link to list of rebind workers to be
+ * kicked on resume.
+ */
+ struct list_head pm_activate_link;
} preempt;
/** @um: unified memory state */
@@ -306,13 +303,37 @@ struct xe_vm {
} error_capture;
/**
+ * @validation: Validation data only valid with the vm resv held.
+ * Note: This is really task state of the task holding the vm resv,
+ * and moving forward we should come up with a better way of passing
+ * this down the call chain.
+ */
+ struct {
+ /**
+ * @validation.validating: The task that is currently making bos resident
+ * for this vm.
+ * Protected by the VM's resv for writing. Opportunistic reading can be done
+ * using READ_ONCE. Note: This is a workaround for the
+ * TTM eviction_valuable() callback not being passed a struct
+ * ttm_operation_context(). Future work might want to address this.
+ */
+ struct task_struct *validating;
+ /**
+ * @validation._exec: The drm_exec context used when locking the vm resv.
+ * Protected by the vm's resv.
+ */
+ struct drm_exec *_exec;
+ } validation;
+
+ /**
* @tlb_flush_seqno: Required TLB flush seqno for the next exec.
* protected by the vm resv.
*/
u64 tlb_flush_seqno;
/** @batch_invalidate_tlb: Always invalidate TLB before batch start */
bool batch_invalidate_tlb;
- /** @xef: XE file handle for tracking this VM's drm client */
+ /** @xef: Xe file handle for tracking this VM's drm client */
struct xe_file *xef;
};
@@ -320,17 +341,10 @@ struct xe_vm {
struct xe_vma_op_map {
/** @vma: VMA to map */
struct xe_vma *vma;
+ unsigned int vma_flags;
/** @immediate: Immediate bind */
bool immediate;
/** @read_only: Read only */
- bool read_only;
- /** @is_null: is NULL binding */
- bool is_null;
- /** @is_cpu_addr_mirror: is CPU address mirror binding */
- bool is_cpu_addr_mirror;
- /** @dumpable: whether BO is dumped on GPU hang */
- bool dumpable;
- /** @invalidate: invalidate the VMA before bind */
bool invalidate_on_bind;
/** @pat_index: The pat index to use for this operation. */
u16 pat_index;
@@ -374,6 +388,19 @@ struct xe_vma_op_unmap_range {
struct xe_svm_range *range;
};
+/** struct xe_vma_op_prefetch_range - VMA prefetch range operation */
+struct xe_vma_op_prefetch_range {
+ /** @range: xarray for SVM ranges data */
+ struct xarray range;
+ /** @ranges_count: number of svm ranges to map */
+ u32 ranges_count;
+ /**
+ * @tile: Pointer to the tile structure containing memory to prefetch.
+ * NULL if prefetch requested region is smem
+ */
+ struct xe_tile *tile;
+};
+
/** enum xe_vma_op_flags - flags for VMA operation */
enum xe_vma_op_flags {
/** @XE_VMA_OP_COMMITTED: VMA operation committed */
@@ -416,6 +443,8 @@ struct xe_vma_op {
struct xe_vma_op_map_range map_range;
/** @unmap_range: VMA unmap range operation specific data */
struct xe_vma_op_unmap_range unmap_range;
+ /** @prefetch_range: VMA prefetch range operation specific data */
+ struct xe_vma_op_prefetch_range prefetch_range;
};
};
@@ -433,6 +462,12 @@ struct xe_vma_ops {
u32 num_syncs;
/** @pt_update_ops: page table update operations */
struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE];
+ /** @flags: flags signifying properties of this xe_vma_ops */
+#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
+#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
+#define XE_VMA_OPS_ARRAY_OF_BINDS BIT(2)
+#define XE_VMA_OPS_FLAG_SKIP_TLB_WAIT BIT(3)
+ u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
bool inject_error;
diff --git a/drivers/gpu/drm/xe/xe_vram.c b/drivers/gpu/drm/xe/xe_vram.c
index e421a74fb87c..d50baefcd124 100644
--- a/drivers/gpu/drm/xe/xe_vram.c
+++ b/drivers/gpu/drm/xe/xe_vram.c
@@ -3,6 +3,7 @@
* Copyright © 2021-2024 Intel Corporation
*/
+#include <kunit/visibility.h>
#include <linux/pci.h>
#include <drm/drm_managed.h>
@@ -12,28 +13,25 @@
#include "regs/xe_gt_regs.h"
#include "regs/xe_regs.h"
#include "xe_assert.h"
+#include "xe_bo.h"
#include "xe_device.h"
#include "xe_force_wake.h"
#include "xe_gt_mcr.h"
-#include "xe_gt_sriov_vf.h"
#include "xe_mmio.h"
#include "xe_module.h"
#include "xe_sriov.h"
+#include "xe_tile_sriov_vf.h"
+#include "xe_ttm_vram_mgr.h"
#include "xe_vram.h"
+#include "xe_vram_types.h"
-#define BAR_SIZE_SHIFT 20
-
-static void
-_resize_bar(struct xe_device *xe, int resno, resource_size_t size)
+static void resize_bar(struct xe_device *xe, int resno, resource_size_t size)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
int bar_size = pci_rebar_bytes_to_size(size);
int ret;
- if (pci_resource_len(pdev, resno))
- pci_release_resource(pdev, resno);
-
- ret = pci_resize_resource(pdev, resno, bar_size);
+ ret = pci_resize_resource(pdev, resno, bar_size, 0);
if (ret) {
drm_info(&xe->drm, "Failed to resize BAR%d to %dM (%pe). Consider enabling 'Resizable BAR' support in your BIOS\n",
resno, 1 << bar_size, ERR_PTR(ret));
@@ -47,7 +45,7 @@ _resize_bar(struct xe_device *xe, int resno, resource_size_t size)
* if force_vram_bar_size is set, attempt to set to the requested size
* else set to maximum possible size
*/
-static void resize_vram_bar(struct xe_device *xe)
+void xe_vram_resize_bar(struct xe_device *xe)
{
int force_vram_bar_size = xe_modparam.force_vram_bar_size;
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -55,41 +53,37 @@ static void resize_vram_bar(struct xe_device *xe)
resource_size_t current_size;
resource_size_t rebar_size;
struct resource *root_res;
- u32 bar_size_mask;
+ int max_size, i;
u32 pci_cmd;
- int i;
/* gather some relevant info */
current_size = pci_resource_len(pdev, LMEM_BAR);
- bar_size_mask = pci_rebar_get_possible_sizes(pdev, LMEM_BAR);
-
- if (!bar_size_mask)
- return;
if (force_vram_bar_size < 0)
return;
/* set to a specific size? */
if (force_vram_bar_size) {
- u32 bar_size_bit;
-
- rebar_size = force_vram_bar_size * (resource_size_t)SZ_1M;
-
- bar_size_bit = bar_size_mask & BIT(pci_rebar_bytes_to_size(rebar_size));
+ rebar_size = pci_rebar_bytes_to_size(force_vram_bar_size *
+ (resource_size_t)SZ_1M);
- if (!bar_size_bit) {
+ if (!pci_rebar_size_supported(pdev, LMEM_BAR, rebar_size)) {
drm_info(&xe->drm,
- "Requested size: %lluMiB is not supported by rebar sizes: 0x%x. Leaving default: %lluMiB\n",
- (u64)rebar_size >> 20, bar_size_mask, (u64)current_size >> 20);
+ "Requested size: %lluMiB is not supported by rebar sizes: 0x%llx. Leaving default: %lluMiB\n",
+ (u64)pci_rebar_size_to_bytes(rebar_size) >> 20,
+ pci_rebar_get_possible_sizes(pdev, LMEM_BAR),
+ (u64)current_size >> 20);
return;
}
- rebar_size = 1ULL << (__fls(bar_size_bit) + BAR_SIZE_SHIFT);
-
+ rebar_size = pci_rebar_size_to_bytes(rebar_size);
if (rebar_size == current_size)
return;
} else {
- rebar_size = 1ULL << (__fls(bar_size_mask) + BAR_SIZE_SHIFT);
+ max_size = pci_rebar_get_max_size(pdev, LMEM_BAR);
+ if (max_size < 0)
+ return;
+ rebar_size = pci_rebar_size_to_bytes(max_size);
/* only resize if larger than current */
if (rebar_size <= current_size)
@@ -116,7 +110,7 @@ static void resize_vram_bar(struct xe_device *xe)
pci_read_config_dword(pdev, PCI_COMMAND, &pci_cmd);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd & ~PCI_COMMAND_MEMORY);
- _resize_bar(xe, LMEM_BAR, rebar_size);
+ resize_bar(xe, LMEM_BAR, rebar_size);
pci_assign_unassigned_bus_resources(pdev->bus);
pci_write_config_dword(pdev, PCI_COMMAND, pci_cmd);
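The removed code converted between bytes and the resizable-BAR size encoding with a 20-bit shift; the rewrite relies on the PCI core helpers instead. A standalone sketch of the underlying encoding (size code n maps to 1 MiB << n); the helper names below are local reimplementations for illustration only, not the PCI core functions:

#include <stdint.h>
#include <stdio.h>

static int rebar_bytes_to_size(uint64_t bytes)
{
	int n = 0;

	/* Round up to the next power-of-two size, minimum 1 MiB (n = 0). */
	while ((1ull << (n + 20)) < bytes)
		n++;
	return n;
}

static uint64_t rebar_size_to_bytes(int n)
{
	return 1ull << (n + 20);
}

int main(void)
{
	uint64_t req = 8192ull << 20;	/* request an 8 GiB BAR */
	int n = rebar_bytes_to_size(req);

	printf("encoding %d -> %llu MiB\n", n,
	       (unsigned long long)(rebar_size_to_bytes(n) >> 20));
	return 0;
}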
@@ -136,7 +130,7 @@ static bool resource_is_valid(struct pci_dev *pdev, int bar)
return true;
}
-static int determine_lmem_bar_size(struct xe_device *xe)
+static int determine_lmem_bar_size(struct xe_device *xe, struct xe_vram_region *lmem_bar)
{
struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
@@ -145,28 +139,31 @@ static int determine_lmem_bar_size(struct xe_device *xe)
return -ENXIO;
}
- resize_vram_bar(xe);
-
- xe->mem.vram.io_start = pci_resource_start(pdev, LMEM_BAR);
- xe->mem.vram.io_size = pci_resource_len(pdev, LMEM_BAR);
- if (!xe->mem.vram.io_size)
+ lmem_bar->io_start = pci_resource_start(pdev, LMEM_BAR);
+ lmem_bar->io_size = pci_resource_len(pdev, LMEM_BAR);
+ if (!lmem_bar->io_size)
return -EIO;
/* XXX: Need to change when xe link code is ready */
- xe->mem.vram.dpa_base = 0;
+ lmem_bar->dpa_base = 0;
/* set up a map to the total memory area. */
- xe->mem.vram.mapping = ioremap_wc(xe->mem.vram.io_start, xe->mem.vram.io_size);
+ lmem_bar->mapping = devm_ioremap_wc(&pdev->dev, lmem_bar->io_start, lmem_bar->io_size);
return 0;
}
-static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
+static int get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size, u64 *poffset)
{
struct xe_device *xe = gt_to_xe(gt);
+ unsigned int fw_ref;
u64 offset;
u32 reg;
+ fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
+ if (!fw_ref)
+ return -ETIMEDOUT;
+
if (GRAPHICS_VER(xe) >= 20) {
u64 ccs_size = tile_size / 512;
u64 offset_hi, offset_lo;
@@ -196,7 +193,10 @@ static inline u64 get_flat_ccs_offset(struct xe_gt *gt, u64 tile_size)
offset = (u64)REG_FIELD_GET(XEHP_FLAT_CCS_PTR, reg) * SZ_64K;
}
- return offset;
+ xe_force_wake_put(gt_to_fw(gt), fw_ref);
+ *poffset = offset;
+
+ return 0;
}
/*
@@ -223,7 +223,6 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
{
struct xe_device *xe = tile_to_xe(tile);
struct xe_gt *gt = tile->primary_gt;
- unsigned int fw_ref;
u64 offset;
u32 reg;
@@ -234,32 +233,31 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
offset = 0;
for_each_tile(t, xe, id)
for_each_if(t->id < tile->id)
- offset += xe_gt_sriov_vf_lmem(t->primary_gt);
+ offset += xe_tile_sriov_vf_lmem(t);
- *tile_size = xe_gt_sriov_vf_lmem(gt);
+ *tile_size = xe_tile_sriov_vf_lmem(tile);
*vram_size = *tile_size;
*tile_offset = offset;
return 0;
}
- fw_ref = xe_force_wake_get(gt_to_fw(gt), XE_FW_GT);
- if (!fw_ref)
- return -ETIMEDOUT;
-
/* actual size */
if (unlikely(xe->info.platform == XE_DG1)) {
*tile_size = pci_resource_len(to_pci_dev(xe->drm.dev), LMEM_BAR);
*tile_offset = 0;
} else {
- reg = xe_gt_mcr_unicast_read_any(gt, XEHP_TILE_ADDR_RANGE(gt->info.id));
+ reg = xe_mmio_read32(&tile->mmio, SG_TILE_ADDR_RANGE(tile->id));
*tile_size = (u64)REG_FIELD_GET(GENMASK(14, 8), reg) * SZ_1G;
*tile_offset = (u64)REG_FIELD_GET(GENMASK(7, 1), reg) * SZ_1G;
}
/* minus device usage */
if (xe->info.has_flat_ccs) {
- offset = get_flat_ccs_offset(gt, *tile_size);
+ int ret = get_flat_ccs_offset(gt, *tile_size, &offset);
+
+ if (ret)
+ return ret;
} else {
offset = xe_mmio_read64_2x32(&tile->mmio, GSMBASE);
}
@@ -267,8 +265,6 @@ static int tile_vram_size(struct xe_tile *tile, u64 *vram_size,
/* remove the tile offset so we have just the available size */
*vram_size = offset - *tile_offset;
- xe_force_wake_put(gt_to_fw(gt), fw_ref);
-
return 0;
}
@@ -278,13 +274,74 @@ static void vram_fini(void *arg)
struct xe_tile *tile;
int id;
- if (xe->mem.vram.mapping)
- iounmap(xe->mem.vram.mapping);
+ xe->mem.vram->mapping = NULL;
+
+ for_each_tile(tile, xe, id) {
+ tile->mem.vram->mapping = NULL;
+ if (tile->mem.kernel_vram)
+ tile->mem.kernel_vram->mapping = NULL;
+ }
+}
+
+struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement)
+{
+ struct xe_vram_region *vram;
+ struct drm_device *drm = &xe->drm;
+
+ xe_assert(xe, id < xe->info.tile_count);
+
+ vram = drmm_kzalloc(drm, sizeof(*vram), GFP_KERNEL);
+ if (!vram)
+ return NULL;
+
+ vram->xe = xe;
+ vram->id = id;
+ vram->placement = placement;
+#if defined(CONFIG_DRM_XE_PAGEMAP)
+ vram->migrate = xe->tiles[id].migrate;
+#endif
+ return vram;
+}
+
+static void print_vram_region_info(struct xe_device *xe, struct xe_vram_region *vram)
+{
+ struct drm_device *drm = &xe->drm;
- xe->mem.vram.mapping = NULL;
+ if (vram->io_size < vram->usable_size)
+ drm_info(drm, "Small BAR device\n");
- for_each_tile(tile, xe, id)
- tile->mem.vram.mapping = NULL;
+ drm_info(drm,
+ "VRAM[%u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n",
+ vram->id, &vram->actual_physical_size, &vram->usable_size, &vram->io_size);
+ drm_info(drm, "VRAM[%u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n",
+ vram->id, &vram->dpa_base, vram->dpa_base + (u64)vram->actual_physical_size,
+ &vram->io_start, vram->io_start + (u64)vram->io_size);
+}
+
+static int vram_region_init(struct xe_device *xe, struct xe_vram_region *vram,
+ struct xe_vram_region *lmem_bar, u64 offset, u64 usable_size,
+ u64 region_size, resource_size_t remain_io_size)
+{
+ /* Check if VRAM region is already initialized */
+ if (vram->mapping)
+ return 0;
+
+ vram->actual_physical_size = region_size;
+ vram->io_start = lmem_bar->io_start + offset;
+ vram->io_size = min_t(u64, usable_size, remain_io_size);
+
+ if (!vram->io_size) {
+ drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
+ return -ENODEV;
+ }
+
+ vram->dpa_base = lmem_bar->dpa_base + offset;
+ vram->mapping = lmem_bar->mapping + offset;
+ vram->usable_size = usable_size;
+
+ print_vram_region_info(xe, vram);
+
+ return 0;
}
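vram_region_init() clamps the CPU-visible size to whatever BAR space remains, which is how small-BAR configurations are detected. A standalone sketch of that calculation, with made-up sizes:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))

int main(void)
{
	uint64_t usable_size    = 16ull << 30;	/* 16 GiB usable VRAM */
	uint64_t remain_io_size = 256ull << 20;	/* only 256 MiB of BAR left */

	uint64_t io_size = MIN(usable_size, remain_io_size);

	if (!io_size)
		printf("no CPU-visible VRAM: error\n");
	else if (io_size < usable_size)
		printf("small-BAR: %llu MiB visible of %llu MiB usable\n",
		       (unsigned long long)(io_size >> 20),
		       (unsigned long long)(usable_size >> 20));
	return 0;
}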
/**
@@ -298,78 +355,108 @@ static void vram_fini(void *arg)
int xe_vram_probe(struct xe_device *xe)
{
struct xe_tile *tile;
- resource_size_t io_size;
+ struct xe_vram_region lmem_bar;
+ resource_size_t remain_io_size;
u64 available_size = 0;
u64 total_size = 0;
- u64 tile_offset;
- u64 tile_size;
- u64 vram_size;
int err;
u8 id;
if (!IS_DGFX(xe))
return 0;
- /* Get the size of the root tile's vram for later accessibility comparison */
- tile = xe_device_get_root_tile(xe);
- err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
- if (err)
- return err;
-
- err = determine_lmem_bar_size(xe);
+ err = determine_lmem_bar_size(xe, &lmem_bar);
if (err)
return err;
+ drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &lmem_bar.io_start, &lmem_bar.io_size);
- drm_info(&xe->drm, "VISIBLE VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
- &xe->mem.vram.io_size);
-
- io_size = xe->mem.vram.io_size;
+ remain_io_size = lmem_bar.io_size;
- /* tile specific ranges */
for_each_tile(tile, xe, id) {
- err = tile_vram_size(tile, &vram_size, &tile_size, &tile_offset);
+ u64 region_size;
+ u64 usable_size;
+ u64 tile_offset;
+
+ err = tile_vram_size(tile, &usable_size, &region_size, &tile_offset);
if (err)
return err;
- tile->mem.vram.actual_physical_size = tile_size;
- tile->mem.vram.io_start = xe->mem.vram.io_start + tile_offset;
- tile->mem.vram.io_size = min_t(u64, vram_size, io_size);
+ total_size += region_size;
+ available_size += usable_size;
- if (!tile->mem.vram.io_size) {
- drm_err(&xe->drm, "Tile without any CPU visible VRAM. Aborting.\n");
- return -ENODEV;
+ err = vram_region_init(xe, tile->mem.vram, &lmem_bar, tile_offset, usable_size,
+ region_size, remain_io_size);
+ if (err)
+ return err;
+
+ if (total_size > lmem_bar.io_size) {
+ drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
+ &total_size, &lmem_bar.io_size);
}
- tile->mem.vram.dpa_base = xe->mem.vram.dpa_base + tile_offset;
- tile->mem.vram.usable_size = vram_size;
- tile->mem.vram.mapping = xe->mem.vram.mapping + tile_offset;
+ remain_io_size -= min_t(u64, tile->mem.vram->actual_physical_size, remain_io_size);
+ }
- if (tile->mem.vram.io_size < tile->mem.vram.usable_size)
- drm_info(&xe->drm, "Small BAR device\n");
- drm_info(&xe->drm, "VRAM[%u, %u]: Actual physical size %pa, usable size exclude stolen %pa, CPU accessible size %pa\n", id,
- tile->id, &tile->mem.vram.actual_physical_size, &tile->mem.vram.usable_size, &tile->mem.vram.io_size);
- drm_info(&xe->drm, "VRAM[%u, %u]: DPA range: [%pa-%llx], io range: [%pa-%llx]\n", id, tile->id,
- &tile->mem.vram.dpa_base, tile->mem.vram.dpa_base + (u64)tile->mem.vram.actual_physical_size,
- &tile->mem.vram.io_start, tile->mem.vram.io_start + (u64)tile->mem.vram.io_size);
+ err = vram_region_init(xe, xe->mem.vram, &lmem_bar, 0, available_size, total_size,
+ lmem_bar.io_size);
+ if (err)
+ return err;
- /* calculate total size using tile size to get the correct HW sizing */
- total_size += tile_size;
- available_size += vram_size;
+ return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe);
+}
- if (total_size > xe->mem.vram.io_size) {
- drm_info(&xe->drm, "VRAM: %pa is larger than resource %pa\n",
- &total_size, &xe->mem.vram.io_size);
- }
+/**
+ * xe_vram_region_io_start - Get the IO start of a VRAM region
+ * @vram: the VRAM region
+ *
+ * Return: the IO start of the VRAM region, or 0 if not valid
+ */
+resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram)
+{
+ return vram ? vram->io_start : 0;
+}
- io_size -= min_t(u64, tile_size, io_size);
- }
+/**
+ * xe_vram_region_io_size - Get the IO size of a VRAM region
+ * @vram: the VRAM region
+ *
+ * Return: the IO size of the VRAM region, or 0 if not valid
+ */
+resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram)
+{
+ return vram ? vram->io_size : 0;
+}
- xe->mem.vram.actual_physical_size = total_size;
+/**
+ * xe_vram_region_dpa_base - Get the DPA base of a VRAM region
+ * @vram: the VRAM region
+ *
+ * Return: the DPA base of the VRAM region, or 0 if not valid
+ */
+resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram)
+{
+ return vram ? vram->dpa_base : 0;
+}
- drm_info(&xe->drm, "Total VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
- &xe->mem.vram.actual_physical_size);
- drm_info(&xe->drm, "Available VRAM: %pa, %pa\n", &xe->mem.vram.io_start,
- &available_size);
+/**
+ * xe_vram_region_usable_size - Get the usable size of a VRAM region
+ * @vram: the VRAM region
+ *
+ * Return: the usable size of the VRAM region, or 0 if not valid
+ */
+resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram)
+{
+ return vram ? vram->usable_size : 0;
+}
- return devm_add_action_or_reset(xe->drm.dev, vram_fini, xe);
+/**
+ * xe_vram_region_actual_physical_size - Get the actual physical size of a VRAM region
+ * @vram: the VRAM region
+ *
+ * Return: the actual physical size of the VRAM region, or 0 if not valid
+ */
+resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram)
+{
+ return vram ? vram->actual_physical_size : 0;
}
+EXPORT_SYMBOL_IF_KUNIT(xe_vram_region_actual_physical_size);
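The accessors above deliberately tolerate a NULL region and report 0 rather than dereferencing it. A standalone sketch of that null-tolerant getter style; the struct region and its fields are invented for the illustration:

#include <stddef.h>
#include <stdio.h>

struct region {
	unsigned long long io_start;
	unsigned long long io_size;
};

static unsigned long long region_io_size(const struct region *r)
{
	return r ? r->io_size : 0;	/* 0 when the region is absent */
}

int main(void)
{
	struct region vram = { .io_start = 0x4000000000ull, .io_size = 256ull << 20 };

	printf("%llu\n", region_io_size(&vram));	/* 268435456 */
	printf("%llu\n", region_io_size(NULL));		/* 0 */
	return 0;
}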
diff --git a/drivers/gpu/drm/xe/xe_vram.h b/drivers/gpu/drm/xe/xe_vram.h
index e31cc04ec0db..13505cfb184d 100644
--- a/drivers/gpu/drm/xe/xe_vram.h
+++ b/drivers/gpu/drm/xe/xe_vram.h
@@ -6,8 +6,20 @@
#ifndef _XE_VRAM_H_
#define _XE_VRAM_H_
+#include <linux/types.h>
+
struct xe_device;
+struct xe_vram_region;
+void xe_vram_resize_bar(struct xe_device *xe);
int xe_vram_probe(struct xe_device *xe);
+struct xe_vram_region *xe_vram_region_alloc(struct xe_device *xe, u8 id, u32 placement);
+
+resource_size_t xe_vram_region_io_start(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_io_size(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_dpa_base(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_usable_size(const struct xe_vram_region *vram);
+resource_size_t xe_vram_region_actual_physical_size(const struct xe_vram_region *vram);
+
#endif
diff --git a/drivers/gpu/drm/xe/xe_vram_freq.c b/drivers/gpu/drm/xe/xe_vram_freq.c
index b26e26d73dae..17bc84da4cdc 100644
--- a/drivers/gpu/drm/xe/xe_vram_freq.c
+++ b/drivers/gpu/drm/xe/xe_vram_freq.c
@@ -34,7 +34,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct xe_tile *tile = dev_to_tile(dev);
- u32 val, mbox;
+ u32 val = 0, mbox;
int err;
mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
@@ -56,7 +56,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
char *buf)
{
struct xe_tile *tile = dev_to_tile(dev);
- u32 val, mbox;
+ u32 val = 0, mbox;
int err;
mbox = REG_FIELD_PREP(PCODE_MB_COMMAND, PCODE_FREQUENCY_CONFIG)
diff --git a/drivers/gpu/drm/xe/xe_vram_types.h b/drivers/gpu/drm/xe/xe_vram_types.h
new file mode 100644
index 000000000000..83772dcbf1af
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vram_types.h
@@ -0,0 +1,85 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_VRAM_TYPES_H_
+#define _XE_VRAM_TYPES_H_
+
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+#include <drm/drm_pagemap.h>
+#endif
+
+#include "xe_ttm_vram_mgr_types.h"
+
+struct xe_device;
+struct xe_migrate;
+
+/**
+ * struct xe_vram_region - memory region structure
+ * This is used to describe a memory region in xe
+ * device, such as HBM memory or CXL extension memory.
+ */
+struct xe_vram_region {
+ /** @xe: Back pointer to xe device */
+ struct xe_device *xe;
+ /**
+ * @id: VRAM region instance id
+ *
+ * The value should be unique for each VRAM region.
+ */
+ u8 id;
+ /** @io_start: IO start address of this VRAM instance */
+ resource_size_t io_start;
+ /**
+ * @io_size: IO size of this VRAM instance
+ *
+ * This represents how much of this VRAM we can access
+ * via the CPU through the VRAM BAR. This can be smaller
+ * than @usable_size, in which case only part of VRAM is CPU
+ * accessible (typically the first 256M). This
+ * configuration is known as small-bar.
+ */
+ resource_size_t io_size;
+ /** @dpa_base: This memory region's DPA (device physical address) base */
+ resource_size_t dpa_base;
+ /**
+ * @usable_size: usable size of VRAM
+ *
+ * Usable size of VRAM excluding reserved portions
+ * (e.g. stolen memory)
+ */
+ resource_size_t usable_size;
+ /**
+ * @actual_physical_size: Actual VRAM size
+ *
+ * Actual VRAM size including reserved portions
+ * (e.g. stolen memory)
+ */
+ resource_size_t actual_physical_size;
+ /** @mapping: pointer to VRAM mappable space */
+ void __iomem *mapping;
+ /** @ttm: VRAM TTM manager */
+ struct xe_ttm_vram_mgr ttm;
+ /** @placement: TTM placement dedicated for this region */
+ u32 placement;
+#if IS_ENABLED(CONFIG_DRM_XE_PAGEMAP)
+ /** @migrate: Back pointer to migrate */
+ struct xe_migrate *migrate;
+ /** @pagemap: Used to remap device memory as ZONE_DEVICE */
+ struct dev_pagemap pagemap;
+ /**
+ * @dpagemap: The struct drm_pagemap of the ZONE_DEVICE memory
+ * pages of this tile.
+ */
+ struct drm_pagemap dpagemap;
+ /**
+ * @hpa_base: base host physical address
+ *
+ * This is generated when remap device memory as ZONE_DEVICE
+ */
+ resource_size_t hpa_base;
+#endif
+};
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vsec.c b/drivers/gpu/drm/xe/xe_vsec.c
index b378848d3b7b..8f23a27871b6 100644
--- a/drivers/gpu/drm/xe/xe_vsec.c
+++ b/drivers/gpu/drm/xe/xe_vsec.c
@@ -24,6 +24,7 @@
#define BMG_DEVICE_ID 0xE2F8
static struct intel_vsec_header bmg_telemetry = {
+ .rev = 1,
.length = 0x10,
.id = VSEC_ID_TELEMETRY,
.num_entries = 2,
@@ -32,28 +33,19 @@ static struct intel_vsec_header bmg_telemetry = {
.offset = BMG_DISCOVERY_OFFSET,
};
-static struct intel_vsec_header bmg_punit_crashlog = {
+static struct intel_vsec_header bmg_crashlog = {
+ .rev = 1,
.length = 0x10,
.id = VSEC_ID_CRASHLOG,
- .num_entries = 1,
- .entry_size = 4,
+ .num_entries = 2,
+ .entry_size = 6,
.tbir = 0,
.offset = BMG_DISCOVERY_OFFSET + 0x60,
};
-static struct intel_vsec_header bmg_oobmsm_crashlog = {
- .length = 0x10,
- .id = VSEC_ID_CRASHLOG,
- .num_entries = 1,
- .entry_size = 4,
- .tbir = 0,
- .offset = BMG_DISCOVERY_OFFSET + 0x78,
-};
-
static struct intel_vsec_header *bmg_capabilities[] = {
&bmg_telemetry,
- &bmg_punit_crashlog,
- &bmg_oobmsm_crashlog,
+ &bmg_crashlog,
NULL
};
@@ -149,8 +141,8 @@ static int xe_guid_decode(u32 guid, int *index, u32 *offset)
return 0;
}
-static int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
- u32 count)
+int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset,
+ u32 count)
{
struct xe_device *xe = pdev_to_xe_device(pdev);
void __iomem *telem_addr = xe->mmio.regs + BMG_TELEMETRY_OFFSET;
diff --git a/drivers/gpu/drm/xe/xe_vsec.h b/drivers/gpu/drm/xe/xe_vsec.h
index 5777c53faec2..dabfb4e02d70 100644
--- a/drivers/gpu/drm/xe/xe_vsec.h
+++ b/drivers/gpu/drm/xe/xe_vsec.h
@@ -4,8 +4,12 @@
#ifndef _XE_VSEC_H_
#define _XE_VSEC_H_
+#include <linux/types.h>
+
+struct pci_dev;
struct xe_device;
void xe_vsec_init(struct xe_device *xe);
+int xe_pmt_telem_read(struct pci_dev *pdev, u32 guid, u64 *data, loff_t user_offset, u32 count);
#endif
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 67196baa4249..e32dd2fde6f1 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -10,6 +10,7 @@
#include <linux/compiler_types.h>
#include <linux/fault-inject.h>
+#include <generated/xe_device_wa_oob.h>
#include <generated/xe_wa_oob.h>
#include "regs/xe_engine_regs.h"
@@ -38,7 +39,8 @@
* Register Immediate commands) once when initializing the device and saved in
* the default context. That default context is then used on every context
* creation to have a "primed golden context", i.e. a context image that
- * already contains the changes needed to all the registers.
+ * already contains the changes needed to all the registers. See
+ * drivers/gpu/drm/xe/xe_lrc.c for default context handling.
*
* - Engine workarounds: the list of these WAs is applied whenever the specific
* engine is reset. It's also possible that a set of engine classes share a
@@ -47,10 +49,10 @@
* them need to keeep the workaround programming: the approach taken in the
* driver is to tie those workarounds to the first compute/render engine that
* is registered. When executing with GuC submission, engine resets are
- * outside of kernel driver control, hence the list of registers involved in
+ * outside of kernel driver control, hence the list of registers involved is
* written once, on engine initialization, and then passed to GuC, that
* saves/restores their values before/after the reset takes place. See
- * ``drivers/gpu/drm/xe/xe_guc_ads.c`` for reference.
+ * drivers/gpu/drm/xe/xe_guc_ads.c for reference.
*
* - GT workarounds: the list of these WAs is applied whenever these registers
* revert to their default values: on GPU reset, suspend/resume [1]_, etc.
@@ -65,21 +67,39 @@
* hardware on every HW context restore. These buffers are created and
* programmed in the default context so the hardware always go through those
* programming sequences when switching contexts. The support for workaround
- * batchbuffers is enabled these hardware mechanisms:
+ * batchbuffers is enabled via these hardware mechanisms:
*
- * #. INDIRECT_CTX: A batchbuffer and an offset are provided in the default
- * context, pointing the hardware to jump to that location when that offset
- * is reached in the context restore. Workaround batchbuffer in the driver
- * currently uses this mechanism for all platforms.
+ * #. INDIRECT_CTX (also known as **mid context restore bb**): A batchbuffer
+ * and an offset are provided in the default context, pointing the hardware
+ * to jump to that location when that offset is reached in the context
+ * restore. When a context is being restored, this is executed after the
+ * ring context, in the middle (or beginning) of the engine context image.
*
- * #. BB_PER_CTX_PTR: A batchbuffer is provided in the default context,
- * pointing the hardware to a buffer to continue executing after the
- * engine registers are restored in a context restore sequence. This is
- * currently not used in the driver.
+ * #. BB_PER_CTX_PTR (also known as **post context restore bb**): A
+ * batchbuffer is provided in the default context, pointing the hardware to
+ * a buffer to continue executing after the engine registers are restored
+ * in a context restore sequence.
+ *
+ * Below is the timeline for a context restore sequence:
+ *
+ * .. code::
+ *
+ * INDIRECT_CTX_OFFSET
+ * |----------->|
+ * .------------.------------.-------------.------------.--------------.-----------.
+ * |Ring | Engine | Mid-context | Engine | Post-context | Ring |
+ * |Restore | Restore (1)| BB Restore | Restore (2)| BB Restore | Execution |
+ * `------------'------------'-------------'------------'--------------'-----------'
*
* - Other/OOB: There are WAs that, due to their nature, cannot be applied from
* a central place. Those are peppered around the rest of the code, as needed.
- * Workarounds related to the display IP are the main example.
+ * There's a central place to control which workarounds are enabled:
+ * drivers/gpu/drm/xe/xe_wa_oob.rules for GT workarounds and
+ * drivers/gpu/drm/xe/xe_device_wa_oob.rules for device/SoC workarounds.
+ * These files only record which workarounds are enabled: during early device
+ * initialization those rules are evaluated and recorded by the driver. Then
+ * later the driver checks with ``XE_GT_WA()`` and ``XE_DEVICE_WA()`` to
+ * implement them.
*
* .. [1] Technically, some registers are powercontext saved & restored, so they
* survive a suspend/resume. In practice, writing them again is not too
@@ -250,14 +270,6 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
- { XE_RTP_NAME("14020316580"),
- XE_RTP_RULES(MEDIA_VERSION(1301)),
- XE_RTP_ACTIONS(CLR(POWERGATE_ENABLE,
- VDN_HCP_POWERGATE_ENABLE(0) |
- VDN_MFXVDENC_POWERGATE_ENABLE(0) |
- VDN_HCP_POWERGATE_ENABLE(2) |
- VDN_MFXVDENC_POWERGATE_ENABLE(2))),
- },
{ XE_RTP_NAME("14019449301"),
XE_RTP_RULES(MEDIA_VERSION(1301), ENGINE_CLASS(VIDEO_DECODE)),
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F08(0), CG3DDISHRS_CLKGATE_DIS)),
@@ -285,6 +297,18 @@ static const struct xe_rtp_entry_sr gt_was[] = {
XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
},
+ { XE_RTP_NAME("16021865536"),
+ XE_RTP_RULES(MEDIA_VERSION(3002),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F10(0), IECPUNIT_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
+ { XE_RTP_NAME("16021867713"),
+ XE_RTP_RULES(MEDIA_VERSION(3002),
+ ENGINE_CLASS(VIDEO_DECODE)),
+ XE_RTP_ACTIONS(SET(VDBOX_CGCTL3F1C(0), MFXPIPE_CLKGATE_DIS)),
+ XE_RTP_ENTRY_FLAG(FOREACH_ENGINE),
+ },
{ XE_RTP_NAME("14021486841"),
XE_RTP_RULES(MEDIA_VERSION(3000), MEDIA_STEP(A0, B0),
ENGINE_CLASS(VIDEO_DECODE)),
@@ -503,10 +527,6 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
},
- { XE_RTP_NAME("16018737384"),
- XE_RTP_RULES(GRAPHICS_VERSION(2004), FUNC(xe_rtp_match_first_render_or_compute)),
- XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
- },
/*
* These two workarounds are the same, just applying to different
* engines. Although Wa_18032095049 (for the RCS) isn't required on
@@ -529,35 +549,47 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
+ { XE_RTP_NAME("13012615864"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
+ },
/* Xe2_HPG */
{ XE_RTP_NAME("16018712365"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, XE2_ALLOC_DPA_STARVE_FIX_DIS))
},
{ XE_RTP_NAME("16018737384"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, XE_RTP_END_VERSION_UNDEFINED),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN, EARLY_EOT_DIS))
},
{ XE_RTP_NAME("14019988906"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FLSH_IGNORES_PSD))
},
{ XE_RTP_NAME("14019877138"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(XEHP_PSS_CHICKEN, FD_END_COLLECT))
},
{ XE_RTP_NAME("14020338487"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(ROW_CHICKEN3, XE2_EUPEND_CHK_FLUSH_DIS))
},
{ XE_RTP_NAME("18032247524"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, SEQUENTIAL_ACCESS_UPGRADE_DISABLE))
},
{ XE_RTP_NAME("14018471104"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0_UDW, ENABLE_SMP_LD_RENDER_SURFACE_CONTROL))
},
/*
@@ -566,7 +598,7 @@ static const struct xe_rtp_entry_sr engine_was[] = {
* apply this to all engines for simplicity.
*/
{ XE_RTP_NAME("16021639441"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002)),
XE_RTP_ACTIONS(SET(CSFE_CHICKEN1(0),
GHWSP_CSB_REPORT_DIS |
PPHWSP_CSB_AND_TIMESTAMP_REPORT_DIS,
@@ -578,13 +610,26 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTIONS(SET(LSC_CHICKEN_BIT_0, WR_REQ_CHAINING_DIS))
},
{ XE_RTP_NAME("14021402888"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
},
- { XE_RTP_NAME("14021821874"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_first_render_or_compute)),
+ { XE_RTP_NAME("14021821874, 14022954250"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, STK_ID_RESTRICT))
},
+ { XE_RTP_NAME("13012615864"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
+ },
+ { XE_RTP_NAME("18041344222"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002),
+ FUNC(xe_rtp_match_first_render_or_compute),
+ FUNC(xe_rtp_match_not_sriov_vf),
+ FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+ XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+ },
/* Xe2_LPM */
@@ -626,11 +671,14 @@ static const struct xe_rtp_entry_sr engine_was[] = {
},
{ XE_RTP_NAME("14023061436"),
XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ FUNC(xe_rtp_match_first_render_or_compute), OR,
+ GRAPHICS_VERSION_RANGE(3003, 3005),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_CHICKEN, QID_WAIT_FOR_THREAD_NOT_RUN_DISABLE))
},
{ XE_RTP_NAME("13012615864"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001), OR,
+ GRAPHICS_VERSION_RANGE(3003, 3005),
FUNC(xe_rtp_match_first_render_or_compute)),
XE_RTP_ACTIONS(SET(TDL_TSL_CHICKEN, RES_CHK_SPR_DIS))
},
@@ -640,6 +688,17 @@ static const struct xe_rtp_entry_sr engine_was[] = {
XE_RTP_ACTIONS(SET(RING_PSMI_CTL(0), RC_SEMA_IDLE_MSG_DISABLE,
XE_RTP_ACTION_FLAG(ENGINE_BASE)))
},
+ { XE_RTP_NAME("14021402888"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3003, 3005), FUNC(xe_rtp_match_first_render_or_compute)),
+ XE_RTP_ACTIONS(SET(HALF_SLICE_CHICKEN7, CLEAR_OPTIMIZATION_DISABLE))
+ },
+ { XE_RTP_NAME("18041344222"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3001),
+ FUNC(xe_rtp_match_first_render_or_compute),
+ FUNC(xe_rtp_match_not_sriov_vf),
+ FUNC(xe_rtp_match_gt_has_discontiguous_dss_groups)),
+ XE_RTP_ACTIONS(SET(TDL_CHICKEN, EUSTALL_PERF_SAMPLING_DISABLE))
+ },
};
static const struct xe_rtp_entry_sr lrc_was[] = {
@@ -774,7 +833,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_ACTIONS(SET(INSTPM(RENDER_RING_BASE), ENABLE_SEMAPHORE_POLL_BIT))
},
{ XE_RTP_NAME("18033852989"),
- XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2004), ENGINE_CLASS(RENDER)),
+ XE_RTP_RULES(GRAPHICS_VERSION(2004), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
},
{ XE_RTP_NAME("14021567978"),
@@ -807,7 +866,7 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_SF_ROUND_NEAREST_EVEN))
},
{ XE_RTP_NAME("14019386621"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(VF_SCRATCHPAD, XE2_VFG_TED_CREDIT_INTERFACE_DISABLE))
},
{ XE_RTP_NAME("14020756599"),
@@ -824,13 +883,17 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_AUTOSTRIP))
},
{ XE_RTP_NAME("15016589081"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
{ XE_RTP_NAME("22021007897"),
- XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(2001, 2002), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
},
+ { XE_RTP_NAME("18033852989"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN1, DISABLE_BOTTOM_CLIP_RECTANGLE_TEST))
+ },
/* Xe3_LPG */
{ XE_RTP_NAME("14021490052"),
@@ -843,6 +906,19 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
DIS_PARTIAL_AUTOSTRIP |
DIS_AUTOSTRIP))
},
+ { XE_RTP_NAME("22021007897"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ },
+ { XE_RTP_NAME("14024681466"),
+ XE_RTP_RULES(GRAPHICS_VERSION_RANGE(3000, 3005), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(XEHP_SLICE_COMMON_ECO_CHICKEN1, FAST_CLEAR_VALIGN_FIX))
+ },
+ { XE_RTP_NAME("15016589081"),
+ XE_RTP_RULES(GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0),
+ ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
+ },
};
static __maybe_unused const struct xe_rtp_entry oob_was[] = {
@@ -852,16 +928,41 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = {
static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
+static __maybe_unused const struct xe_rtp_entry device_oob_was[] = {
+#include <generated/xe_device_wa_oob.c>
+ {}
+};
+
+static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT);
+
__diag_pop();
/**
- * xe_wa_process_oob - process OOB workaround table
+ * xe_wa_process_device_oob - process device OOB workaround table
+ * @xe: device instance to process workarounds for
+ *
+ * Process OOB workaround table for this device, marking in @xe the
+ * workarounds that are active.
+ */
+void xe_wa_process_device_oob(struct xe_device *xe)
+{
+ struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe);
+
+ xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was));
+
+ xe->wa_active.oob_initialized = true;
+ xe_rtp_process(&ctx, device_oob_was);
+}
+
+/**
+ * xe_wa_process_gt_oob - process GT OOB workaround table
* @gt: GT instance to process workarounds for
*
* Process OOB workaround table for this platform, marking in @gt the
* workarounds that are active.
*/
-void xe_wa_process_oob(struct xe_gt *gt)
+void xe_wa_process_gt_oob(struct xe_gt *gt)
{
struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(gt);
@@ -923,12 +1024,34 @@ void xe_wa_process_lrc(struct xe_hw_engine *hwe)
}
/**
- * xe_wa_init - initialize gt with workaround bookkeeping
+ * xe_wa_device_init - initialize device with OOB workaround bookkeeping
+ * @xe: Xe device instance to initialize
+ *
+ * Returns 0 for success, negative error code otherwise
+ */
+int xe_wa_device_init(struct xe_device *xe)
+{
+ unsigned long *p;
+
+ p = drmm_kzalloc(&xe->drm,
+ sizeof(*p) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)),
+ GFP_KERNEL);
+
+ if (!p)
+ return -ENOMEM;
+
+ xe->wa_active.oob = p;
+
+ return 0;
+}
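For orientation, a minimal sketch of how the two new device-level entry points compose, assuming a probe-time call site; the helper name is hypothetical, only xe_wa_device_init() and xe_wa_process_device_oob() come from this patch:

	#include "xe_device_types.h"
	#include "xe_wa.h"

	/* Hypothetical probe-time helper; the real call sites are elsewhere. */
	static int example_setup_device_workarounds(struct xe_device *xe)
	{
		int err;

		/* Allocate the drmm-managed device OOB bitmap sized from device_oob_was[]. */
		err = xe_wa_device_init(xe);
		if (err)
			return err;

		/*
		 * Walk the table, set the bits matching this device and mark the
		 * bitmap initialized so XE_DEVICE_WA() queries pass their assert.
		 */
		xe_wa_process_device_oob(xe);

		return 0;
	}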
+
+/**
+ * xe_wa_gt_init - initialize gt with workaround bookkeeping
* @gt: GT instance to initialize
*
* Returns 0 for success, negative error code otherwise.
*/
-int xe_wa_init(struct xe_gt *gt)
+int xe_wa_gt_init(struct xe_gt *gt)
{
struct xe_device *xe = gt_to_xe(gt);
size_t n_oob, n_lrc, n_engine, n_gt, total;
@@ -954,9 +1077,26 @@ int xe_wa_init(struct xe_gt *gt)
return 0;
}
-ALLOW_ERROR_INJECTION(xe_wa_init, ERRNO); /* See xe_pci_probe() */
+ALLOW_ERROR_INJECTION(xe_wa_gt_init, ERRNO); /* See xe_pci_probe() */
+
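+/**
+ * xe_wa_device_dump() - Dump device OOB workarounds into a drm printer.
+ * @xe: the &xe_device
+ * @p: the &drm_printer
+ */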
+void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p)
+{
+ size_t idx;
+
+ drm_printf(p, "Device OOB Workarounds\n");
+ for_each_set_bit(idx, xe->wa_active.oob, ARRAY_SIZE(device_oob_was))
+ if (device_oob_was[idx].name)
+ drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name);
+}
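A hedged sketch of how xe_wa_device_dump() could be surfaced, assuming a debugfs-style show callback; the hookup and helper name are not part of this patch:

	#include <linux/seq_file.h>
	#include <drm/drm_print.h>

	#include "xe_device_types.h"
	#include "xe_wa.h"

	/* Hypothetical debugfs show callback; the real wiring is not in this hunk. */
	static int example_device_wa_show(struct seq_file *m, void *unused)
	{
		struct xe_device *xe = m->private;	/* assumed stashed at file creation */
		struct drm_printer p = drm_seq_file_printer(m);

		xe_wa_device_dump(xe, &p);
		return 0;
	}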
-void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
+/**
+ * xe_wa_gt_dump() - Dump GT workarounds into a drm printer.
+ * @gt: the &xe_gt
+ * @p: the &drm_printer
+ *
+ * Return: always 0.
+ */
+int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p)
{
size_t idx;
@@ -964,18 +1104,22 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
for_each_set_bit(idx, gt->wa_active.gt, ARRAY_SIZE(gt_was))
drm_printf_indent(p, 1, "%s\n", gt_was[idx].name);
- drm_printf(p, "\nEngine Workarounds\n");
+ drm_puts(p, "\n");
+ drm_printf(p, "Engine Workarounds\n");
for_each_set_bit(idx, gt->wa_active.engine, ARRAY_SIZE(engine_was))
drm_printf_indent(p, 1, "%s\n", engine_was[idx].name);
- drm_printf(p, "\nLRC Workarounds\n");
+ drm_puts(p, "\n");
+ drm_printf(p, "LRC Workarounds\n");
for_each_set_bit(idx, gt->wa_active.lrc, ARRAY_SIZE(lrc_was))
drm_printf_indent(p, 1, "%s\n", lrc_was[idx].name);
- drm_printf(p, "\nOOB Workarounds\n");
+ drm_puts(p, "\n");
+ drm_printf(p, "OOB Workarounds\n");
for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was))
if (oob_was[idx].name)
drm_printf_indent(p, 1, "%s\n", oob_was[idx].name);
+ return 0;
}
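The int return lets the GT dump helper slot into show-style callbacks that expect a (gt, printer) -> int signature; the table below is purely illustrative of that shape and is not code from the patch:

	#include <drm/drm_print.h>

	#include "xe_wa.h"

	/* Illustrative only: a show-style table whose callbacks return int. */
	struct example_gt_show {
		const char *name;
		int (*show)(struct xe_gt *gt, struct drm_printer *p);
	};

	static const struct example_gt_show example_entries[] = {
		{ "workarounds", xe_wa_gt_dump },	/* return value propagated to the caller */
	};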
/*
@@ -997,6 +1141,6 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile)
if (IS_SRIOV_VF(tile->xe))
return;
- if (XE_WA(tile->primary_gt, 22010954014))
+ if (XE_DEVICE_WA(tile->xe, 22010954014))
xe_mmio_rmw32(mmio, XEHP_CLOCK_GATE_DIS, 0, SGSI_SIDECLK_DIS);
}
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index 52337405b5bc..8fd6a5af0910 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -13,23 +13,41 @@ struct xe_gt;
struct xe_hw_engine;
struct xe_tile;
-int xe_wa_init(struct xe_gt *gt);
-void xe_wa_process_oob(struct xe_gt *gt);
+int xe_wa_device_init(struct xe_device *xe);
+int xe_wa_gt_init(struct xe_gt *gt);
+void xe_wa_process_device_oob(struct xe_device *xe);
+void xe_wa_process_gt_oob(struct xe_gt *gt);
void xe_wa_process_gt(struct xe_gt *gt);
void xe_wa_process_engine(struct xe_hw_engine *hwe);
void xe_wa_process_lrc(struct xe_hw_engine *hwe);
void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
-void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
+void xe_wa_device_dump(struct xe_device *xe, struct drm_printer *p);
+int xe_wa_gt_dump(struct xe_gt *gt, struct drm_printer *p);
/**
- * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any
- * other more specific type
+ * XE_GT_WA - Out-of-band GT workarounds, to be queried at runtime as needed.
* @gt__: gt instance
* @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
*/
-#define XE_WA(gt__, id__) ({ \
+#define XE_GT_WA(gt__, id__) ({ \
xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \
test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
})
+/**
+ * XE_DEVICE_WA - Out-of-band device workarounds, to be queried at runtime
+ * as needed.
+ * @xe__: xe_device
+ * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h
+ */
+#define XE_DEVICE_WA(xe__, id__) ({ \
+ xe_assert(xe__, (xe__)->wa_active.oob_initialized); \
+ test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \
+})
+
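+/**
+ * XE_DEVICE_WA_DISABLE - Clear a device OOB workaround bit so that later
+ * XE_DEVICE_WA() queries report it as inactive.
+ * @xe__: xe_device
+ * @id__: XE_DEVICE_WA_OOB_<id__>, as generated by build system in generated/xe_device_wa_oob.h
+ */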
+#define XE_DEVICE_WA_DISABLE(xe__, id__) ({ \
+ xe_assert(xe__, (xe__)->wa_active.oob_initialized); \
+ clear_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->wa_active.oob); \
+})
+
#endif
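A usage sketch for the new device macros, assuming a hypothetical quirk helper; the query mirrors the xe_wa_apply_tile_workarounds() change above, and the disable path is illustrative only:

	#include <drm/drm_print.h>

	#include "xe_device_types.h"
	#include "xe_wa.h"

	/*
	 * Hypothetical quirk path: only XE_DEVICE_WA()/XE_DEVICE_WA_DISABLE() and
	 * WA 22010954014 come from this patch; the helper and its argument are assumed.
	 */
	static void example_device_quirk(struct xe_device *xe, bool handled_elsewhere)
	{
		/* Query, as in the xe_wa_apply_tile_workarounds() hunk above. */
		if (XE_DEVICE_WA(xe, 22010954014))
			drm_dbg(&xe->drm, "WA 22010954014 active on this device\n");

		/*
		 * If some other agent already covers the workaround, clear its bit
		 * so later XE_DEVICE_WA() queries report it as inactive.
		 */
		if (handled_elsewhere)
			XE_DEVICE_WA_DISABLE(xe, 22010954014);
	}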
diff --git a/drivers/gpu/drm/xe/xe_wa_oob.rules b/drivers/gpu/drm/xe/xe_wa_oob.rules
index 9efc5accd43d..7ca7258eb5d8 100644
--- a/drivers/gpu/drm/xe/xe_wa_oob.rules
+++ b/drivers/gpu/drm/xe/xe_wa_oob.rules
@@ -1,4 +1,6 @@
1607983814 GRAPHICS_VERSION_RANGE(1200, 1210)
+16010904313 GRAPHICS_VERSION_RANGE(1200, 1210)
+18022495364 GRAPHICS_VERSION_RANGE(1200, 1210)
22012773006 GRAPHICS_VERSION_RANGE(1200, 1250)
14014475959 GRAPHICS_VERSION_RANGE(1270, 1271), GRAPHICS_STEP(A0, B0)
PLATFORM(DG2)
@@ -9,10 +11,9 @@
18020744125 PLATFORM(PVC)
1509372804 PLATFORM(PVC), GRAPHICS_STEP(A0, C0)
1409600907 GRAPHICS_VERSION_RANGE(1200, 1250)
-14016763929 SUBPLATFORM(DG2, G10)
+22014953428 SUBPLATFORM(DG2, G10)
SUBPLATFORM(DG2, G12)
16017236439 PLATFORM(PVC)
-22010954014 PLATFORM(DG2)
14019821291 MEDIA_VERSION_RANGE(1300, 2000)
14015076503 MEDIA_VERSION(1300)
16020292621 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
@@ -21,7 +22,8 @@
GRAPHICS_VERSION_RANGE(1270, 1274)
MEDIA_VERSION(1300)
PLATFORM(DG2)
-14018094691 GRAPHICS_VERSION(2004)
+14018094691 GRAPHICS_VERSION_RANGE(2001, 2002)
+ GRAPHICS_VERSION(2004)
14019882105 GRAPHICS_VERSION(2004), GRAPHICS_STEP(A0, B0)
18024947630 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
@@ -29,21 +31,23 @@
16022287689 GRAPHICS_VERSION(2001)
GRAPHICS_VERSION(2004)
13011645652 GRAPHICS_VERSION(2004)
- GRAPHICS_VERSION(3001)
-14022293748 GRAPHICS_VERSION(2001)
- GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
-22019794406 GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(3003)
+ GRAPHICS_VERSION_RANGE(3004, 3005)
+14022293748 GRAPHICS_VERSION_RANGE(2001, 2002)
+ GRAPHICS_VERSION(2004)
+ GRAPHICS_VERSION_RANGE(3000, 3005)
+22019794406 GRAPHICS_VERSION_RANGE(2001, 2002)
GRAPHICS_VERSION(2004)
GRAPHICS_VERSION_RANGE(3000, 3001)
+ GRAPHICS_VERSION(3003)
+ GRAPHICS_VERSION_RANGE(3004, 3005)
22019338487 MEDIA_VERSION(2000)
- GRAPHICS_VERSION(2001)
+ GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_not_sriov_vf)
-22019338487_display PLATFORM(LUNARLAKE)
-16023588340 GRAPHICS_VERSION(2001)
+16023588340 GRAPHICS_VERSION(2001), FUNC(xe_rtp_match_not_sriov_vf)
14019789679 GRAPHICS_VERSION(1255)
GRAPHICS_VERSION_RANGE(1270, 2004)
-no_media_l3 MEDIA_VERSION(3000)
14022866841 GRAPHICS_VERSION(3000), GRAPHICS_STEP(A0, B0)
MEDIA_VERSION(3000), MEDIA_STEP(A0, B0)
16021333562 GRAPHICS_VERSION_RANGE(1200, 1274)
@@ -57,5 +61,19 @@ no_media_l3 MEDIA_VERSION(3000)
GRAPHICS_VERSION(1260), GRAPHICS_STEP(A0, B0)
16023105232 GRAPHICS_VERSION_RANGE(2001, 3001)
MEDIA_VERSION_RANGE(1301, 3000)
+ MEDIA_VERSION(3002)
+ GRAPHICS_VERSION_RANGE(3003, 3005)
16026508708 GRAPHICS_VERSION_RANGE(1200, 3001)
MEDIA_VERSION_RANGE(1300, 3000)
+ MEDIA_VERSION(3002)
+ GRAPHICS_VERSION_RANGE(3003, 3005)
+14020001231 GRAPHICS_VERSION_RANGE(2001, 2004), FUNC(xe_rtp_match_psmi_enabled)
+ MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled)
+ MEDIA_VERSION(3000), FUNC(xe_rtp_match_psmi_enabled)
+ MEDIA_VERSION(3002), FUNC(xe_rtp_match_psmi_enabled)
+16023683509 MEDIA_VERSION(2000), FUNC(xe_rtp_match_psmi_enabled)
+ MEDIA_VERSION(3000), MEDIA_STEP(A0, B0), FUNC(xe_rtp_match_psmi_enabled)
+
+15015404425_disable PLATFORM(PANTHERLAKE), MEDIA_STEP(B0, FOREVER)
+16026007364 MEDIA_VERSION(3000)
+14020316580 MEDIA_VERSION(1301)
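
Each numbered line in xe_wa_oob.rules presumably becomes a bit in generated/xe_wa_oob.h and is queried through the macros in xe_wa.h. A hedged illustration using an entry added above; the helper is hypothetical:

	#include "xe_gt_types.h"
	#include "xe_gt_printk.h"
	#include "xe_wa.h"

	/* Hypothetical caller: queries the bit generated from the rules line above. */
	static void example_check_media_wa(struct xe_gt *gt)
	{
		/* True only when "14020316580 MEDIA_VERSION(1301)" matched this GT. */
		if (XE_GT_WA(gt, 14020316580))
			xe_gt_dbg(gt, "WA 14020316580 is active\n");
	}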