-rw-r--r--  drivers/accel/ivpu/ivpu_debugfs.c  2
-rw-r--r--  drivers/dma-buf/dma-resv.c  5
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c  2
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c  12
-rw-r--r--  drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c  8
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c  5
-rw-r--r--  drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c  16
-rw-r--r--  drivers/gpu/drm/amd/display/dc/core/dc.c  10
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c  4
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c  20
-rw-r--r--  drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c  5
-rw-r--r--  drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c  6
-rw-r--r--  drivers/gpu/drm/amd/display/dc/link/link_dpms.c  13
-rw-r--r--  drivers/gpu/drm/drm_gpusvm.c  37
-rw-r--r--  drivers/gpu/drm/meson/meson_encoder_hdmi.c  4
-rw-r--r--  drivers/gpu/drm/tiny/panel-mipi-dbi.c  5
-rw-r--r--  drivers/gpu/drm/xe/instructions/xe_mi_commands.h  4
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_engine_regs.h  5
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_gt_regs.h  1
-rw-r--r--  drivers/gpu/drm/xe/regs/xe_lrc_layout.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_device_types.h  2
-rw-r--r--  drivers/gpu/drm/xe/xe_exec_queue.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_guc_submit.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.c  199
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc.h  5
-rw-r--r--  drivers/gpu/drm/xe/xe_lrc_types.h  9
-rw-r--r--  drivers/gpu/drm/xe/xe_module.c  3
-rw-r--r--  drivers/gpu/drm/xe/xe_module.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_pci.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_pci_types.h  1
-rw-r--r--  drivers/gpu/drm/xe/xe_pt.c  14
-rw-r--r--  drivers/gpu/drm/xe/xe_ring_ops.c  7
-rw-r--r--  drivers/gpu/drm/xe/xe_shrinker.c  2
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.c  116
-rw-r--r--  drivers/gpu/drm/xe/xe_svm.h  5
-rw-r--r--  drivers/gpu/drm/xe/xe_trace_lrc.h  8
-rw-r--r--  drivers/gpu/drm/xe/xe_wa.c  4
-rw-r--r--  include/drm/drm_gpusvm.h  47
38 files changed, 474 insertions, 121 deletions
diff --git a/drivers/accel/ivpu/ivpu_debugfs.c b/drivers/accel/ivpu/ivpu_debugfs.c
index f0dad0c9ce33..cd24ccd20ba6 100644
--- a/drivers/accel/ivpu/ivpu_debugfs.c
+++ b/drivers/accel/ivpu/ivpu_debugfs.c
@@ -455,7 +455,7 @@ priority_bands_fops_write(struct file *file, const char __user *user_buf, size_t
if (ret < 0)
return ret;
- buf[size] = '\0';
+ buf[ret] = '\0';
ret = sscanf(buf, "%u %u %u %u", &band, &grace_period, &process_grace_period,
&process_quantum);
if (ret != 4)
diff --git a/drivers/dma-buf/dma-resv.c b/drivers/dma-buf/dma-resv.c
index 5f8d010516f0..b1ef4546346d 100644
--- a/drivers/dma-buf/dma-resv.c
+++ b/drivers/dma-buf/dma-resv.c
@@ -320,8 +320,9 @@ void dma_resv_add_fence(struct dma_resv *obj, struct dma_fence *fence,
count++;
dma_resv_list_set(fobj, i, fence, usage);
- /* pointer update must be visible before we extend the num_fences */
- smp_store_mb(fobj->num_fences, count);
+ /* fence update must be visible before we extend the num_fences */
+ smp_wmb();
+ fobj->num_fences = count;
}
EXPORT_SYMBOL(dma_resv_add_fence);
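The smp_wmb() above orders the fence store against the num_fences store; a lockless reader needs a matching smp_rmb() (or an address dependency) between loading num_fences and dereferencing the array. A minimal sketch of the pairing, assuming a simplified fence list rather than the real dma-resv internals:

	/* writer, as in dma_resv_add_fence() above */
	list->entries[i] = fence;
	smp_wmb();			/* publish the entry before the count */
	list->num_fences = count;

	/* lockless reader sketch */
	n = READ_ONCE(list->num_fences);
	smp_rmb();			/* pairs with the smp_wmb() above */
	for (i = 0; i < n; i++)
		f = READ_ONCE(list->entries[i]);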
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
index cfdf558b48b6..02138aa55793 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_csa.c
@@ -109,7 +109,7 @@ int amdgpu_unmap_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct drm_exec exec;
int r;
- drm_exec_init(&exec, DRM_EXEC_INTERRUPTIBLE_WAIT, 0);
+ drm_exec_init(&exec, 0, 0);
drm_exec_until_all_locked(&exec) {
r = amdgpu_vm_lock_pd(vm, &exec, 0);
if (likely(!r))
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index e74e26b6a4f2..fec9a007533a 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -752,6 +752,18 @@ static int gmc_v11_0_sw_init(struct amdgpu_ip_block *ip_block)
adev->gmc.vram_type = vram_type;
adev->gmc.vram_vendor = vram_vendor;
+ /* The mall_size is already calculated as mall_size_per_umc * num_umc.
+ * However, for gfx1151, which features a 2-to-1 UMC mapping,
+ * the result must be multiplied by 2 to determine the actual mall size.
+ */
+ switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
+ case IP_VERSION(11, 5, 1):
+ adev->gmc.mall_size *= 2;
+ break;
+ default:
+ break;
+ }
+
switch (amdgpu_ip_version(adev, GC_HWIP, 0)) {
case IP_VERSION(11, 0, 0):
case IP_VERSION(11, 0, 1):
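A worked example with hypothetical numbers: if mall_size_per_umc is 1 MiB and num_umc is 2, the generic path computes mall_size = 2 MiB; on gfx1151, where the UMC mapping is 2-to-1, the switch above doubles this to the actual 4 MiB.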
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
index a1171e6152ed..f11df9c2ec13 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_5.c
@@ -1023,6 +1023,10 @@ static int vcn_v4_0_5_start_dpg_mode(struct amdgpu_vcn_inst *vinst,
ring->doorbell_index << VCN_RB1_DB_CTRL__OFFSET__SHIFT |
VCN_RB1_DB_CTRL__EN_MASK);
+ /* Keep one read-back to ensure all register writes are done; otherwise
+ * races with the hardware may be introduced */
+ RREG32_SOC15(VCN, inst_idx, regVCN_RB1_DB_CTRL);
+
return 0;
}
@@ -1205,6 +1209,10 @@ static int vcn_v4_0_5_start(struct amdgpu_vcn_inst *vinst)
WREG32_SOC15(VCN, i, regVCN_RB_ENABLE, tmp);
fw_shared->sq.queue_mode &= ~(FW_QUEUE_RING_RESET | FW_QUEUE_DPG_HOLD_OFF);
+ /* Keep one read-back to ensure all register writes are done; otherwise
+ * races with the hardware may be introduced */
+ RREG32_SOC15(VCN, i, regVCN_RB_ENABLE);
+
return 0;
}
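Both read-backs added above are instances of the usual posted-write flush idiom: MMIO writes may be posted, and a read from the same device forces them to complete before execution continues. A generic sketch with plain writel()/readl() rather than the SOC15 register wrappers:

	writel(val, doorbell_reg);	/* write may still be posted */
	(void)readl(doorbell_reg);	/* non-posted read flushes it */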
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
index 64df8ca448b3..cc01b9c68b47 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
@@ -372,6 +372,8 @@ get_crtc_by_otg_inst(struct amdgpu_device *adev,
static inline bool is_dc_timing_adjust_needed(struct dm_crtc_state *old_state,
struct dm_crtc_state *new_state)
{
+ if (new_state->stream->adjust.timing_adjust_pending)
+ return true;
if (new_state->freesync_config.state == VRR_STATE_ACTIVE_FIXED)
return true;
else if (amdgpu_dm_crtc_vrr_active(old_state) != amdgpu_dm_crtc_vrr_active(new_state))
@@ -12763,7 +12765,8 @@ int amdgpu_dm_process_dmub_aux_transfer_sync(
/* The reply is stored in the top nibble of the command. */
payload->reply[0] = (adev->dm.dmub_notify->aux_reply.command >> 4) & 0xF;
- if (!payload->write && p_notify->aux_reply.length)
+ /* A write request may also receive a byte reporting how many bytes were written */
+ if (p_notify->aux_reply.length)
memcpy(payload->data, p_notify->aux_reply.data,
p_notify->aux_reply.length);
diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
index 074b79fd5822..5cdbc86ef8f5 100644
--- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
+++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_mst_types.c
@@ -62,6 +62,7 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
enum aux_return_code_type operation_result;
struct amdgpu_device *adev;
struct ddc_service *ddc;
+ uint8_t copy[16];
if (WARN_ON(msg->size > 16))
return -E2BIG;
@@ -77,6 +78,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
(msg->request & DP_AUX_I2C_WRITE_STATUS_UPDATE) != 0;
payload.defer_delay = 0;
+ if (payload.write) {
+ memcpy(copy, msg->buffer, msg->size);
+ payload.data = copy;
+ }
+
result = dc_link_aux_transfer_raw(TO_DM_AUX(aux)->ddc_service, &payload,
&operation_result);
@@ -100,9 +106,9 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
*/
if (payload.write && result >= 0) {
if (result) {
- /*one byte indicating partially written bytes. Force 0 to retry*/
- drm_info(adev_to_drm(adev), "amdgpu: AUX partially written\n");
- result = 0;
+ /* one byte indicating the number of partially written bytes */
+ drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX partially written\n");
+ result = payload.data[0];
} else if (!payload.reply[0])
/*I2C_ACK|AUX_ACK*/
result = msg->size;
@@ -127,11 +133,11 @@ static ssize_t dm_dp_aux_transfer(struct drm_dp_aux *aux,
break;
}
- drm_info(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result);
+ drm_dbg_dp(adev_to_drm(adev), "amdgpu: DP AUX transfer fail:%d\n", operation_result);
}
if (payload.reply[0])
- drm_info(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.",
+ drm_dbg_dp(adev_to_drm(adev), "amdgpu: AUX reply command not ACK: 0x%02x.",
payload.reply[0]);
return result;
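With the changes above, a short AUX write now reports how many bytes were actually written (payload.data[0]) instead of forcing a full retry. A caller-side sketch of consuming such a short return, using a hypothetical aux_write() helper rather than the DRM core's actual retry loop:

	size_t done = 0;

	while (done < len) {
		ssize_t ret = aux_write(aux, address + done,
					buf + done, len - done);
		if (ret < 0)
			return ret;	/* hard failure */
		if (ret == 0)
			break;		/* avoid spinning; let the caller decide */
		done += ret;		/* skip the bytes already written */
	}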
diff --git a/drivers/gpu/drm/amd/display/dc/core/dc.c b/drivers/gpu/drm/amd/display/dc/core/dc.c
index 28d1353f403d..ba4ce8a63158 100644
--- a/drivers/gpu/drm/amd/display/dc/core/dc.c
+++ b/drivers/gpu/drm/amd/display/dc/core/dc.c
@@ -439,9 +439,12 @@ bool dc_stream_adjust_vmin_vmax(struct dc *dc,
* Don't adjust DRR while there's bandwidth optimizations pending to
* avoid conflicting with firmware updates.
*/
- if (dc->ctx->dce_version > DCE_VERSION_MAX)
- if (dc->optimized_required || dc->wm_optimized_required)
+ if (dc->ctx->dce_version > DCE_VERSION_MAX) {
+ if (dc->optimized_required || dc->wm_optimized_required) {
+ stream->adjust.timing_adjust_pending = true;
return false;
+ }
+ }
dc_exit_ips_for_hw_access(dc);
@@ -3168,7 +3171,8 @@ static void copy_stream_update_to_stream(struct dc *dc,
if (update->crtc_timing_adjust) {
if (stream->adjust.v_total_min != update->crtc_timing_adjust->v_total_min ||
- stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max)
+ stream->adjust.v_total_max != update->crtc_timing_adjust->v_total_max ||
+ stream->adjust.timing_adjust_pending)
update->crtc_timing_adjust->timing_adjust_pending = true;
stream->adjust = *update->crtc_timing_adjust;
update->crtc_timing_adjust->timing_adjust_pending = false;
diff --git a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
index d9159ca55412..92f0a099d089 100644
--- a/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
+++ b/drivers/gpu/drm/amd/display/dc/dml/dcn35/dcn35_fpu.c
@@ -195,9 +195,9 @@ struct _vcs_dpi_soc_bounding_box_st dcn3_5_soc = {
.dcn_downspread_percent = 0.5,
.gpuvm_min_page_size_bytes = 4096,
.hostvm_min_page_size_bytes = 4096,
- .do_urgent_latency_adjustment = 1,
+ .do_urgent_latency_adjustment = 0,
.urgent_latency_adjustment_fabric_clock_component_us = 0,
- .urgent_latency_adjustment_fabric_clock_reference_mhz = 3000,
+ .urgent_latency_adjustment_fabric_clock_reference_mhz = 0,
};
void dcn35_build_wm_range_table_fpu(struct clk_mgr *clk_mgr)
diff --git a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
index 0c8ec30ea672..731fbd4bc600 100644
--- a/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
+++ b/drivers/gpu/drm/amd/display/dc/dml2/dml21/dml21_translation_helper.c
@@ -910,7 +910,7 @@ static void populate_dml21_plane_config_from_plane_state(struct dml2_context *dm
}
//TODO : Could be possibly moved to a common helper layer.
-static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const struct dc_plane_state *plane, unsigned int *plane_id)
+static bool dml21_wrapper_get_plane_id(const struct dc_state *context, unsigned int stream_id, const struct dc_plane_state *plane, unsigned int *plane_id)
{
int i, j;
@@ -918,10 +918,12 @@ static bool dml21_wrapper_get_plane_id(const struct dc_state *context, const str
return false;
for (i = 0; i < context->stream_count; i++) {
- for (j = 0; j < context->stream_status[i].plane_count; j++) {
- if (context->stream_status[i].plane_states[j] == plane) {
- *plane_id = (i << 16) | j;
- return true;
+ if (context->streams[i]->stream_id == stream_id) {
+ for (j = 0; j < context->stream_status[i].plane_count; j++) {
+ if (context->stream_status[i].plane_states[j] == plane) {
+ *plane_id = (i << 16) | j;
+ return true;
+ }
}
}
}
@@ -944,14 +946,14 @@ static unsigned int map_stream_to_dml21_display_cfg(const struct dml2_context *d
return location;
}
-static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx,
+static unsigned int map_plane_to_dml21_display_cfg(const struct dml2_context *dml_ctx, unsigned int stream_id,
const struct dc_plane_state *plane, const struct dc_state *context)
{
unsigned int plane_id;
int i = 0;
int location = -1;
- if (!dml21_wrapper_get_plane_id(context, plane, &plane_id)) {
+ if (!dml21_wrapper_get_plane_id(context, stream_id, plane, &plane_id)) {
ASSERT(false);
return -1;
}
@@ -1037,7 +1039,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
} else {
for (plane_index = 0; plane_index < context->stream_status[stream_index].plane_count; plane_index++) {
- disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->stream_status[stream_index].plane_states[plane_index], context);
+ disp_cfg_plane_location = map_plane_to_dml21_display_cfg(dml_ctx, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], context);
if (disp_cfg_plane_location < 0)
disp_cfg_plane_location = dml_dispcfg->num_planes++;
@@ -1048,7 +1050,7 @@ bool dml21_map_dc_state_into_dml_display_cfg(const struct dc *in_dc, struct dc_s
populate_dml21_plane_config_from_plane_state(dml_ctx, &dml_dispcfg->plane_descriptors[disp_cfg_plane_location], context->stream_status[stream_index].plane_states[plane_index], context, stream_index);
dml_dispcfg->plane_descriptors[disp_cfg_plane_location].stream_index = disp_cfg_stream_location;
- if (dml21_wrapper_get_plane_id(context, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
+ if (dml21_wrapper_get_plane_id(context, context->streams[stream_index]->stream_id, context->stream_status[stream_index].plane_states[plane_index], &dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id[disp_cfg_plane_location]))
dml_ctx->v21.dml_to_dc_pipe_mapping.disp_cfg_to_plane_id_valid[disp_cfg_plane_location] = true;
/* apply forced pstate policy */
diff --git a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
index 1236e0f9a256..712aff7e17f7 100644
--- a/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
+++ b/drivers/gpu/drm/amd/display/dc/dpp/dcn401/dcn401_dpp_cm.c
@@ -120,10 +120,11 @@ void dpp401_set_cursor_attributes(
enum dc_cursor_color_format color_format = cursor_attributes->color_format;
int cur_rom_en = 0;
- // DCN4 should always do Cursor degamma for Cursor Color modes
if (color_format == CURSOR_MODE_COLOR_PRE_MULTIPLIED_ALPHA ||
color_format == CURSOR_MODE_COLOR_UN_PRE_MULTIPLIED_ALPHA) {
- cur_rom_en = 1;
+ if (cursor_attributes->attribute_flags.bits.ENABLE_CURSOR_DEGAMMA) {
+ cur_rom_en = 1;
+ }
}
REG_UPDATE_3(CURSOR0_CONTROL,
diff --git a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
index 5489f3d431f6..3af6a3402b89 100644
--- a/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
+++ b/drivers/gpu/drm/amd/display/dc/hwss/dcn401/dcn401_hwseq.c
@@ -1980,9 +1980,9 @@ void dcn401_program_pipe(
dc->res_pool->hubbub, pipe_ctx->plane_res.hubp->inst, pipe_ctx->hubp_regs.det_size);
}
- if (pipe_ctx->update_flags.raw ||
- (pipe_ctx->plane_state && pipe_ctx->plane_state->update_flags.raw) ||
- pipe_ctx->stream->update_flags.raw)
+ if (pipe_ctx->plane_state && (pipe_ctx->update_flags.raw ||
+ pipe_ctx->plane_state->update_flags.raw ||
+ pipe_ctx->stream->update_flags.raw))
dc->hwss.update_dchubp_dpp(dc, pipe_ctx, context);
if (pipe_ctx->plane_state && (pipe_ctx->update_flags.bits.enable ||
diff --git a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
index 268626e73c54..53c961f86d43 100644
--- a/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
+++ b/drivers/gpu/drm/amd/display/dc/link/link_dpms.c
@@ -148,6 +148,7 @@ void link_blank_dp_stream(struct dc_link *link, bool hw_init)
void link_set_all_streams_dpms_off_for_link(struct dc_link *link)
{
struct pipe_ctx *pipes[MAX_PIPES];
+ struct dc_stream_state *streams[MAX_PIPES];
struct dc_state *state = link->dc->current_state;
uint8_t count;
int i;
@@ -160,10 +161,18 @@ void link_set_all_streams_dpms_off_for_link(struct dc_link *link)
link_get_master_pipes_with_dpms_on(link, state, &count, pipes);
+ /* The subsequent call to dc_commit_updates_for_stream for a full update
+ * will release the current state and swap to a new state. Releasing the
+ * current state results in the stream pointers in the pipe_ctx structs
+ * being zeroed. Hence, cache all streams prior to dc_commit_updates_for_stream.
+ */
+ for (i = 0; i < count; i++)
+ streams[i] = pipes[i]->stream;
+
for (i = 0; i < count; i++) {
- stream_update.stream = pipes[i]->stream;
+ stream_update.stream = streams[i];
dc_commit_updates_for_stream(link->ctx->dc, NULL, 0,
- pipes[i]->stream, &stream_update,
+ streams[i], &stream_update,
state);
}
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index de424e670995..4b2f32889f00 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -1118,6 +1118,10 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
lockdep_assert_held(&gpusvm->notifier_lock);
if (range->flags.has_dma_mapping) {
+ struct drm_gpusvm_range_flags flags = {
+ .__flags = range->flags.__flags,
+ };
+
for (i = 0, j = 0; i < npages; j++) {
struct drm_pagemap_device_addr *addr = &range->dma_addr[j];
@@ -1131,8 +1135,12 @@ static void __drm_gpusvm_range_unmap_pages(struct drm_gpusvm *gpusvm,
dev, *addr);
i += 1 << addr->order;
}
- range->flags.has_devmem_pages = false;
- range->flags.has_dma_mapping = false;
+
+ /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
+ flags.has_devmem_pages = false;
+ flags.has_dma_mapping = false;
+ WRITE_ONCE(range->flags.__flags, flags.__flags);
+
range->dpagemap = NULL;
}
}
@@ -1334,6 +1342,7 @@ int drm_gpusvm_range_get_pages(struct drm_gpusvm *gpusvm,
int err = 0;
struct dev_pagemap *pagemap;
struct drm_pagemap *dpagemap;
+ struct drm_gpusvm_range_flags flags;
retry:
hmm_range.notifier_seq = mmu_interval_read_begin(notifier);
@@ -1378,7 +1387,8 @@ map_pages:
*/
drm_gpusvm_notifier_lock(gpusvm);
- if (range->flags.unmapped) {
+ flags.__flags = range->flags.__flags;
+ if (flags.unmapped) {
drm_gpusvm_notifier_unlock(gpusvm);
err = -EFAULT;
goto err_free;
@@ -1454,6 +1464,11 @@ map_pages:
goto err_unmap;
}
+ if (ctx->devmem_only) {
+ err = -EFAULT;
+ goto err_unmap;
+ }
+
addr = dma_map_page(gpusvm->drm->dev,
page, 0,
PAGE_SIZE << order,
@@ -1469,14 +1484,17 @@ map_pages:
}
i += 1 << order;
num_dma_mapped = i;
- range->flags.has_dma_mapping = true;
+ flags.has_dma_mapping = true;
}
if (zdd) {
- range->flags.has_devmem_pages = true;
+ flags.has_devmem_pages = true;
range->dpagemap = dpagemap;
}
+ /* WRITE_ONCE pairs with READ_ONCE for opportunistic checks */
+ WRITE_ONCE(range->flags.__flags, flags.__flags);
+
drm_gpusvm_notifier_unlock(gpusvm);
kvfree(pfns);
set_seqno:
@@ -1765,6 +1783,8 @@ int drm_gpusvm_migrate_to_devmem(struct drm_gpusvm *gpusvm,
goto err_finalize;
/* Upon success bind devmem allocation to range and zdd */
+ devmem_allocation->timeslice_expiration = get_jiffies_64() +
+ msecs_to_jiffies(ctx->timeslice_ms);
zdd->devmem_allocation = devmem_allocation; /* Owns ref */
err_finalize:
@@ -1985,6 +2005,13 @@ static int __drm_gpusvm_migrate_to_ram(struct vm_area_struct *vas,
void *buf;
int i, err = 0;
+ if (page) {
+ zdd = page->zone_device_data;
+ if (time_before64(get_jiffies_64(),
+ zdd->devmem_allocation->timeslice_expiration))
+ return 0;
+ }
+
start = ALIGN_DOWN(fault_addr, size);
end = ALIGN(fault_addr + 1, size);
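The new timeslice gives a freshly migrated allocation a minimum VRAM residency before a CPU fault is allowed to migrate it back, damping migration ping-pong. A sketch of how a driver arms it, mirroring the xe_svm.c hunk later in this diff (the 5 ms value is illustrative):

	struct drm_gpusvm_ctx ctx = {
		.devmem_possible = true,
		.devmem_only = true,	/* e.g. atomics that must live in VRAM */
		.timeslice_ms = 5,	/* keep VRAM residency for at least 5 ms */
	};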
diff --git a/drivers/gpu/drm/meson/meson_encoder_hdmi.c b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
index 7752d8ac85f0..c08fa93e50a3 100644
--- a/drivers/gpu/drm/meson/meson_encoder_hdmi.c
+++ b/drivers/gpu/drm/meson/meson_encoder_hdmi.c
@@ -75,7 +75,7 @@ static void meson_encoder_hdmi_set_vclk(struct meson_encoder_hdmi *encoder_hdmi,
unsigned long long venc_freq;
unsigned long long hdmi_freq;
- vclk_freq = mode->clock * 1000;
+ vclk_freq = mode->clock * 1000ULL;
/* For 420, pixel clock is half unlike venc clock */
if (encoder_hdmi->output_bus_fmt == MEDIA_BUS_FMT_UYYVYY8_0_5X24)
@@ -123,7 +123,7 @@ static enum drm_mode_status meson_encoder_hdmi_mode_valid(struct drm_bridge *bri
struct meson_encoder_hdmi *encoder_hdmi = bridge_to_meson_encoder_hdmi(bridge);
struct meson_drm *priv = encoder_hdmi->priv;
bool is_hdmi2_sink = display_info->hdmi.scdc.supported;
- unsigned long long clock = mode->clock * 1000;
+ unsigned long long clock = mode->clock * 1000ULL;
unsigned long long phy_freq;
unsigned long long vclk_freq;
unsigned long long venc_freq;
diff --git a/drivers/gpu/drm/tiny/panel-mipi-dbi.c b/drivers/gpu/drm/tiny/panel-mipi-dbi.c
index 0460ecaef4bd..23914a9f7fd3 100644
--- a/drivers/gpu/drm/tiny/panel-mipi-dbi.c
+++ b/drivers/gpu/drm/tiny/panel-mipi-dbi.c
@@ -390,7 +390,10 @@ static int panel_mipi_dbi_spi_probe(struct spi_device *spi)
spi_set_drvdata(spi, drm);
- drm_client_setup(drm, NULL);
+ if (bpp == 16)
+ drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB565);
+ else
+ drm_client_setup_with_fourcc(drm, DRM_FORMAT_RGB888);
return 0;
}
diff --git a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
index 167fb0f742de..5a47991b4b81 100644
--- a/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
+++ b/drivers/gpu/drm/xe/instructions/xe_mi_commands.h
@@ -47,6 +47,10 @@
#define MI_LRI_FORCE_POSTED REG_BIT(12)
#define MI_LRI_LEN(x) (((x) & 0xff) + 1)
+#define MI_STORE_REGISTER_MEM (__MI_INSTR(0x24) | XE_INSTR_NUM_DW(4))
+#define MI_SRM_USE_GGTT REG_BIT(22)
+#define MI_SRM_ADD_CS_OFFSET REG_BIT(19)
+
#define MI_FLUSH_DW __MI_INSTR(0x26)
#define MI_FLUSH_DW_PROTECTED_MEM_EN REG_BIT(22)
#define MI_FLUSH_DW_STORE_INDEX REG_BIT(21)
diff --git a/drivers/gpu/drm/xe/regs/xe_engine_regs.h b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
index fb8ec317b6ee..891f928d80ce 100644
--- a/drivers/gpu/drm/xe/regs/xe_engine_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_engine_regs.h
@@ -43,6 +43,10 @@
#define XEHPC_BCS8_RING_BASE 0x3ee000
#define GSCCS_RING_BASE 0x11a000
+#define ENGINE_ID(base) XE_REG((base) + 0x8c)
+#define ENGINE_INSTANCE_ID REG_GENMASK(9, 4)
+#define ENGINE_CLASS_ID REG_GENMASK(2, 0)
+
#define RING_TAIL(base) XE_REG((base) + 0x30)
#define TAIL_ADDR REG_GENMASK(20, 3)
@@ -154,6 +158,7 @@
#define STOP_RING REG_BIT(8)
#define RING_CTX_TIMESTAMP(base) XE_REG((base) + 0x3a8)
+#define RING_CTX_TIMESTAMP_UDW(base) XE_REG((base) + 0x3ac)
#define CSBE_DEBUG_STATUS(base) XE_REG((base) + 0x3fc)
#define RING_FORCE_TO_NONPRIV(base, i) XE_REG(((base) + 0x4d0) + (i) * 4)
diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
index da1f198ac107..181913967ac9 100644
--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
@@ -157,6 +157,7 @@
#define XEHPG_SC_INSTDONE_EXTRA2 XE_REG_MCR(0x7108)
#define COMMON_SLICE_CHICKEN4 XE_REG(0x7300, XE_REG_OPTION_MASKED)
+#define SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE REG_BIT(12)
#define DISABLE_TDC_LOAD_BALANCING_CALC REG_BIT(6)
#define COMMON_SLICE_CHICKEN3 XE_REG(0x7304, XE_REG_OPTION_MASKED)
diff --git a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
index 57944f90bbf6..994af591a2e8 100644
--- a/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
+++ b/drivers/gpu/drm/xe/regs/xe_lrc_layout.h
@@ -11,7 +11,9 @@
#define CTX_RING_TAIL (0x06 + 1)
#define CTX_RING_START (0x08 + 1)
#define CTX_RING_CTL (0x0a + 1)
+#define CTX_BB_PER_CTX_PTR (0x12 + 1)
#define CTX_TIMESTAMP (0x22 + 1)
+#define CTX_TIMESTAMP_UDW (0x24 + 1)
#define CTX_INDIRECT_RING_STATE (0x26 + 1)
#define CTX_PDP0_UDW (0x30 + 1)
#define CTX_PDP0_LDW (0x32 + 1)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 9f8667ebba85..0482f26aa480 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -330,6 +330,8 @@ struct xe_device {
u8 has_sriov:1;
/** @info.has_usm: Device has unified shared memory support */
u8 has_usm:1;
+ /** @info.has_64bit_timestamp: Device supports 64-bit timestamps */
+ u8 has_64bit_timestamp:1;
/** @info.is_dgfx: is discrete device */
u8 is_dgfx:1;
/**
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index 606922d9dd73..cd9b1c32f30f 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -830,7 +830,7 @@ void xe_exec_queue_update_run_ticks(struct xe_exec_queue *q)
{
struct xe_device *xe = gt_to_xe(q->gt);
struct xe_lrc *lrc;
- u32 old_ts, new_ts;
+ u64 old_ts, new_ts;
int idx;
/*
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index 31bc2022bfc2..769781d577df 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -941,7 +941,7 @@ static bool check_timeout(struct xe_exec_queue *q, struct xe_sched_job *job)
return xe_sched_invalidate_job(job, 2);
}
- ctx_timestamp = xe_lrc_ctx_timestamp(q->lrc[0]);
+ ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(q->lrc[0]));
ctx_job_timestamp = xe_lrc_ctx_job_timestamp(q->lrc[0]);
/*
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index df3ceddede07..03bfba696b37 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -24,6 +24,7 @@
#include "xe_hw_fence.h"
#include "xe_map.h"
#include "xe_memirq.h"
+#include "xe_mmio.h"
#include "xe_sriov.h"
#include "xe_trace_lrc.h"
#include "xe_vm.h"
@@ -650,6 +651,7 @@ u32 xe_lrc_pphwsp_offset(struct xe_lrc *lrc)
#define LRC_START_SEQNO_PPHWSP_OFFSET (LRC_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_CTX_JOB_TIMESTAMP_OFFSET (LRC_START_SEQNO_PPHWSP_OFFSET + 8)
#define LRC_PARALLEL_PPHWSP_OFFSET 2048
+#define LRC_ENGINE_ID_PPHWSP_OFFSET 2096
#define LRC_PPHWSP_SIZE SZ_4K
u32 xe_lrc_regs_offset(struct xe_lrc *lrc)
@@ -684,7 +686,7 @@ static inline u32 __xe_lrc_start_seqno_offset(struct xe_lrc *lrc)
static u32 __xe_lrc_ctx_job_timestamp_offset(struct xe_lrc *lrc)
{
- /* The start seqno is stored in the driver-defined portion of PPHWSP */
+ /* This is stored in the driver-defined portion of PPHWSP */
return xe_lrc_pphwsp_offset(lrc) + LRC_CTX_JOB_TIMESTAMP_OFFSET;
}
@@ -694,11 +696,21 @@ static inline u32 __xe_lrc_parallel_offset(struct xe_lrc *lrc)
return xe_lrc_pphwsp_offset(lrc) + LRC_PARALLEL_PPHWSP_OFFSET;
}
+static inline u32 __xe_lrc_engine_id_offset(struct xe_lrc *lrc)
+{
+ return xe_lrc_pphwsp_offset(lrc) + LRC_ENGINE_ID_PPHWSP_OFFSET;
+}
+
static u32 __xe_lrc_ctx_timestamp_offset(struct xe_lrc *lrc)
{
return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP * sizeof(u32);
}
+static u32 __xe_lrc_ctx_timestamp_udw_offset(struct xe_lrc *lrc)
+{
+ return __xe_lrc_regs_offset(lrc) + CTX_TIMESTAMP_UDW * sizeof(u32);
+}
+
static inline u32 __xe_lrc_indirect_ring_offset(struct xe_lrc *lrc)
{
/* Indirect ring state page is at the very end of LRC */
@@ -726,8 +738,10 @@ DECL_MAP_ADDR_HELPERS(regs)
DECL_MAP_ADDR_HELPERS(start_seqno)
DECL_MAP_ADDR_HELPERS(ctx_job_timestamp)
DECL_MAP_ADDR_HELPERS(ctx_timestamp)
+DECL_MAP_ADDR_HELPERS(ctx_timestamp_udw)
DECL_MAP_ADDR_HELPERS(parallel)
DECL_MAP_ADDR_HELPERS(indirect_ring)
+DECL_MAP_ADDR_HELPERS(engine_id)
#undef DECL_MAP_ADDR_HELPERS
@@ -743,18 +757,37 @@ u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc)
}
/**
+ * xe_lrc_ctx_timestamp_udw_ggtt_addr() - Get ctx timestamp udw GGTT address
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: ctx timestamp udw GGTT address
+ */
+u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc)
+{
+ return __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
+}
+
+/**
* xe_lrc_ctx_timestamp() - Read ctx timestamp value
* @lrc: Pointer to the lrc.
*
* Returns: ctx timestamp value
*/
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
+u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc)
{
struct xe_device *xe = lrc_to_xe(lrc);
struct iosys_map map;
+ u32 ldw, udw = 0;
map = __xe_lrc_ctx_timestamp_map(lrc);
- return xe_map_read32(xe, &map);
+ ldw = xe_map_read32(xe, &map);
+
+ if (xe->info.has_64bit_timestamp) {
+ map = __xe_lrc_ctx_timestamp_udw_map(lrc);
+ udw = xe_map_read32(xe, &map);
+ }
+
+ return (u64)udw << 32 | ldw;
}
/**
@@ -864,7 +897,7 @@ static void *empty_lrc_data(struct xe_hw_engine *hwe)
static void xe_lrc_set_ppgtt(struct xe_lrc *lrc, struct xe_vm *vm)
{
- u64 desc = xe_vm_pdp4_descriptor(vm, lrc->tile);
+ u64 desc = xe_vm_pdp4_descriptor(vm, gt_to_tile(lrc->gt));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_UDW, upper_32_bits(desc));
xe_lrc_write_ctx_reg(lrc, CTX_PDP0_LDW, lower_32_bits(desc));
@@ -877,6 +910,65 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
xe_bo_unpin(lrc->bo);
xe_bo_unlock(lrc->bo);
xe_bo_put(lrc->bo);
+ xe_bo_unpin_map_no_vm(lrc->bb_per_ctx_bo);
+}
+
+/*
+ * xe_lrc_setup_utilization() - Set up the WA BB that assists in calculating active
+ * context run ticks.
+ * @lrc: Pointer to the lrc.
+ *
+ * Context Timestamp (CTX_TIMESTAMP) in the LRC accumulates the run ticks of the
+ * context, but only gets updated when the context switches out. In order to
+ * check how long a context has been active before it switches out, two things
+ * are required:
+ *
+ * (1) Determine if the context is running:
+ * To do so, we program the WA BB to set an initial value for CTX_TIMESTAMP in
+ * the LRC. The value chosen is 1 since 0 is the initial value when the LRC is
+ * initialized. During a query, we just check for this value to determine if the
+ * context is active. If the context switched out, it would overwrite this
+ * location with the actual CTX_TIMESTAMP MMIO value. Note that WA BB runs as
+ * the last part of context restore, so reusing this LRC location will not
+ * clobber anything.
+ *
+ * (2) Calculate the time that the context has been active for:
+ * The CTX_TIMESTAMP ticks only when the context is active. If a context is
+ * active, we just use the CTX_TIMESTAMP MMIO as the new value of utilization.
+ * While doing so, we need to read the CTX_TIMESTAMP MMIO for the specific
+ * engine instance. Since we do not know which instance the context is running
+ * on until it is scheduled, we also read the ENGINE_ID MMIO in the WA BB and
+ * store it in the PPHWSP.
+ */
+#define CONTEXT_ACTIVE 1ULL
+static void xe_lrc_setup_utilization(struct xe_lrc *lrc)
+{
+ u32 *cmd;
+
+ cmd = lrc->bb_per_ctx_bo->vmap.vaddr;
+
+ *cmd++ = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
+ *cmd++ = ENGINE_ID(0).addr;
+ *cmd++ = __xe_lrc_engine_id_ggtt_addr(lrc);
+ *cmd++ = 0;
+
+ *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ *cmd++ = __xe_lrc_ctx_timestamp_ggtt_addr(lrc);
+ *cmd++ = 0;
+ *cmd++ = lower_32_bits(CONTEXT_ACTIVE);
+
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp) {
+ *cmd++ = MI_STORE_DATA_IMM | MI_SDI_GGTT | MI_SDI_NUM_DW(1);
+ *cmd++ = __xe_lrc_ctx_timestamp_udw_ggtt_addr(lrc);
+ *cmd++ = 0;
+ *cmd++ = upper_32_bits(CONTEXT_ACTIVE);
+ }
+
+ *cmd++ = MI_BATCH_BUFFER_END;
+
+ xe_lrc_write_ctx_reg(lrc, CTX_BB_PER_CTX_PTR,
+ xe_bo_ggtt_addr(lrc->bb_per_ctx_bo) | 1);
+
}
#define PVC_CTX_ASID (0x2e + 1)
@@ -893,31 +985,40 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
void *init_data = NULL;
u32 arb_enable;
u32 lrc_size;
+ u32 bo_flags;
int err;
kref_init(&lrc->refcount);
+ lrc->gt = gt;
lrc->flags = 0;
lrc_size = ring_size + xe_gt_lrc_size(gt, hwe->class);
if (xe_gt_has_indirect_ring_state(gt))
lrc->flags |= XE_LRC_FLAG_INDIRECT_RING_STATE;
+ bo_flags = XE_BO_FLAG_VRAM_IF_DGFX(tile) | XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE;
+
/*
* FIXME: Perma-pinning LRC as we don't yet support moving GGTT address
* via VM bind calls.
*/
lrc->bo = xe_bo_create_pin_map(xe, tile, vm, lrc_size,
ttm_bo_type_kernel,
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
+ bo_flags);
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
+ lrc->bb_per_ctx_bo = xe_bo_create_pin_map(xe, tile, NULL, SZ_4K,
+ ttm_bo_type_kernel,
+ bo_flags);
+ if (IS_ERR(lrc->bb_per_ctx_bo)) {
+ err = PTR_ERR(lrc->bb_per_ctx_bo);
+ goto err_lrc_finish;
+ }
+
lrc->size = lrc_size;
- lrc->tile = gt_to_tile(hwe->gt);
lrc->ring.size = ring_size;
lrc->ring.tail = 0;
- lrc->ctx_timestamp = 0;
xe_hw_fence_ctx_init(&lrc->fence_ctx, hwe->gt,
hwe->fence_irq, hwe->name);
@@ -990,7 +1091,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
xe_lrc_read_ctx_reg(lrc, CTX_CONTEXT_CONTROL) |
_MASKED_BIT_ENABLE(CTX_CTRL_PXP_ENABLE));
+ lrc->ctx_timestamp = 0;
xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
+ xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP_UDW, 0);
if (xe->info.has_asid && vm)
xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
@@ -1019,6 +1123,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
map = __xe_lrc_start_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
+ xe_lrc_setup_utilization(lrc);
+
return 0;
err_lrc_finish:
@@ -1238,6 +1344,21 @@ struct iosys_map xe_lrc_parallel_map(struct xe_lrc *lrc)
return __xe_lrc_parallel_map(lrc);
}
+/**
+ * xe_lrc_engine_id() - Read engine id value
+ * @lrc: Pointer to the lrc.
+ *
+ * Returns: engine id value
+ */
+static u32 xe_lrc_engine_id(struct xe_lrc *lrc)
+{
+ struct xe_device *xe = lrc_to_xe(lrc);
+ struct iosys_map map;
+
+ map = __xe_lrc_engine_id_map(lrc);
+ return xe_map_read32(xe, &map);
+}
+
static int instr_dw(u32 cmd_header)
{
/* GFXPIPE "SINGLE_DW" opcodes are a single dword */
@@ -1684,7 +1805,7 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
snapshot->lrc_offset = xe_lrc_pphwsp_offset(lrc);
snapshot->lrc_size = lrc->bo->size - snapshot->lrc_offset;
snapshot->lrc_snapshot = NULL;
- snapshot->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ snapshot->ctx_timestamp = lower_32_bits(xe_lrc_ctx_timestamp(lrc));
snapshot->ctx_job_timestamp = xe_lrc_ctx_job_timestamp(lrc);
return snapshot;
}
@@ -1784,22 +1905,74 @@ void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot)
kfree(snapshot);
}
+static int get_ctx_timestamp(struct xe_lrc *lrc, u32 engine_id, u64 *reg_ctx_ts)
+{
+ u16 class = REG_FIELD_GET(ENGINE_CLASS_ID, engine_id);
+ u16 instance = REG_FIELD_GET(ENGINE_INSTANCE_ID, engine_id);
+ struct xe_hw_engine *hwe;
+ u64 val;
+
+ hwe = xe_gt_hw_engine(lrc->gt, class, instance, false);
+ if (xe_gt_WARN_ONCE(lrc->gt, !hwe || xe_hw_engine_is_reserved(hwe),
+ "Unexpected engine class:instance %d:%d for context utilization\n",
+ class, instance))
+ return -1;
+
+ if (lrc_to_xe(lrc)->info.has_64bit_timestamp)
+ val = xe_mmio_read64_2x32(&hwe->gt->mmio,
+ RING_CTX_TIMESTAMP(hwe->mmio_base));
+ else
+ val = xe_mmio_read32(&hwe->gt->mmio,
+ RING_CTX_TIMESTAMP(hwe->mmio_base));
+
+ *reg_ctx_ts = val;
+
+ return 0;
+}
+
/**
* xe_lrc_update_timestamp() - Update ctx timestamp
* @lrc: Pointer to the lrc.
* @old_ts: Old timestamp value
*
* Populate @old_ts with the current saved ctx timestamp, read the new ctx timestamp and
- * update saved value.
+ * update saved value. With support for active contexts, the calculation may be
+ * slightly racy, so follow a read-again logic to ensure that the context is
+ * still active before returning the right timestamp.
*
* Returns: New ctx timestamp value
*/
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts)
+u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts)
{
+ u64 lrc_ts, reg_ts;
+ u32 engine_id;
+
*old_ts = lrc->ctx_timestamp;
- lrc->ctx_timestamp = xe_lrc_ctx_timestamp(lrc);
+ lrc_ts = xe_lrc_ctx_timestamp(lrc);
+ /* CTX_TIMESTAMP mmio read is invalid on VF, so return the LRC value */
+ if (IS_SRIOV_VF(lrc_to_xe(lrc))) {
+ lrc->ctx_timestamp = lrc_ts;
+ goto done;
+ }
+
+ if (lrc_ts == CONTEXT_ACTIVE) {
+ engine_id = xe_lrc_engine_id(lrc);
+ if (!get_ctx_timestamp(lrc, engine_id, &reg_ts))
+ lrc->ctx_timestamp = reg_ts;
+
+ /* read lrc again to ensure context is still active */
+ lrc_ts = xe_lrc_ctx_timestamp(lrc);
+ }
+
+ /*
+ * If context switched out, just use the lrc_ts. Note that this needs to
+ * be a separate if condition.
+ */
+ if (lrc_ts != CONTEXT_ACTIVE)
+ lrc->ctx_timestamp = lrc_ts;
+done:
trace_xe_lrc_update_timestamp(lrc, *old_ts);
return lrc->ctx_timestamp;
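get_ctx_timestamp() above leans on xe_mmio_read64_2x32() to sample a 64-bit counter exposed as two 32-bit registers. The underlying technique, sketched generically: re-read the upper half to detect a carry racing the two loads.

	do {
		hi = read32(REG_UDW);
		lo = read32(REG_LDW);
	} while (hi != read32(REG_UDW));	/* retry if a carry raced us */

	val = (u64)hi << 32 | lo;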
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 0b40f349ab95..eb6e8de8c939 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -120,7 +120,8 @@ void xe_lrc_snapshot_print(struct xe_lrc_snapshot *snapshot, struct drm_printer
void xe_lrc_snapshot_free(struct xe_lrc_snapshot *snapshot);
u32 xe_lrc_ctx_timestamp_ggtt_addr(struct xe_lrc *lrc);
-u32 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
+u32 xe_lrc_ctx_timestamp_udw_ggtt_addr(struct xe_lrc *lrc);
+u64 xe_lrc_ctx_timestamp(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp_ggtt_addr(struct xe_lrc *lrc);
u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
@@ -136,6 +137,6 @@ u32 xe_lrc_ctx_job_timestamp(struct xe_lrc *lrc);
*
* Returns the current LRC timestamp
*/
-u32 xe_lrc_update_timestamp(struct xe_lrc *lrc, u32 *old_ts);
+u64 xe_lrc_update_timestamp(struct xe_lrc *lrc, u64 *old_ts);
#endif
diff --git a/drivers/gpu/drm/xe/xe_lrc_types.h b/drivers/gpu/drm/xe/xe_lrc_types.h
index 71ecb453f811..ae24cf6f8dd9 100644
--- a/drivers/gpu/drm/xe/xe_lrc_types.h
+++ b/drivers/gpu/drm/xe/xe_lrc_types.h
@@ -25,8 +25,8 @@ struct xe_lrc {
/** @size: size of lrc including any indirect ring state page */
u32 size;
- /** @tile: tile which this LRC belongs to */
- struct xe_tile *tile;
+ /** @gt: gt which this LRC belongs to */
+ struct xe_gt *gt;
/** @flags: LRC flags */
#define XE_LRC_FLAG_INDIRECT_RING_STATE 0x1
@@ -52,7 +52,10 @@ struct xe_lrc {
struct xe_hw_fence_ctx fence_ctx;
/** @ctx_timestamp: readout value of CTX_TIMESTAMP on last update */
- u32 ctx_timestamp;
+ u64 ctx_timestamp;
+
+ /** @bb_per_ctx_bo: buffer object for per context batch wa buffer */
+ struct xe_bo *bb_per_ctx_bo;
};
struct xe_lrc_snapshot;
diff --git a/drivers/gpu/drm/xe/xe_module.c b/drivers/gpu/drm/xe/xe_module.c
index 9f4632e39a1a..e861c694f336 100644
--- a/drivers/gpu/drm/xe/xe_module.c
+++ b/drivers/gpu/drm/xe/xe_module.c
@@ -29,9 +29,6 @@ struct xe_modparam xe_modparam = {
module_param_named(svm_notifier_size, xe_modparam.svm_notifier_size, uint, 0600);
MODULE_PARM_DESC(svm_notifier_size, "Set the svm notifier size(in MiB), must be power of 2");
-module_param_named(always_migrate_to_vram, xe_modparam.always_migrate_to_vram, bool, 0444);
-MODULE_PARM_DESC(always_migrate_to_vram, "Always migrate to VRAM on GPU fault");
-
module_param_named_unsafe(force_execlist, xe_modparam.force_execlist, bool, 0444);
MODULE_PARM_DESC(force_execlist, "Force Execlist submission");
diff --git a/drivers/gpu/drm/xe/xe_module.h b/drivers/gpu/drm/xe/xe_module.h
index 84339e509c80..5a3bfea8b7b4 100644
--- a/drivers/gpu/drm/xe/xe_module.h
+++ b/drivers/gpu/drm/xe/xe_module.h
@@ -12,7 +12,6 @@
struct xe_modparam {
bool force_execlist;
bool probe_display;
- bool always_migrate_to_vram;
u32 force_vram_bar_size;
int guc_log_level;
char *guc_firmware_path;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 818f023166d5..f4d108dc49b1 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -140,6 +140,7 @@ static const struct xe_graphics_desc graphics_xelpg = {
.has_indirect_ring_state = 1, \
.has_range_tlb_invalidation = 1, \
.has_usm = 1, \
+ .has_64bit_timestamp = 1, \
.va_bits = 48, \
.vm_max_level = 4, \
.hw_engine_mask = \
@@ -668,6 +669,7 @@ static int xe_info_init(struct xe_device *xe,
xe->info.has_range_tlb_invalidation = graphics_desc->has_range_tlb_invalidation;
xe->info.has_usm = graphics_desc->has_usm;
+ xe->info.has_64bit_timestamp = graphics_desc->has_64bit_timestamp;
for_each_remote_tile(tile, xe, id) {
int err;
diff --git a/drivers/gpu/drm/xe/xe_pci_types.h b/drivers/gpu/drm/xe/xe_pci_types.h
index e9b9bbc138d3..ca6b10d35573 100644
--- a/drivers/gpu/drm/xe/xe_pci_types.h
+++ b/drivers/gpu/drm/xe/xe_pci_types.h
@@ -21,6 +21,7 @@ struct xe_graphics_desc {
u8 has_indirect_ring_state:1;
u8 has_range_tlb_invalidation:1;
u8 has_usm:1;
+ u8 has_64bit_timestamp:1;
};
struct xe_media_desc {
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index ffaf0d02dc7d..856038553b81 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -2232,11 +2232,19 @@ static void op_commit(struct xe_vm *vm,
}
case DRM_GPUVA_OP_DRIVER:
{
+ /* WRITE_ONCE pairs with READ_ONCE in xe_svm.c */
+
if (op->subop == XE_VMA_SUBOP_MAP_RANGE) {
- op->map_range.range->tile_present |= BIT(tile->id);
- op->map_range.range->tile_invalidated &= ~BIT(tile->id);
+ WRITE_ONCE(op->map_range.range->tile_present,
+ op->map_range.range->tile_present |
+ BIT(tile->id));
+ WRITE_ONCE(op->map_range.range->tile_invalidated,
+ op->map_range.range->tile_invalidated &
+ ~BIT(tile->id));
} else if (op->subop == XE_VMA_SUBOP_UNMAP_RANGE) {
- op->unmap_range.range->tile_present &= ~BIT(tile->id);
+ WRITE_ONCE(op->unmap_range.range->tile_present,
+ op->unmap_range.range->tile_present &
+ ~BIT(tile->id));
}
break;
}
diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
index a7582b097ae6..bc1689db4cd7 100644
--- a/drivers/gpu/drm/xe/xe_ring_ops.c
+++ b/drivers/gpu/drm/xe/xe_ring_ops.c
@@ -234,13 +234,10 @@ static u32 get_ppgtt_flag(struct xe_sched_job *job)
static int emit_copy_timestamp(struct xe_lrc *lrc, u32 *dw, int i)
{
- dw[i++] = MI_COPY_MEM_MEM | MI_COPY_MEM_MEM_SRC_GGTT |
- MI_COPY_MEM_MEM_DST_GGTT;
+ dw[i++] = MI_STORE_REGISTER_MEM | MI_SRM_USE_GGTT | MI_SRM_ADD_CS_OFFSET;
+ dw[i++] = RING_CTX_TIMESTAMP(0).addr;
dw[i++] = xe_lrc_ctx_job_timestamp_ggtt_addr(lrc);
dw[i++] = 0;
- dw[i++] = xe_lrc_ctx_timestamp_ggtt_addr(lrc);
- dw[i++] = 0;
- dw[i++] = MI_NOOP;
return i;
}
diff --git a/drivers/gpu/drm/xe/xe_shrinker.c b/drivers/gpu/drm/xe/xe_shrinker.c
index 8184390f9c7b..86d47aaf0358 100644
--- a/drivers/gpu/drm/xe/xe_shrinker.c
+++ b/drivers/gpu/drm/xe/xe_shrinker.c
@@ -227,7 +227,7 @@ struct xe_shrinker *xe_shrinker_create(struct xe_device *xe)
if (!shrinker)
return ERR_PTR(-ENOMEM);
- shrinker->shrink = shrinker_alloc(0, "xe system shrinker");
+ shrinker->shrink = shrinker_alloc(0, "drm-xe_gem:%s", xe->drm.unique);
if (!shrinker->shrink) {
kfree(shrinker);
return ERR_PTR(-ENOMEM);
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 24c578e1170e..975094c1a582 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -15,8 +15,17 @@
static bool xe_svm_range_in_vram(struct xe_svm_range *range)
{
- /* Not reliable without notifier lock */
- return range->base.flags.has_devmem_pages;
+ /*
+ * Advisory-only check of whether the range is currently backed by VRAM
+ * memory.
+ */
+
+ struct drm_gpusvm_range_flags flags = {
+ /* Pairs with WRITE_ONCE in drm_gpusvm.c */
+ .__flags = READ_ONCE(range->base.flags.__flags),
+ };
+
+ return flags.has_devmem_pages;
}
static bool xe_svm_range_has_vram_binding(struct xe_svm_range *range)
@@ -645,9 +654,16 @@ void xe_svm_fini(struct xe_vm *vm)
}
static bool xe_svm_range_is_valid(struct xe_svm_range *range,
- struct xe_tile *tile)
+ struct xe_tile *tile,
+ bool devmem_only)
{
- return (range->tile_present & ~range->tile_invalidated) & BIT(tile->id);
+ /*
+ * Advisory-only check of whether the range currently has a valid mapping;
+ * the READ_ONCE pairs with WRITE_ONCE in xe_pt.c
+ */
+ return ((READ_ONCE(range->tile_present) &
+ ~READ_ONCE(range->tile_invalidated)) & BIT(tile->id)) &&
+ (!devmem_only || xe_svm_range_in_vram(range));
}
static struct xe_vram_region *tile_to_vr(struct xe_tile *tile)
@@ -712,6 +728,36 @@ unlock:
return err;
}
+static bool supports_4K_migration(struct xe_device *xe)
+{
+ if (xe->info.vram_flags & XE_VRAM_FLAGS_NEED64K)
+ return false;
+
+ return true;
+}
+
+static bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range,
+ struct xe_vma *vma)
+{
+ struct xe_vm *vm = range_to_vm(&range->base);
+ u64 range_size = xe_svm_range_size(range);
+
+ if (!range->base.flags.migrate_devmem)
+ return false;
+
+ if (xe_svm_range_in_vram(range)) {
+ drm_dbg(&vm->xe->drm, "Range is already in VRAM\n");
+ return false;
+ }
+
+ if (range_size <= SZ_64K && !supports_4K_migration(vm->xe)) {
+ drm_dbg(&vm->xe->drm, "Platform doesn't support SZ_4K range migration\n");
+ return false;
+ }
+
+ return true;
+}
+
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
@@ -735,11 +781,16 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
+ .devmem_only = atomic && IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
+ .timeslice_ms = atomic && IS_DGFX(vm->xe) &&
+ IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? 5 : 0,
};
struct xe_svm_range *range;
struct drm_gpusvm_range *r;
struct drm_exec exec;
struct dma_fence *fence;
+ int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
int err;
@@ -758,24 +809,31 @@ retry:
if (IS_ERR(r))
return PTR_ERR(r);
+ if (ctx.devmem_only && !r->flags.migrate_devmem)
+ return -EACCES;
+
range = to_xe_range(r);
- if (xe_svm_range_is_valid(range, tile))
+ if (xe_svm_range_is_valid(range, tile, ctx.devmem_only))
return 0;
range_debug(range, "PAGE FAULT");
- /* XXX: Add migration policy, for now migrate range once */
- if (!range->skip_migrate && range->base.flags.migrate_devmem &&
- xe_svm_range_size(range) >= SZ_64K) {
- range->skip_migrate = true;
-
+ if (--migrate_try_count >= 0 &&
+ xe_svm_range_needs_migrate_to_vram(range, vma)) {
err = xe_svm_alloc_vram(vm, tile, range, &ctx);
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
- drm_dbg(&vm->xe->drm,
- "VRAM allocation failed, falling back to "
- "retrying fault, asid=%u, errno=%pe\n",
- vm->usm.asid, ERR_PTR(err));
- goto retry;
+ if (migrate_try_count || !ctx.devmem_only) {
+ drm_dbg(&vm->xe->drm,
+ "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
+ vm->usm.asid, ERR_PTR(err));
+ goto retry;
+ } else {
+ drm_err(&vm->xe->drm,
+ "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
+ vm->usm.asid, ERR_PTR(err));
+ return err;
+ }
}
}
@@ -783,15 +841,23 @@ retry:
err = drm_gpusvm_range_get_pages(&vm->svm.gpusvm, r, &ctx);
/* Corner where CPU mappings have changed */
if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
- if (err == -EOPNOTSUPP) {
- range_debug(range, "PAGE FAULT - EVICT PAGES");
- drm_gpusvm_range_evict(&vm->svm.gpusvm, &range->base);
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
+ if (migrate_try_count > 0 || !ctx.devmem_only) {
+ if (err == -EOPNOTSUPP) {
+ range_debug(range, "PAGE FAULT - EVICT PAGES");
+ drm_gpusvm_range_evict(&vm->svm.gpusvm,
+ &range->base);
+ }
+ drm_dbg(&vm->xe->drm,
+ "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
+ range_debug(range, "PAGE FAULT - RETRY PAGES");
+ goto retry;
+ } else {
+ drm_err(&vm->xe->drm,
+ "Get pages failed, retry count exceeded, asid=%u, gpusvm=%p, errno=%pe\n",
+ vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
}
- drm_dbg(&vm->xe->drm,
- "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
- vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
- range_debug(range, "PAGE FAULT - RETRY PAGES");
- goto retry;
}
if (err) {
range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
@@ -815,6 +881,7 @@ retry_bind:
drm_exec_fini(&exec);
err = PTR_ERR(fence);
if (err == -EAGAIN) {
+ ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
range_debug(range, "PAGE FAULT - RETRY BIND");
goto retry;
}
@@ -825,9 +892,6 @@ retry_bind:
}
drm_exec_fini(&exec);
- if (xe_modparam.always_migrate_to_vram)
- range->skip_migrate = false;
-
dma_fence_wait(fence, false);
dma_fence_put(fence);
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index be306fe7aaa4..fe58ac2f4baa 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -36,11 +36,6 @@ struct xe_svm_range {
* range. Protected by GPU SVM notifier lock.
*/
u8 tile_invalidated;
- /**
- * @skip_migrate: Skip migration to VRAM, protected by GPU fault handler
- * locking.
- */
- u8 skip_migrate :1;
};
#if IS_ENABLED(CONFIG_DRM_GPUSVM)
diff --git a/drivers/gpu/drm/xe/xe_trace_lrc.h b/drivers/gpu/drm/xe/xe_trace_lrc.h
index 5c669a0b2180..d525cbee1e34 100644
--- a/drivers/gpu/drm/xe/xe_trace_lrc.h
+++ b/drivers/gpu/drm/xe/xe_trace_lrc.h
@@ -19,12 +19,12 @@
#define __dev_name_lrc(lrc) dev_name(gt_to_xe((lrc)->fence_ctx.gt)->drm.dev)
TRACE_EVENT(xe_lrc_update_timestamp,
- TP_PROTO(struct xe_lrc *lrc, uint32_t old),
+ TP_PROTO(struct xe_lrc *lrc, uint64_t old),
TP_ARGS(lrc, old),
TP_STRUCT__entry(
__field(struct xe_lrc *, lrc)
- __field(u32, old)
- __field(u32, new)
+ __field(u64, old)
+ __field(u64, new)
__string(name, lrc->fence_ctx.name)
__string(device_id, __dev_name_lrc(lrc))
),
@@ -36,7 +36,7 @@ TRACE_EVENT(xe_lrc_update_timestamp,
__assign_str(name);
__assign_str(device_id);
),
- TP_printk("lrc=:%p lrc->name=%s old=%u new=%u device_id:%s",
+ TP_printk("lrc=:%p lrc->name=%s old=%llu new=%llu device_id:%s",
__entry->lrc, __get_str(name),
__entry->old, __entry->new,
__get_str(device_id))
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index 24f644c0a673..2f833f0d575f 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -815,6 +815,10 @@ static const struct xe_rtp_entry_sr lrc_was[] = {
XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
XE_RTP_ACTIONS(SET(CHICKEN_RASTER_1, DIS_CLIP_NEGATIVE_BOUNDING_BOX))
},
+ { XE_RTP_NAME("22021007897"),
+ XE_RTP_RULES(GRAPHICS_VERSION(2001), ENGINE_CLASS(RENDER)),
+ XE_RTP_ACTIONS(SET(COMMON_SLICE_CHICKEN4, SBE_PUSH_CONSTANT_BEHIND_FIX_ENABLE))
+ },
/* Xe3_LPG */
{ XE_RTP_NAME("14021490052"),
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index df120b4d1f83..eaf704d3d05e 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -89,6 +89,7 @@ struct drm_gpusvm_devmem_ops {
* @ops: Pointer to the operations structure for GPU SVM device memory
* @dpagemap: The struct drm_pagemap of the pages this allocation belongs to.
* @size: Size of device memory allocation
+ * @timeslice_expiration: Timeslice expiration in jiffies
*/
struct drm_gpusvm_devmem {
struct device *dev;
@@ -97,6 +98,7 @@ struct drm_gpusvm_devmem {
const struct drm_gpusvm_devmem_ops *ops;
struct drm_pagemap *dpagemap;
size_t size;
+ u64 timeslice_expiration;
};
/**
@@ -186,6 +188,31 @@ struct drm_gpusvm_notifier {
};
/**
+ * struct drm_gpusvm_range_flags - Structure representing a GPU SVM range flags
+ *
+ * @migrate_devmem: Flag indicating whether the range can be migrated to device memory
+ * @unmapped: Flag indicating if the range has been unmapped
+ * @partial_unmap: Flag indicating if the range has been partially unmapped
+ * @has_devmem_pages: Flag indicating if the range has devmem pages
+ * @has_dma_mapping: Flag indicating if the range has a DMA mapping
+ * @__flags: Flags for range in u16 form (used for READ_ONCE)
+ */
+struct drm_gpusvm_range_flags {
+ union {
+ struct {
+ /* All flags below must be set upon creation */
+ u16 migrate_devmem : 1;
+ /* All flags below must be set / cleared under notifier lock */
+ u16 unmapped : 1;
+ u16 partial_unmap : 1;
+ u16 has_devmem_pages : 1;
+ u16 has_dma_mapping : 1;
+ };
+ u16 __flags;
+ };
+};
+
+/**
* struct drm_gpusvm_range - Structure representing a GPU SVM range
*
* @gpusvm: Pointer to the GPU SVM structure
@@ -198,11 +225,6 @@ struct drm_gpusvm_notifier {
* @dpagemap: The struct drm_pagemap of the device pages we're dma-mapping.
* Note this is assuming only one drm_pagemap per range is allowed.
* @flags: Flags for range
- * @flags.migrate_devmem: Flag indicating whether the range can be migrated to device memory
- * @flags.unmapped: Flag indicating if the range has been unmapped
- * @flags.partial_unmap: Flag indicating if the range has been partially unmapped
- * @flags.has_devmem_pages: Flag indicating if the range has devmem pages
- * @flags.has_dma_mapping: Flag indicating if the range has a DMA mapping
*
* This structure represents a GPU SVM range used for tracking memory ranges
* mapped in a DRM device.
@@ -216,15 +238,7 @@ struct drm_gpusvm_range {
unsigned long notifier_seq;
struct drm_pagemap_device_addr *dma_addr;
struct drm_pagemap *dpagemap;
- struct {
- /* All flags below must be set upon creation */
- u16 migrate_devmem : 1;
- /* All flags below must be set / cleared under notifier lock */
- u16 unmapped : 1;
- u16 partial_unmap : 1;
- u16 has_devmem_pages : 1;
- u16 has_dma_mapping : 1;
- } flags;
+ struct drm_gpusvm_range_flags flags;
};
/**
@@ -283,17 +297,22 @@ struct drm_gpusvm {
* @check_pages_threshold: Check CPU pages for present if chunk is less than or
* equal to threshold. If not present, reduce chunk
* size.
+ * @timeslice_ms: The minimum time, in milliseconds, that a piece of memory
+ * remains with either exclusive GPU or CPU access.
* @in_notifier: entering from a MMU notifier
* @read_only: operating on read-only memory
* @devmem_possible: possible to use device memory
+ * @devmem_only: use only device memory
*
* Context that is DRM GPUSVM is operating in (i.e. user arguments).
*/
struct drm_gpusvm_ctx {
unsigned long check_pages_threshold;
+ unsigned long timeslice_ms;
unsigned int in_notifier :1;
unsigned int read_only :1;
unsigned int devmem_possible :1;
+ unsigned int devmem_only :1;
};
int drm_gpusvm_init(struct drm_gpusvm *gpusvm,