summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd/kfd_topology.c')
-rw-r--r--drivers/gpu/drm/amd/amdkfd/kfd_topology.c50
1 files changed, 29 insertions, 21 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index e477d7509646..baa2374acdeb 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1267,34 +1267,41 @@ static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev,
{
struct kfd_node *gpu = outbound_link->gpu;
struct amdgpu_device *adev = gpu->adev;
- int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes;
+ unsigned int num_xgmi_sdma_engines = kfd_get_num_xgmi_sdma_engines(gpu);
+ unsigned int num_sdma_engines = kfd_get_num_sdma_engines(gpu);
+ uint32_t sdma_eng_id_mask = (1 << num_sdma_engines) - 1;
+ uint32_t xgmi_sdma_eng_id_mask =
+ ((1 << num_xgmi_sdma_engines) - 1) << num_sdma_engines;
+
bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu &&
adev->aid_mask && num_xgmi_nodes && gpu->kfd->num_nodes == 1 &&
- kfd_get_num_xgmi_sdma_engines(gpu) >= 14 &&
- (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8);
+ num_xgmi_sdma_engines >= 6 && (!(adev->flags & AMD_IS_APU) &&
+ num_xgmi_nodes == 8);
if (support_rec_eng) {
int src_socket_id = adev->gmc.xgmi.physical_node_id;
int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id;
+ unsigned int reshift = num_xgmi_sdma_engines == 6 ? 1 : 0;
outbound_link->rec_sdma_eng_id_mask =
- 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id];
+ 1 << (rec_sdma_eng_map[src_socket_id][dst_socket_id] >> reshift);
inbound_link->rec_sdma_eng_id_mask =
- 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id];
- } else {
- int num_sdma_eng = kfd_get_num_sdma_engines(gpu);
- int i, eng_offset = 0;
+ 1 << (rec_sdma_eng_map[dst_socket_id][src_socket_id] >> reshift);
- if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
- kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) {
- eng_offset = num_sdma_eng;
- num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu);
- }
+ /* If recommended engine is out of range, need to reset the mask */
+ if (outbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ outbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
+ if (inbound_link->rec_sdma_eng_id_mask & sdma_eng_id_mask)
+ inbound_link->rec_sdma_eng_id_mask = xgmi_sdma_eng_id_mask;
- for (i = 0; i < num_sdma_eng; i++) {
- outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
- inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset));
- }
+ } else {
+ uint32_t engine_mask = (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI &&
+ num_xgmi_sdma_engines && to_dev->gpu) ? xgmi_sdma_eng_id_mask :
+ sdma_eng_id_mask;
+
+ outbound_link->rec_sdma_eng_id_mask = engine_mask;
+ inbound_link->rec_sdma_eng_id_mask = engine_mask;
}
}
@@ -1983,9 +1990,6 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
if (kfd_dbg_has_ttmps_always_setup(dev->gpu))
dev->node_props.debug_prop |= HSA_DBG_DISPATCH_INFO_ALWAYS_VALID;
- if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
- dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
-
if (KFD_GC_VERSION(dev->gpu) < IP_VERSION(10, 0, 0)) {
if (KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 3) ||
KFD_GC_VERSION(dev->gpu) == IP_VERSION(9, 4, 4))
@@ -2001,7 +2005,11 @@ static void kfd_topology_set_capabilities(struct kfd_topology_device *dev)
dev->node_props.capability |=
HSA_CAP_TRAP_DEBUG_PRECISE_MEMORY_OPERATIONS_SUPPORTED;
- dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
+ if (!amdgpu_sriov_vf(dev->gpu->adev))
+ dev->node_props.capability |= HSA_CAP_PER_QUEUE_RESET_SUPPORTED;
+
+ if (dev->gpu->adev->sdma.supported_reset & AMDGPU_RESET_TYPE_PER_QUEUE)
+ dev->node_props.capability2 |= HSA_CAP2_PER_SDMA_QUEUE_RESET_SUPPORTED;
} else {
dev->node_props.debug_prop |= HSA_DBG_WATCH_ADDR_MASK_LO_BIT_GFX10 |
HSA_DBG_WATCH_ADDR_MASK_HI_BIT;