From 898c2cb5d94fc56d357f38ba7a05b1e0e23e44a3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Tue, 16 Oct 2018 13:08:21 +0200 Subject: drm/amdgpu: use scheduler fault instead of reset work MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signal a fault to the scheduler on an illegal instruction or register access violation instead of kicking of the reset handler directly. Signed-off-by: Christian König Acked-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index b918c8886b75..32eb43d165f2 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -1214,8 +1214,11 @@ static int cik_sdma_process_illegal_inst_irq(struct amdgpu_device *adev, struct amdgpu_irq_src *source, struct amdgpu_iv_entry *entry) { + u8 instance_id; + DRM_ERROR("Illegal instruction in SDMA command stream\n"); - schedule_work(&adev->reset_work); + instance_id = (entry->ring_id & 0x3) >> 0; + drm_sched_fault(&adev->sdma.instance[instance_id].ring.sched); return 0; } -- cgit From c66ed765a0a97b8900f37d4a71f1d75f52f56eeb Mon Sep 17 00:00:00 2001 From: Andrey Grodzovsky Date: Fri, 19 Oct 2018 16:22:48 -0400 Subject: drm/amdgpu: Retire amdgpu_ring.ready flag v4 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Start using drm_gpu_scheduler.ready isntead. v3: Add helper function to run ring test and set sched.ready flag status accordingly, clean explicit sched.ready sets from the IP specific files. v4: Add kerneldoc and rebase. Signed-off-by: Andrey Grodzovsky Reviewed-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 32eb43d165f2..561406a1cf88 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -316,8 +316,8 @@ static void cik_sdma_gfx_stop(struct amdgpu_device *adev) WREG32(mmSDMA0_GFX_RB_CNTL + sdma_offsets[i], rb_cntl); WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], 0); } - sdma0->ready = false; - sdma1->ready = false; + sdma0->sched.ready = false; + sdma1->sched.ready = false; } /** @@ -494,18 +494,16 @@ static int cik_sdma_gfx_resume(struct amdgpu_device *adev) /* enable DMA IBs */ WREG32(mmSDMA0_GFX_IB_CNTL + sdma_offsets[i], ib_cntl); - ring->ready = true; + ring->sched.ready = true; } cik_sdma_enable(adev, true); for (i = 0; i < adev->sdma.num_instances; i++) { ring = &adev->sdma.instance[i].ring; - r = amdgpu_ring_test_ring(ring); - if (r) { - ring->ready = false; + r = amdgpu_ring_test_helper(ring); + if (r) return r; - } if (adev->mman.buffer_funcs_ring == ring) amdgpu_ttm_set_buffer_funcs_status(adev, true); -- cgit From dc9eeff84c77080f545575a30062af0ac65b1eb0 Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 29 Oct 2018 10:48:31 +0100 Subject: drm/amdgpu: further ring test cleanups MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move all error messages from IP specific code into the common helper. This way we now uses the ring name in the messages instead of the index and note which device is affected as well. Also cleanup error handling in the IP specific code and consequently use ETIMEDOUT when the ring test timed out. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 561406a1cf88..5eb15bf9ec7c 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -616,21 +616,17 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) u64 gpu_addr; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); r = amdgpu_ring_alloc(ring, 5); - if (r) { - DRM_ERROR("amdgpu: dma failed to lock ring %d (%d).\n", ring->idx, r); - amdgpu_device_wb_free(adev, index); - return r; - } + if (r) + goto error_free_wb; + amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0)); amdgpu_ring_write(ring, lower_32_bits(gpu_addr)); amdgpu_ring_write(ring, upper_32_bits(gpu_addr)); @@ -645,15 +641,11 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring) DRM_UDELAY(1); } - if (i < adev->usec_timeout) { - DRM_DEBUG("ring test on %d succeeded in %d usecs\n", ring->idx, i); - } else { - DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n", - ring->idx, tmp); - r = -EINVAL; - } - amdgpu_device_wb_free(adev, index); + if (i >= adev->usec_timeout) + r = -ETIMEDOUT; +error_free_wb: + amdgpu_device_wb_free(adev, index); return r; } -- cgit From 98079389a873f45ba75bbb20dcf14db0ec694a9a Mon Sep 17 00:00:00 2001 From: Christian König Date: Mon, 29 Oct 2018 16:12:42 +0100 Subject: drm/amdgpu: remove messages from IB tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We already print an error message that an IB test failed in the common code. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Andrey Grodzovsky Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 5eb15bf9ec7c..49275f358f7a 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -668,20 +668,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) long r; r = amdgpu_device_wb_get(adev, &index); - if (r) { - dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r); + if (r) return r; - } gpu_addr = adev->wb.gpu_addr + (index * 4); tmp = 0xCAFEDEAD; adev->wb.wb[index] = cpu_to_le32(tmp); memset(&ib, 0, sizeof(ib)); r = amdgpu_ib_get(adev, NULL, 256, &ib); - if (r) { - DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r); + if (r) goto err0; - } ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0); @@ -696,21 +692,16 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout) r = dma_fence_wait_timeout(f, false, timeout); if (r == 0) { - DRM_ERROR("amdgpu: IB test timed out\n"); r = -ETIMEDOUT; goto err1; } else if (r < 0) { - DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r); goto err1; } tmp = le32_to_cpu(adev->wb.wb[index]); - if (tmp == 0xDEADBEEF) { - DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx); + if (tmp == 0xDEADBEEF) r = 0; - } else { - DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp); + else r = -EINVAL; - } err1: amdgpu_ib_free(adev, &ib, NULL); -- cgit From 34955e038a1b313b0f19eeacfb0e22aa6877e11d Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Wed, 24 Oct 2018 13:37:37 +0800 Subject: drm/amdgpu: Modify the argument of emit_ib interface use the point of struct amdgpu_job as the function argument instand of vmid, so the other members of struct amdgpu_job can be visit in emit_ib function. v2: add a wrapper for getting the VMID add the job before the ib on the parameter list. v3: refine the wrapper name Reviewed-by: Alex Deucher Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index 49275f358f7a..ad58dcec223e 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -218,9 +218,11 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) * Schedule an IB in the DMA ring (CIK). */ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring, + struct amdgpu_job *job, struct amdgpu_ib *ib, - unsigned vmid, bool ctx_switch) + bool ctx_switch) { + unsigned vmid = AMDGPU_JOB_GET_VMID(job); u32 extra_bits = vmid & 0xf; /* IB packet must end on a 8 DW boundary */ -- cgit From ccf191f8aabf8cb7bb01e4efae7bdb73614c745b Mon Sep 17 00:00:00 2001 From: Rex Zhu Date: Thu, 1 Nov 2018 13:42:42 +0800 Subject: drm/amdgpu: Refine function name there is no functional changes.just refine function name to keep consistence with other files. change amdgpu_get_sdma_instance to amdgpu_sdma_get_instance_from_ring. suggested by alex. Reviewed-by: Alex Deucher Reviewed-by: Flora Cui Signed-off-by: Rex Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/cik_sdma.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/cik_sdma.c') diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c index ad58dcec223e..45795191de1f 100644 --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c @@ -198,7 +198,7 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring) static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); int i; for (i = 0; i < count; i++) @@ -805,7 +805,7 @@ static void cik_sdma_vm_set_pte_pde(struct amdgpu_ib *ib, uint64_t pe, */ static void cik_sdma_ring_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib) { - struct amdgpu_sdma_instance *sdma = amdgpu_get_sdma_instance(ring); + struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring); u32 pad_count; int i; -- cgit