// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) Qualcomm Technologies, Inc. and/or its subsidiaries. */

#include "msm_gem.h"
#include "msm_mmu.h"
#include "msm_gpu_trace.h"
#include "a6xx_gpu.h"
#include "a6xx_gmu.xml.h"

#include <linux/bitfield.h>
#include <linux/devfreq.h>
#include <linux/firmware/qcom/qcom_scm.h>
#include <linux/pm_domain.h>
#include <linux/soc/qcom/llcc-qcom.h>

#define GPU_PAS_ID 13

static void a8xx_aperture_slice_set(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 val;

	val = A8XX_CP_APERTURE_CNTL_HOST_PIPEID(pipe) |
	      A8XX_CP_APERTURE_CNTL_HOST_SLICEID(slice);

	if (a6xx_gpu->cached_aperture == val)
		return;

	gpu_write(gpu, REG_A8XX_CP_APERTURE_CNTL_HOST, val);

	a6xx_gpu->cached_aperture = val;
}

static void a8xx_aperture_acquire(struct msm_gpu *gpu, enum adreno_pipe pipe, unsigned long *flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, *flags);

	a8xx_aperture_slice_set(gpu, pipe, 0);
}

static void a8xx_aperture_release(struct msm_gpu *gpu, unsigned long flags)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);
}

static void a8xx_aperture_clear(struct msm_gpu *gpu)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, PIPE_NONE, &flags);
	a8xx_aperture_release(gpu, flags);
}

static void a8xx_write_pipe(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 offset, u32 data)
{
	unsigned long flags;

	a8xx_aperture_acquire(gpu, pipe, &flags);
	gpu_write(gpu, offset, data);
	a8xx_aperture_release(gpu, flags);
}

static u32 a8xx_read_pipe_slice(struct msm_gpu *gpu, enum adreno_pipe pipe, u32 slice, u32 offset)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	unsigned long flags;
	u32 val;

	spin_lock_irqsave(&a6xx_gpu->aperture_lock, flags);

	a8xx_aperture_slice_set(gpu, pipe, slice);
	val = gpu_read(gpu, offset);

	spin_unlock_irqrestore(&a6xx_gpu->aperture_lock, flags);

	return val;
}
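
/*
 * Discover the set of populated slices. GEN1 parts always expose the full
 * slice mask derived from max_slices; later parts additionally mask it with
 * CX_MISC_SLICE_ENABLE_FINAL. The resulting slice count is folded into the
 * reported chip id.
 */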
void a8xx_gpu_get_slice_info(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	u32 slice_mask;

	if (adreno_gpu->info->family < ADRENO_8XX_GEN1)
		return;

	if (a6xx_gpu->slice_mask)
		return;

	slice_mask = GENMASK(info->max_slices - 1, 0);

	/* GEN1 doesn't support partial slice configurations */
	if (adreno_gpu->info->family == ADRENO_8XX_GEN1) {
		a6xx_gpu->slice_mask = slice_mask;
		return;
	}

	slice_mask &= a6xx_llc_read(a6xx_gpu, REG_A8XX_CX_MISC_SLICE_ENABLE_FINAL);

	a6xx_gpu->slice_mask = slice_mask;

	/* Chip ID depends on the number of slices available, so update it */
	adreno_gpu->chip_id |= FIELD_PREP(GENMASK(7, 4), hweight32(slice_mask));
}

static u32 a8xx_get_first_slice(struct a6xx_gpu *a6xx_gpu)
{
	return ffs(a6xx_gpu->slice_mask) - 1;
}

static inline bool _a8xx_check_idle(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	/* Check that the GMU is idle */
	if (!a6xx_gmu_isidle(&a6xx_gpu->gmu))
		return false;

	/* Check that the CX master is idle */
	if (gpu_read(gpu, REG_A8XX_RBBM_STATUS) &
	    ~A8XX_RBBM_STATUS_CP_AHB_BUSY_CX_MASTER)
		return false;

	return !(gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS) &
		 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT);
}

static bool a8xx_idle(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	/* wait for CP to drain ringbuffer: */
	if (!adreno_idle(gpu, ring))
		return false;

	if (spin_until(_a8xx_check_idle(gpu))) {
		DRM_ERROR("%s: %ps: timeout waiting for GPU to idle: status %8.8X irq %8.8X rptr/wptr %d/%d\n",
			  gpu->name, __builtin_return_address(0),
			  gpu_read(gpu, REG_A8XX_RBBM_STATUS),
			  gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS),
			  gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
			  gpu_read(gpu, REG_A6XX_CP_RB_WPTR));
		return false;
	}

	return true;
}

void a8xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	uint32_t wptr;
	unsigned long flags;

	spin_lock_irqsave(&ring->preempt_lock, flags);

	/* Copy the shadow to the actual register */
	ring->cur = ring->next;

	/* Make sure to wrap wptr if we need to */
	wptr = get_wptr(ring);

	/* Update HW if this is the current ring and we are not in preempt */
	if (!a6xx_in_preempt(a6xx_gpu)) {
		if (a6xx_gpu->cur_ring == ring)
			gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
		else
			ring->restore_wptr = true;
	} else {
		ring->restore_wptr = true;
	}

	spin_unlock_irqrestore(&ring->preempt_lock, flags);
}
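
/*
 * Toggle hardware clock gating. The CGC mode/delay/hysteresis values live in
 * GMU address space, while the per-block settings are loaded through the RBBM
 * global load command; enabling additionally waits for the P2S handshake to
 * report TXDONE, and disabling clears the GBIF clock gating that the GMU
 * enables at boot.
 */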
static void a8xx_set_hwcg(struct msm_gpu *gpu, bool state)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	u32 val;

	if (adreno_is_x285(adreno_gpu) && state)
		gpu_write(gpu, REG_A8XX_RBBM_CGC_0_PC, 0x00000702);

	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_MODE_CNTL,
		  state ? adreno_gpu->info->a6xx->gmu_cgc_mode : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_DELAY_CNTL, state ? 0x110111 : 0);
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GMU_CGC_HYST_CNTL, state ? 0x55555 : 0);

	gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 1);
	gpu_write(gpu, REG_A8XX_RBBM_CGC_GLOBAL_LOAD_CMD, !!state);

	if (state) {
		gpu_write(gpu, REG_A8XX_RBBM_CGC_P2S_TRIG_CMD, 1);

		if (gpu_poll_timeout(gpu, REG_A8XX_RBBM_CGC_P2S_STATUS, val,
				     val & A8XX_RBBM_CGC_P2S_STATUS_TXDONE, 1, 10)) {
			dev_err(&gpu->pdev->dev, "RBBM_CGC_P2S_STATUS TXDONE Poll failed\n");
			return;
		}

		gpu_write(gpu, REG_A8XX_RBBM_CLOCK_CNTL_GLOBAL, 0);
	} else {
		/*
		 * GMU enables clk gating in GBIF during boot up. So,
		 * override that here when hwcg feature is disabled
		 */
		gpu_rmw(gpu, REG_A8XX_GBIF_CX_CONFIG, BIT(0), 0);
	}
}

static void a8xx_set_cp_protect(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct adreno_protect *protect = adreno_gpu->info->a6xx->protect;
	u32 cntl, final_cfg;
	unsigned int i;

	cntl = A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_PROT_EN |
	       A8XX_CP_PROTECT_CNTL_PIPE_ACCESS_FAULT_ON_VIOL_EN |
	       A8XX_CP_PROTECT_CNTL_PIPE_LAST_SPAN_INF_RANGE |
	       A8XX_CP_PROTECT_CNTL_PIPE_HALT_SQE_RANGE__MASK;

	/*
	 * Enable access protection to privileged registers, fault on an access
	 * protect violation and select the last span to protect from the start
	 * address all the way to the end of the register address space
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_CNTL_PIPE, cntl);
	a8xx_aperture_clear(gpu);

	for (i = 0; i < protect->count; i++) {
		/* Intentionally skip writing to some registers */
		if (protect->regs[i]) {
			gpu_write(gpu, REG_A8XX_CP_PROTECT_GLOBAL(i), protect->regs[i]);
			final_cfg = protect->regs[i];
		}
	}

	/*
	 * Last span feature is only supported on PIPE specific register.
	 * So update those here
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_CP_PROTECT_PIPE(protect->count_max), final_cfg);
	a8xx_aperture_clear(gpu);
}
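
/*
 * Program the UBWC settings. The encoder version selects the compression
 * features (AMSBC, RGB565 predication, lossless RGBA8888, ...), while the
 * highest bank bit is encoded relative to 13 and spread across the GRAS, RB,
 * SP and TPL1 copies of the mode control registers.
 */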
static void a8xx_set_ubwc_config(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct qcom_ubwc_cfg_data *cfg = adreno_gpu->ubwc_config;
	u32 level2_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL2);
	u32 level3_swizzling_dis = !(cfg->ubwc_swizzle & UBWC_SWIZZLE_ENABLE_LVL3);
	bool rgba8888_lossless = false, fp16compoptdis = false;
	bool yuvnotcomptofc = false, min_acc_len_64b = false;
	bool rgb565_predicator = false, amsbc = false;
	bool ubwc_mode = qcom_ubwc_get_ubwc_mode(cfg);
	u32 ubwc_version = cfg->ubwc_enc_version;
	u32 hbb, hbb_hi, hbb_lo, mode = 1;
	u8 uavflagprd_inv = 2;

	switch (ubwc_version) {
	case UBWC_5_0:
		amsbc = true;
		rgb565_predicator = true;
		mode = 4;
		break;
	case UBWC_4_0:
		amsbc = true;
		rgb565_predicator = true;
		fp16compoptdis = true;
		rgba8888_lossless = true;
		mode = 2;
		break;
	case UBWC_3_0:
		amsbc = true;
		mode = 1;
		break;
	default:
		dev_err(&gpu->pdev->dev, "Unknown UBWC version: 0x%x\n", ubwc_version);
		break;
	}

	/*
	 * We subtract 13 from the highest bank bit (13 is the minimum value
	 * allowed by hw) and write the lowest two bits of the remaining value
	 * as hbb_lo and the one above it as hbb_hi to the hardware.
	 */
	WARN_ON(cfg->highest_bank_bit < 13);
	hbb = cfg->highest_bank_bit - 13;
	hbb_hi = hbb >> 2;
	hbb_lo = hbb & 3;

	a8xx_write_pipe(gpu, PIPE_BV, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5);
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_GRAS_NC_MODE_CNTL, hbb << 5);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CCU_NC_MODE_CNTL,
			yuvnotcomptofc << 6 |
			hbb_hi << 3 |
			hbb_lo << 1);

	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_CMP_NC_MODE_CNTL,
			mode << 15 |
			yuvnotcomptofc << 6 |
			rgba8888_lossless << 4 |
			fp16compoptdis << 3 |
			rgb565_predicator << 2 |
			amsbc << 1 |
			min_acc_len_64b);

	a8xx_aperture_clear(gpu);

	gpu_write(gpu, REG_A6XX_SP_NC_MODE_CNTL,
		  level3_swizzling_dis << 13 |
		  level2_swizzling_dis << 12 |
		  hbb_hi << 10 |
		  uavflagprd_inv << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 |
		  ubwc_mode);

	gpu_write(gpu, REG_A6XX_TPL1_NC_MODE_CNTL,
		  level3_swizzling_dis << 7 |
		  level2_swizzling_dis << 6 |
		  hbb_hi << 4 |
		  min_acc_len_64b << 3 |
		  hbb_lo << 1 |
		  ubwc_mode);
}

static void a8xx_nonctxt_config(struct msm_gpu *gpu, u32 *gmem_protect)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	const struct a6xx_info *info = adreno_gpu->info->a6xx;
	const struct adreno_reglist_pipe *regs = info->nonctxt_reglist;
	unsigned int pipe_id, i;
	unsigned long flags;

	for (pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		/* We don't have support for LPAC yet */
		if (pipe_id == PIPE_LPAC)
			continue;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);

		for (i = 0; regs[i].offset; i++) {
			if (!(BIT(pipe_id) & regs[i].pipe))
				continue;

			if (regs[i].offset == REG_A8XX_RB_GC_GMEM_PROTECT)
				*gmem_protect = regs[i].value;

			gpu_write(gpu, regs[i].offset, regs[i].value);
		}

		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);
}
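
/*
 * Emit the CP_ME_INIT sequence on ring 0. Concurrent binning is disabled
 * first via CP_THREAD_CONTROL, and the init mask asks for multiple HW
 * contexts, error detection and the default reset state while skipping
 * perfcounter save/restore across preemption.
 */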
static int a8xx_cp_init(struct msm_gpu *gpu)
{
	struct msm_ringbuffer *ring = gpu->rb[0];
	u32 mask;

	/* Disable concurrent binning before sending CP init */
	OUT_PKT7(ring, CP_THREAD_CONTROL, 1);
	OUT_RING(ring, BIT(27));

	OUT_PKT7(ring, CP_ME_INIT, 4);

	/* Use multiple HW contexts */
	mask = BIT(0);

	/* Enable error detection */
	mask |= BIT(1);

	/* Set default reset state */
	mask |= BIT(3);

	/* Disable save/restore of performance counters across preemption */
	mask |= BIT(6);

	OUT_RING(ring, mask);

	/* Enable multiple hardware contexts */
	OUT_RING(ring, 0x00000003);

	/* Enable error detection */
	OUT_RING(ring, 0x20000000);

	/* Operation mode mask */
	OUT_RING(ring, 0x00000002);

	a6xx_flush(gpu, ring);

	return a8xx_idle(gpu, ring) ? 0 : -EINVAL;
}

#define A8XX_INT_MASK \
	(A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_GPC_ERROR | \
	 A6XX_RBBM_INT_0_MASK_CP_SW | \
	 A6XX_RBBM_INT_0_MASK_CP_HW_ERROR | \
	 A6XX_RBBM_INT_0_MASK_PM4CPINTERRUPT | \
	 A6XX_RBBM_INT_0_MASK_CP_RB_DONE_TS | \
	 A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS | \
	 A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW | \
	 A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT | \
	 A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS | \
	 A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR | \
	 A6XX_RBBM_INT_0_MASK_TSBWRITEERROR | \
	 A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)

#define A8XX_APRIV_MASK \
	(A8XX_CP_APRIV_CNTL_PIPE_ICACHE | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBFETCH | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBPRIVLEVEL | \
	 A8XX_CP_APRIV_CNTL_PIPE_RBRPWB)

#define A8XX_BR_APRIV_MASK \
	(A8XX_APRIV_MASK | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDREAD | \
	 A8XX_CP_APRIV_CNTL_PIPE_CDWRITE)

#define A8XX_CP_GLOBAL_INT_MASK \
	(A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_HWFAULTDDEBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTLPAC | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE0 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTAQE1 | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBR | \
	 A8XX_CP_GLOBAL_INT_MASK_SWFAULTDDEBV)

#define A8XX_CP_INTERRUPT_STATUS_MASK_PIPE \
	(A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB1WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB2WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFIB3WRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFSDSWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFMRBWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_CSFVSDWRAP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_OPCODEERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VSDPARITYERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_REGISTERPROTECTIONERROR | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_ILLEGALINSTRUCTION | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_SMMUFAULT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPCLIENT | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPTYPE | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESPREAD | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_VBIFRESP | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_RTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTWROVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTREFCNTOVF | \
	 A8XX_CP_INTERRUPT_STATUS_MASK_PIPE_LRZRTCLRRESMISS)

#define A8XX_CP_HW_FAULT_STATUS_MASK_PIPE \
	(A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB1FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB2FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFIB3FAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFSDSFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFMRBFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_CSFVSDFAULT | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_SQEREADBURSTOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_EVENTENGINEOVF | \
	 A8XX_CP_HW_FAULT_STATUS_MASK_PIPE_UCODEERROR)
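
/*
 * One-time hardware bring-up, called with the GMU lock held and an OOB "GPU
 * set" vote in place so the GMU keeps the GPU powered while registers are
 * programmed. Roughly: clear GBIF halts, configure SECVID, UCHE/GMEM ranges
 * and UBWC, program the non-context registers per pipe, set up CP protection
 * and interrupts, point the CP at the SQE/AQE firmware, start the CP and
 * switch out of secure mode (via the zap shader when available).
 */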
static int hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	unsigned int pipe_id, i;
	u32 gmem_protect = 0;
	u64 gmem_range_min;
	int ret;

	ret = a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
	if (ret)
		return ret;

	/* Clear the cached value to force aperture configuration next time */
	a6xx_gpu->cached_aperture = UINT_MAX;
	a8xx_aperture_clear(gpu);

	/* Clear GBIF halt in case GX domain was not collapsed */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0);
	gpu_read(gpu, REG_A6XX_GBIF_HALT);
	gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 0);
	gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT);

	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_CNTL, 0);

	/*
	 * Disable the trusted memory range - we don't actually support secure
	 * memory rendering at this point in time and we don't want to block off
	 * part of the virtual memory space.
	 */
	gpu_write64(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_BASE, 0x00000000);
	gpu_write(gpu, REG_A6XX_RBBM_SECVID_TSB_TRUSTED_SIZE, 0x00000000);

	/* Make all blocks contribute to the GPU BUSY perf counter */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_GPU_BUSY_MASKED, 0xffffffff);

	/* Setup GMEM Range in UCHE */
	gmem_range_min = SZ_64M;

	/* Set the GMEM VA range [gmem_range_min:gmem_range_min + gpu->gmem - 1] */
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_GC_GMEM_RANGE_MIN, gmem_range_min);
	gpu_write64(gpu, REG_A8XX_SP_HLSQ_GC_GMEM_RANGE_MIN, gmem_range_min);

	/* Setup UCHE Trap region */
	gpu_write64(gpu, REG_A8XX_UCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_TRAP_BASE, adreno_gpu->uche_trap_base);
	gpu_write64(gpu, REG_A8XX_UCHE_CCHE_WRITE_THRU_BASE, adreno_gpu->uche_trap_base);

	/* Turn on performance counters */
	gpu_write(gpu, REG_A8XX_RBBM_PERFCTR_CNTL, 0x1);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_PERFCTR_CNTL, 0x1);

	/* Turn on the IFPC counter (countable 4 on XOCLK1) */
	gmu_write(&a6xx_gpu->gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_1,
		  FIELD_PREP(GENMASK(7, 0), 0x4));

	/* Select CP0 to always count cycles */
	gpu_write(gpu, REG_A8XX_CP_PERFCTR_CP_SEL(0), 1);

	a8xx_set_ubwc_config(gpu);

	/* Set weights for bicubic filtering */
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(0), 0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(1), 0x3fe05ff4);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(2), 0x3fa0ebee);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(3), 0x3f5193ed);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(4), 0x3f0243f0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(5), 0x00000000);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(6), 0x3fd093e8);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(7), 0x3f4133dc);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(8), 0x3ea1dfdb);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(9), 0x3e0283e0);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(10), 0x0000ac2b);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(11), 0x0000f01d);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(12), 0x00114412);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(13), 0x0021980a);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(14), 0x0051ec05);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(15), 0x0000380e);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(16), 0x3ff09001);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(17), 0x3fc10bfa);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(18), 0x3f9193f7);
	gpu_write(gpu, REG_A8XX_TPL1_BICUBIC_WEIGHTS_TABLE(19), 0x3f7227f7);

	gpu_write(gpu, REG_A8XX_UCHE_CLIENT_PF, BIT(7) | 0x1);

	a8xx_nonctxt_config(gpu, &gmem_protect);

	/* Enable fault detection */
	gpu_write(gpu, REG_A8XX_RBBM_INTERFACE_HANG_INT_CNTL, BIT(30) | 0xcfffff);
	gpu_write(gpu, REG_A8XX_RBBM_SLICE_INTERFACE_HANG_INT_CNTL, BIT(30));
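
	/*
	 * XOCLK counter 0 configured below is what a8xx_gpu_busy() samples:
	 * it accumulates GPU busy ticks at the 19.2 MHz always-on clock while
	 * the power counter is enabled.
	 */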
	/* Set up the CX GMU counter 0 to count busy ticks */
	gmu_write(gmu, REG_A6XX_GPU_GMU_AO_GPU_CX_BUSY_MASK, 0xff000000);

	/* Enable the power counter */
	gmu_rmw(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_SELECT_XOCLK_0, 0xff, BIT(5));
	gmu_write(gmu, REG_A8XX_GMU_CX_GMU_POWER_COUNTER_ENABLE, 1);

	/* Protect registers from the CP */
	a8xx_set_cp_protect(gpu);

	/* Enable the GMEM save/restore feature for preemption */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A6XX_RB_CONTEXT_SWITCH_GMEM_SAVE_RESTORE_ENABLE, 1);
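
	/*
	 * Program the APRIV privileges and the CP fault/interrupt masks for
	 * every pipe through the aperture. Only BR additionally gets CD
	 * read/write privileges; LPAC is skipped since it is not supported
	 * yet.
	 */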
	for (pipe_id = PIPE_BR; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		u32 apriv_mask = A8XX_APRIV_MASK;
		unsigned long flags;

		if (pipe_id == PIPE_LPAC)
			continue;

		if (pipe_id == PIPE_BR)
			apriv_mask = A8XX_BR_APRIV_MASK;

		a8xx_aperture_acquire(gpu, pipe_id, &flags);
		gpu_write(gpu, REG_A8XX_CP_APRIV_CNTL_PIPE, apriv_mask);
		gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_PIPE,
			  A8XX_CP_INTERRUPT_STATUS_MASK_PIPE);
		gpu_write(gpu, REG_A8XX_CP_HW_FAULT_STATUS_MASK_PIPE,
			  A8XX_CP_HW_FAULT_STATUS_MASK_PIPE);
		a8xx_aperture_release(gpu, flags);
	}

	a8xx_aperture_clear(gpu);

	/* Enable interrupts */
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, A8XX_CP_GLOBAL_INT_MASK);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, A8XX_INT_MASK);

	ret = adreno_hw_init(gpu);
	if (ret)
		goto out;

	gpu_write64(gpu, REG_A8XX_CP_SQE_INSTR_BASE, a6xx_gpu->sqe_iova);

	if (a6xx_gpu->aqe_iova)
		gpu_write64(gpu, REG_A8XX_CP_AQE_INSTR_BASE_0, a6xx_gpu->aqe_iova);

	/* Set the ringbuffer address */
	gpu_write64(gpu, REG_A6XX_CP_RB_BASE, gpu->rb[0]->iova);

	gpu_write(gpu, REG_A6XX_CP_RB_CNTL, MSM_GPU_RB_CNTL_DEFAULT);

	/* Configure the RPTR shadow if needed: */
	gpu_write64(gpu, REG_A6XX_CP_RB_RPTR_ADDR, shadowptr(a6xx_gpu, gpu->rb[0]));
	gpu_write64(gpu, REG_A8XX_CP_RB_RPTR_ADDR_BV, rbmemptr(gpu->rb[0], bv_rptr));

	for (i = 0; i < gpu->nr_rings; i++)
		a6xx_gpu->shadow[i] = 0;

	/* Always come up on rb 0 */
	a6xx_gpu->cur_ring = gpu->rb[0];

	for (i = 0; i < gpu->nr_rings; i++)
		gpu->rb[i]->cur_ctx_seqno = 0;

	/* Enable the SQE to start the CP engine */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 1);

	ret = a8xx_cp_init(gpu);
	if (ret)
		goto out;

	/*
	 * Try to load a zap shader into the secure world. If successful
	 * we can use the CP to switch out of secure mode. If not then we
	 * have no recourse but to try to switch ourselves out manually. If we
	 * guessed wrong then access to the RBBM_SECVID_TRUST_CNTL register will
	 * be blocked and a permissions violation will soon follow.
	 */
	ret = a6xx_zap_shader_init(gpu);
	if (!ret) {
		OUT_PKT7(gpu->rb[0], CP_SET_SECURE_MODE, 1);
		OUT_RING(gpu->rb[0], 0x00000000);

		a6xx_flush(gpu, gpu->rb[0]);
		if (!a8xx_idle(gpu, gpu->rb[0]))
			return -EINVAL;
	} else if (ret == -ENODEV) {
		/*
		 * This device does not use zap shader (but print a warning
		 * just in case someone got their dt wrong.. hopefully they
		 * have a debug UART to realize the error of their ways...
		 * if you mess this up you are about to crash horribly)
		 */
		dev_warn_once(gpu->dev->dev,
			      "Zap shader not enabled - using SECVID_TRUST_CNTL instead\n");
		gpu_write(gpu, REG_A6XX_RBBM_SECVID_TRUST_CNTL, 0x0);
		ret = 0;
	} else {
		return ret;
	}

	/*
	 * GMEM_PROTECT register should be programmed after the GPU has
	 * transitioned to non-secure mode
	 */
	a8xx_write_pipe(gpu, PIPE_BR, REG_A8XX_RB_GC_GMEM_PROTECT, gmem_protect);
	WARN_ON(!gmem_protect);
	a8xx_aperture_clear(gpu);

	/* Enable hardware clockgating */
	a8xx_set_hwcg(gpu, true);

out:
	/*
	 * Tell the GMU that we are done touching the GPU and it can start power
	 * management
	 */
	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);

	return ret;
}

int a8xx_hw_init(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	int ret;

	mutex_lock(&a6xx_gpu->gmu.lock);
	ret = hw_init(gpu);
	mutex_unlock(&a6xx_gpu->gmu.lock);

	return ret;
}

static void a8xx_dump(struct msm_gpu *gpu)
{
	DRM_DEV_INFO(&gpu->pdev->dev, "status: %08x\n",
		     gpu_read(gpu, REG_A8XX_RBBM_STATUS));
	adreno_dump(gpu);
}

void a8xx_recover(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int active_submits;

	adreno_dump_info(gpu);

	if (hang_debug)
		a8xx_dump(gpu);

	/*
	 * To handle recovery specific sequences during the rpm suspend we are
	 * about to trigger
	 */
	a6xx_gpu->hung = true;

	/* Halt SQE first */
	gpu_write(gpu, REG_A8XX_CP_SQE_CNTL, 3);

	pm_runtime_dont_use_autosuspend(&gpu->pdev->dev);

	/* active_submit won't change until we make a submission */
	mutex_lock(&gpu->active_lock);
	active_submits = gpu->active_submits;

	/*
	 * Temporarily clear active_submits count to silence a WARN() in the
	 * runtime suspend cb
	 */
	gpu->active_submits = 0;

	reinit_completion(&gmu->pd_gate);
	dev_pm_genpd_add_notifier(gmu->cxpd, &gmu->pd_nb);
	dev_pm_genpd_synced_poweroff(gmu->cxpd);

	/* Drop the rpm refcount from active submits */
	if (active_submits)
		pm_runtime_put(&gpu->pdev->dev);

	/* And the final one from recover worker */
	pm_runtime_put_sync(&gpu->pdev->dev);

	if (!wait_for_completion_timeout(&gmu->pd_gate, msecs_to_jiffies(1000)))
		DRM_DEV_ERROR(&gpu->pdev->dev, "cx gdsc didn't collapse\n");

	dev_pm_genpd_remove_notifier(gmu->cxpd);

	pm_runtime_use_autosuspend(&gpu->pdev->dev);

	if (active_submits)
		pm_runtime_get(&gpu->pdev->dev);

	pm_runtime_get_sync(&gpu->pdev->dev);

	gpu->active_submits = active_submits;
	mutex_unlock(&gpu->active_lock);

	msm_gpu_hw_init(gpu);

	a6xx_gpu->hung = false;
}
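
/*
 * Decode which block triggered an SMMU fault: the mid field from FSYNR1
 * selects the client table (BR/BV clients for mid=3, LPAC clients for mid=8)
 * and the UCHE pending-fault client register picks the entry within it.
 */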
static const char *a8xx_uche_fault_block(struct msm_gpu *gpu, u32 mid)
{
	static const char * const uche_clients[] = {
		"BR_VFD", "BR_SP", "BR_VSC", "BR_VPC",
		"BR_HLSQ", "BR_PC", "BR_LRZ", "BR_TP",
		"BV_VFD", "BV_SP", "BV_VSC", "BV_VPC",
		"BV_HLSQ", "BV_PC", "BV_LRZ", "BV_TP",
		"STCHE",
	};
	static const char * const uche_clients_lpac[] = {
		"-", "SP_LPAC", "-", "-", "HLSQ_LPAC", "-", "-", "TP_LPAC",
	};
	u32 val;

	/*
	 * The source of the data depends on the mid ID read from FSYNR1
	 * and the client ID read from the UCHE block
	 */
	val = gpu_read(gpu, REG_A8XX_UCHE_CLIENT_PF);

	val &= GENMASK(6, 0);

	/* mid=3 refers to BR or BV */
	if (mid == 3) {
		if (val < ARRAY_SIZE(uche_clients))
			return uche_clients[val];
		else
			return "UCHE";
	}

	/* mid=8 refers to LPAC */
	if (mid == 8) {
		if (val < ARRAY_SIZE(uche_clients_lpac))
			return uche_clients_lpac[val];
		else
			return "UCHE_LPAC";
	}

	return "Unknown";
}

static const char *a8xx_fault_block(struct msm_gpu *gpu, u32 id)
{
	switch (id) {
	case 0x0:
		return "CP";
	case 0x1:
		return "UCHE: Unknown";
	case 0x2:
		return "UCHE_LPAC: Unknown";
	case 0x3:
	case 0x8:
		return a8xx_uche_fault_block(gpu, id);
	case 0x4:
		return "CCU";
	case 0x5:
		return "Flag cache";
	case 0x6:
		return "PREFETCH";
	case 0x7:
		return "GMU";
	case 0x9:
		return "UCHE_HPAC";
	}

	return "Unknown";
}

int a8xx_fault_handler(void *arg, unsigned long iova, int flags, void *data)
{
	struct msm_gpu *gpu = arg;
	struct adreno_smmu_fault_info *info = data;
	const char *block = "unknown";

	u32 scratch[] = {
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(0)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(1)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(2)),
		gpu_read(gpu, REG_A8XX_CP_SCRATCH_GLOBAL(3)),
	};

	if (info)
		block = a8xx_fault_block(gpu, info->fsynr1 & 0xff);

	return adreno_fault_handler(gpu, iova, flags, info, block, scratch);
}

static void a8xx_cp_hw_err_irq(struct msm_gpu *gpu)
{
	u32 status = gpu_read(gpu, REG_A8XX_CP_INTERRUPT_STATUS_GLOBAL);
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u32 slice = a8xx_get_first_slice(a6xx_gpu);
	u32 hw_fault_mask = GENMASK(6, 0);
	u32 sw_fault_mask = GENMASK(22, 16);
	u32 pipe = 0;

	dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Global INT status: 0x%x\n", status);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBR | A8XX_CP_GLOBAL_INT_MASK_SWFAULTBR))
		pipe |= BIT(PIPE_BR);

	if (status & (A8XX_CP_GLOBAL_INT_MASK_HWFAULTBV | A8XX_CP_GLOBAL_INT_MASK_SWFAULTBV))
		pipe |= BIT(PIPE_BV);

	if (!pipe) {
		dev_err_ratelimited(&gpu->pdev->dev, "CP Fault Unknown pipe\n");
		goto out;
	}

	for (unsigned int pipe_id = PIPE_NONE; pipe_id <= PIPE_DDE_BV; pipe_id++) {
		if (!(BIT(pipe_id) & pipe))
			continue;

		if (hw_fault_mask & status) {
			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						      REG_A8XX_CP_HW_FAULT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					    "CP HW FAULT pipe: %u status: 0x%x\n",
					    pipe_id, status);
		}

		if (sw_fault_mask & status) {
			status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
						      REG_A8XX_CP_INTERRUPT_STATUS_PIPE);
			dev_err_ratelimited(&gpu->pdev->dev,
					    "CP SW FAULT pipe: %u status: 0x%x\n",
					    pipe_id, status);

			if (status & BIT(8)) {
				a8xx_write_pipe(gpu, pipe_id, REG_A8XX_CP_SQE_STAT_ADDR_PIPE, 1);
				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
							      REG_A8XX_CP_SQE_STAT_DATA_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						    "CP Opcode error, opcode=0x%x\n", status);
			}

			if (status & BIT(10)) {
				status = a8xx_read_pipe_slice(gpu, pipe_id, slice,
							      REG_A8XX_CP_PROTECT_STATUS_PIPE);
				dev_err_ratelimited(&gpu->pdev->dev,
						    "CP REG PROTECT error, status=0x%x\n", status);
			}
		}
	}

out:
	/* Turn off interrupts to avoid triggering recovery again */
	a8xx_aperture_clear(gpu);
	gpu_write(gpu, REG_A8XX_CP_INTERRUPT_STATUS_MASK_GLOBAL, 0);
	gpu_write(gpu, REG_A8XX_RBBM_INT_0_MASK, 0);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}
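
/*
 * Read back SQE-internal state through the UCODE debug address/data pair.
 * The CP_PERIPH_* offsets below locate the IB1/IB2/IB3 base and offset
 * values reported in the hang dump.
 */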
static u32 gpu_periph_read(struct msm_gpu *gpu, u32 dbg_offset)
{
	gpu_write(gpu, REG_A8XX_CP_SQE_UCODE_DBG_ADDR_PIPE, dbg_offset);

	return gpu_read(gpu, REG_A8XX_CP_SQE_UCODE_DBG_DATA_PIPE);
}

static u64 gpu_periph_read64(struct msm_gpu *gpu, u32 dbg_offset)
{
	u64 lo, hi;

	lo = gpu_periph_read(gpu, dbg_offset);
	hi = gpu_periph_read(gpu, dbg_offset + 1);

	return (hi << 32) | lo;
}

#define CP_PERIPH_IB1_BASE_LO	0x7005
#define CP_PERIPH_IB1_BASE_HI	0x7006
#define CP_PERIPH_IB1_SIZE	0x7007
#define CP_PERIPH_IB1_OFFSET	0x7008
#define CP_PERIPH_IB2_BASE_LO	0x7009
#define CP_PERIPH_IB2_BASE_HI	0x700a
#define CP_PERIPH_IB2_SIZE	0x700b
#define CP_PERIPH_IB2_OFFSET	0x700c
#define CP_PERIPH_IB3_BASE_LO	0x700d
#define CP_PERIPH_IB3_BASE_HI	0x700e
#define CP_PERIPH_IB3_SIZE	0x700f
#define CP_PERIPH_IB3_OFFSET	0x7010
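
/*
 * Hang-detect handler: bail out if the GPU is merely stalled on an SMMU
 * fault (the fault handler owns that path), otherwise keep the GPU powered
 * via the GMU keepalive and dump BR and BV pipe status, ringbuffer pointers
 * and IB1/IB2/IB3 positions before scheduling recovery.
 */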
static void a8xx_fault_detect_irq(struct msm_gpu *gpu)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	struct msm_ringbuffer *ring = gpu->funcs->active_ring(gpu);
	unsigned long flags;

	/*
	 * If stalled on SMMU fault, we could trip the GPU's hang detection,
	 * but the fault handler will trigger the devcore dump, and we want
	 * to otherwise resume normally rather than killing the submit, so
	 * just bail.
	 */
	if (gpu_read(gpu, REG_A8XX_RBBM_MISC_STATUS) &
	    A8XX_RBBM_MISC_STATUS_SMMU_STALLED_ON_FAULT)
		return;

	/*
	 * Force the GPU to stay on until after we finish
	 * collecting information
	 */
	if (!adreno_has_gmu_wrapper(adreno_gpu))
		gmu_write(&a6xx_gpu->gmu, REG_A6XX_GMU_GMU_PWR_COL_KEEPALIVE, 1);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "gpu fault ring %d fence %x status %8.8X gfx_status %8.8X\n",
		      ring ? ring->id : -1,
		      ring ? ring->fctx->last_fence : 0,
		      gpu_read(gpu, REG_A8XX_RBBM_STATUS),
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_STATUS));

	a8xx_aperture_acquire(gpu, PIPE_BR, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BR: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BR_STATUS),
		      gpu_read(gpu, REG_A6XX_CP_RB_RPTR),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);

	a8xx_aperture_acquire(gpu, PIPE_BV, &flags);

	DRM_DEV_ERROR(&gpu->pdev->dev,
		      "BV: status %8.8X rb %4.4x/%4.4x ib1 %16.16llX/%4.4x ib2 %16.16llX/%4.4x ib3 %16.16llX/%4.4x\n",
		      gpu_read(gpu, REG_A8XX_RBBM_GFX_BV_STATUS),
		      gpu_read(gpu, REG_A8XX_CP_RB_RPTR_BV),
		      gpu_read(gpu, REG_A6XX_CP_RB_WPTR),
		      gpu_periph_read64(gpu, CP_PERIPH_IB1_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB1_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB2_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB2_OFFSET),
		      gpu_periph_read64(gpu, CP_PERIPH_IB3_BASE_LO),
		      gpu_periph_read(gpu, CP_PERIPH_IB3_OFFSET));

	a8xx_aperture_release(gpu, flags);

	a8xx_aperture_clear(gpu);

	/* Turn off the hangcheck timer to keep it from bothering us */
	timer_delete(&gpu->hangcheck_timer);

	kthread_queue_work(gpu->worker, &gpu->recover_work);
}

static void a8xx_sw_fuse_violation_irq(struct msm_gpu *gpu)
{
	u32 status;

	status = gpu_read(gpu, REG_A8XX_RBBM_SW_FUSE_INT_STATUS);
	gpu_write(gpu, REG_A8XX_RBBM_SW_FUSE_INT_MASK, 0);

	dev_err_ratelimited(&gpu->pdev->dev, "SW fuse violation status=%8.8x\n", status);

	/*
	 * Ignore FASTBLEND violations, because the HW will silently fall back
	 * to legacy blending.
	 */
	if (status & (A7XX_CX_MISC_SW_FUSE_VALUE_RAYTRACING |
		      A7XX_CX_MISC_SW_FUSE_VALUE_LPAC)) {
		timer_delete(&gpu->hangcheck_timer);

		kthread_queue_work(gpu->worker, &gpu->recover_work);
	}
}

irqreturn_t a8xx_irq(struct msm_gpu *gpu)
{
	struct msm_drm_private *priv = gpu->dev->dev_private;
	u32 status = gpu_read(gpu, REG_A8XX_RBBM_INT_0_STATUS);

	gpu_write(gpu, REG_A8XX_RBBM_INT_CLEAR_CMD, status);

	if (priv->disable_err_irq)
		status &= A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS;

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_HANG_DETECT)
		a8xx_fault_detect_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_AHB_ERROR) {
		u32 rl0, rl1;

		rl0 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_0);
		rl1 = gpu_read(gpu, REG_A8XX_CP_RL_ERROR_DETAILS_1);
		dev_err_ratelimited(&gpu->pdev->dev,
				    "CP | AHB bus error RL_ERROR_0: %x, RL_ERROR_1: %x\n",
				    rl0, rl1);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_HW_ERROR)
		a8xx_cp_hw_err_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_ASYNCFIFO_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB ASYNC overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_RBBM_ATB_BUS_OVERFLOW)
		dev_err_ratelimited(&gpu->pdev->dev, "RBBM | ATB bus overflow\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_OOB_ACCESS)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Out of bounds access\n");

	if (status & A6XX_RBBM_INT_0_MASK_UCHE_TRAP_INTR)
		dev_err_ratelimited(&gpu->pdev->dev, "UCHE | Trap interrupt\n");

	if (status & A6XX_RBBM_INT_0_MASK_SWFUSEVIOLATION)
		a8xx_sw_fuse_violation_irq(gpu);

	if (status & A6XX_RBBM_INT_0_MASK_CP_CACHE_FLUSH_TS) {
		msm_gpu_retire(gpu);
		a6xx_preempt_trigger(gpu);
	}

	if (status & A6XX_RBBM_INT_0_MASK_CP_SW)
		a6xx_preempt_irq(gpu);

	return IRQ_HANDLED;
}

void a8xx_llc_activate(struct a6xx_gpu *a6xx_gpu)
{
	struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (!llcc_slice_activate(a6xx_gpu->llc_slice)) {
		u32 gpu_scid = llcc_get_slice_id(a6xx_gpu->llc_slice);

		gpu_scid &= GENMASK(5, 0);

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL1,
			  FIELD_PREP(GENMASK(29, 24), gpu_scid) |
			  FIELD_PREP(GENMASK(23, 18), gpu_scid) |
			  FIELD_PREP(GENMASK(17, 12), gpu_scid) |
			  FIELD_PREP(GENMASK(11, 6), gpu_scid) |
			  FIELD_PREP(GENMASK(5, 0), gpu_scid));

		gpu_write(gpu, REG_A6XX_GBIF_SCACHE_CNTL0,
			  FIELD_PREP(GENMASK(27, 22), gpu_scid) |
			  FIELD_PREP(GENMASK(21, 16), gpu_scid) |
			  FIELD_PREP(GENMASK(15, 10), gpu_scid) | BIT(8));
	}

	llcc_slice_activate(a6xx_gpu->htw_llc_slice);
}

#define GBIF_CLIENT_HALT_MASK		BIT(0)
#define GBIF_ARB_HALT_MASK		BIT(1)
#define VBIF_XIN_HALT_CTRL0_MASK	GENMASK(3, 0)
#define VBIF_RESET_ACK_MASK		0xF0
#define GPR0_GBIF_HALT_REQUEST		0x1E0

void a8xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off)
{
	struct msm_gpu *gpu = &adreno_gpu->base;

	if (gx_off) {
		/* Halt the gx side of GBIF */
		gpu_write(gpu, REG_A8XX_RBBM_GBIF_HALT, 1);
		spin_until(gpu_read(gpu, REG_A8XX_RBBM_GBIF_HALT_ACK) & 1);
	}

	/* Halt new client requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_CLIENT_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		    (GBIF_CLIENT_HALT_MASK)) == GBIF_CLIENT_HALT_MASK);

	/* Halt all AXI requests on GBIF */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, GBIF_ARB_HALT_MASK);
	spin_until((gpu_read(gpu, REG_A6XX_GBIF_HALT_ACK) &
		    (GBIF_ARB_HALT_MASK)) == GBIF_ARB_HALT_MASK);

	/* The GBIF halt needs to be explicitly cleared */
	gpu_write(gpu, REG_A6XX_GBIF_HALT, 0x0);
}
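
/*
 * Reading the always-on counter requires the GPU to be powered, so take the
 * PERFCOUNTER OOB vote around the read to keep the GMU from power-collapsing
 * it.
 */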
int a8xx_gmu_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);

	mutex_lock(&a6xx_gpu->gmu.lock);

	/* Force the GPU power on so we can read this register */
	a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	*value = gpu_read64(gpu, REG_A8XX_CP_ALWAYS_ON_COUNTER);

	a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);

	mutex_unlock(&a6xx_gpu->gmu.lock);

	return 0;
}

u64 a8xx_gpu_busy(struct msm_gpu *gpu, unsigned long *out_sample_rate)
{
	struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
	struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
	u64 busy_cycles;

	/* 19.2MHz */
	*out_sample_rate = 19200000;

	busy_cycles = gmu_read64(&a6xx_gpu->gmu,
				 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_L,
				 REG_A8XX_GMU_CX_GMU_POWER_COUNTER_XOCLK_0_H);

	return busy_cycles;
}

bool a8xx_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
{
	return true;
}