[PATCH 07/17] drm/msm/adreno: Add fenced regwrite support
Akhil P Oommen
akhilpo at oss.qualcomm.com
Wed Jul 23 21:04:10 UTC 2025
On 7/22/2025 7:09 PM, Dmitry Baryshkov wrote:
> On Sun, Jul 20, 2025 at 05:46:08PM +0530, Akhil P Oommen wrote:
>> There are some special registers which are accessible even when GX power
>> domain is collapsed during an IFPC sleep. Accessing these registers
>> wakes up GPU from power collapse and allow programming these registers
>> without additional handshake with GMU. This patch adds support for this
>> special register write sequence.
>>
>> Signed-off-by: Akhil P Oommen <akhilpo at oss.qualcomm.com>
>> ---
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 63 ++++++++++++++++++++++++++++++-
>> drivers/gpu/drm/msm/adreno/a6xx_gpu.h | 1 +
>> drivers/gpu/drm/msm/adreno/a6xx_preempt.c | 20 +++++-----
>> 3 files changed, 73 insertions(+), 11 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> index 491fde0083a202bec7c6b3bca88d0e5a717a6560..8c004fc3abd2896d467a9728b34e99e4ed944dc4 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
>> @@ -16,6 +16,67 @@
>>
>> #define GPU_PAS_ID 13
>>
>> +static bool fence_status_check(struct msm_gpu *gpu, u32 offset, u32 value, u32 status, u32 mask)
>> +{
>> + /* Success if !writedropped0/1 */
>> + if (!(status & mask))
>> + return true;
>> +
>> + udelay(10);
>
> Why do we need udelay() here? Why can't we use interval setting inside
> gmu_poll_timeout()?
Then the delay won't land at the right place in the sequence if we use
gmu_poll_timeout()'s interval argument. We need the below sequence:
1. reg write
2. check for status
2.1 Done if success
3. wait
4. reg write again
5. goto 2
Another option is to avoid gmu_poll_timeout() and just open code the loop
here, roughly like the sketch below.
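Something like this (just an untested sketch; keeps the same ~1ms budget
and 10us wait, GMU-wrapper check and error prints omitted):

static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
{
	struct msm_gpu *gpu = &a6xx_gpu->base.base;
	struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
	int i;

	/* ~1ms total: write, check status, wait 10us, then write again */
	for (i = 0; i < 100; i++) {
		gpu_write(gpu, offset, value);

		/* Success if !writedropped0/1 */
		if (!(gmu_read(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS) & mask))
			return 0;

		udelay(10);
	}

	return -ETIMEDOUT;
}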
>
>> +
>> + /* Try to update fenced register again */
>> + gpu_write(gpu, offset, value);
>> + return false;
>> +}
>> +
>> +static int fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u32 value, u32 mask)
>> +{
>> + struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
>> + struct msm_gpu *gpu = &adreno_gpu->base;
>> + struct a6xx_gmu *gmu = &a6xx_gpu->gmu;
>> + u32 status;
>> +
>> + gpu_write(gpu, offset, value);
>> +
>> + /* Nothing else to be done in the case of no-GMU */
>> + if (adreno_has_gmu_wrapper(adreno_gpu))
>> + return 0;
>> +
I think we should add an mb() here like downstream just to be cautious.
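Something like:

	gpu_write(gpu, offset, value);

	/* Make sure the fenced write posts before we poll the fence status */
	mb();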
>> + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
>> + fence_status_check(gpu, offset, value, status, mask), 0, 1000))
>> + return 0;
>> +
>> + dev_err_ratelimited(gmu->dev, "delay in fenced register write (0x%x)\n",
>> + offset);
>> +
>> + /* Try again for another 1ms before failing */
>> + gpu_write(gpu, offset, value);
>> + if (!gmu_poll_timeout(gmu, REG_A6XX_GMU_AHB_FENCE_STATUS, status,
>> + fence_status_check(gpu, offset, value, status, mask), 0, 1000))
>> + return 0;
>> +
>> + dev_err_ratelimited(gmu->dev, "fenced register write (0x%x) fail\n",
>> + offset);
>> +
>> + return -ETIMEDOUT;
>> +}
>> +
>> +int a6xx_fenced_write(struct a6xx_gpu *a6xx_gpu, u32 offset, u64 value, u32 mask, bool is_64b)
>> +{
>> + int ret;
>> +
>> + ret = fenced_write(a6xx_gpu, offset, lower_32_bits(value), mask);
>> + if (ret)
>> + return ret;
>> +
>> + if (!is_64b)
>> + return 0;
>> +
>> + ret = fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);
>
> no need for a separate ret assignment.
Ack
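i.e. just:

	return fenced_write(a6xx_gpu, offset + 1, upper_32_bits(value), mask);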
>
>> +
>> + return ret;
>> +}
>> +
>> static inline bool _a6xx_check_idle(struct msm_gpu *gpu)
>> {
>> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
>> @@ -86,7 +147,7 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>> /* Update HW if this is the current ring and we are not in preempt*/
>> if (!a6xx_in_preempt(a6xx_gpu)) {
>> if (a6xx_gpu->cur_ring == ring)
>> - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
>> + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
>
> I can't stop but notice that we don't handle a6xx_fenced_write() errors.
> Is it fine? Or will it result in some sort of crash / reset?
Recover_worker will kick in indirectly, e.g. due to a hangcheck timeout
here. Chances of failure here on production devices are low though.
-Akhil
>
>> else
>> ring->restore_wptr = true;
>> } else {
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> index 9201a53dd341bf432923ffb44947e015208a3d02..2be036a3faca58b4b559c30881e4b31d5929592a 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.h
>> @@ -291,5 +291,6 @@ int a6xx_gpu_state_put(struct msm_gpu_state *state);
>>
>> void a6xx_bus_clear_pending_transactions(struct adreno_gpu *adreno_gpu, bool gx_off);
>> void a6xx_gpu_sw_reset(struct msm_gpu *gpu, bool assert);
>> +int a6xx_fenced_write(struct a6xx_gpu *gpu, u32 offset, u64 value, u32 mask, bool is_64b);
>>
>> #endif /* __A6XX_GPU_H__ */
>> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> index 3b17fd2dba89115a8e48ba9469e52e4305b0cdbb..5b0fd510ff58d989ab285f1a2497f6f522a6b187 100644
>> --- a/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> +++ b/drivers/gpu/drm/msm/adreno/a6xx_preempt.c
>> @@ -41,7 +41,7 @@ static inline void set_preempt_state(struct a6xx_gpu *gpu,
>> }
>>
>> /* Write the most recent wptr for the given ring into the hardware */
>> -static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>> +static inline void update_wptr(struct a6xx_gpu *a6xx_gpu, struct msm_ringbuffer *ring)
>> {
>> unsigned long flags;
>> uint32_t wptr;
>> @@ -51,7 +51,7 @@ static inline void update_wptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
>> if (ring->restore_wptr) {
>> wptr = get_wptr(ring);
>>
>> - gpu_write(gpu, REG_A6XX_CP_RB_WPTR, wptr);
>> + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_RB_WPTR, wptr, BIT(0), false);
>>
>> ring->restore_wptr = false;
>> }
>> @@ -172,7 +172,7 @@ void a6xx_preempt_irq(struct msm_gpu *gpu)
>>
>> set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
>>
>> - update_wptr(gpu, a6xx_gpu->cur_ring);
>> + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
>>
>> set_preempt_state(a6xx_gpu, PREEMPT_NONE);
>>
>> @@ -268,7 +268,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>> */
>> if (!ring || (a6xx_gpu->cur_ring == ring)) {
>> set_preempt_state(a6xx_gpu, PREEMPT_FINISH);
>> - update_wptr(gpu, a6xx_gpu->cur_ring);
>> + update_wptr(a6xx_gpu, a6xx_gpu->cur_ring);
>> set_preempt_state(a6xx_gpu, PREEMPT_NONE);
>> spin_unlock_irqrestore(&a6xx_gpu->eval_lock, flags);
>> return;
>> @@ -302,13 +302,13 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>>
>> spin_unlock_irqrestore(&ring->preempt_lock, flags);
>>
>> - gpu_write64(gpu,
>> - REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO,
>> - a6xx_gpu->preempt_smmu_iova[ring->id]);
>> + a6xx_fenced_write(a6xx_gpu,
>> + REG_A6XX_CP_CONTEXT_SWITCH_SMMU_INFO, a6xx_gpu->preempt_smmu_iova[ring->id],
>> + BIT(1), true);
>>
>> - gpu_write64(gpu,
>> + a6xx_fenced_write(a6xx_gpu,
>> REG_A6XX_CP_CONTEXT_SWITCH_PRIV_NON_SECURE_RESTORE_ADDR,
>> - a6xx_gpu->preempt_iova[ring->id]);
>> + a6xx_gpu->preempt_iova[ring->id], BIT(1), true);
>>
>> a6xx_gpu->next_ring = ring;
>>
>> @@ -328,7 +328,7 @@ void a6xx_preempt_trigger(struct msm_gpu *gpu)
>> set_preempt_state(a6xx_gpu, PREEMPT_TRIGGERED);
>>
>> /* Trigger the preemption */
>> - gpu_write(gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl);
>> + a6xx_fenced_write(a6xx_gpu, REG_A6XX_CP_CONTEXT_SWITCH_CNTL, cntl, BIT(1), false);
>> }
>>
>> static int preempt_init_ring(struct a6xx_gpu *a6xx_gpu,
>>
>> --
>> 2.50.1
>>
>