[PATCH 1/2] Support for adreno 430.
Rob Clark
robdclark at gmail.com
Tue Feb 2 22:31:50 UTC 2016
overall, looks pretty good, few small comments inline
On Tue, Feb 2, 2016 at 4:06 PM, C Stout <cstout at chromium.org> wrote:
> Change-Id: I6c891515d93a6f1a597e762090c3530a6810c6c6
> ---
> drivers/gpu/drm/msm/adreno/a4xx_gpu.c | 50 ++++++++++++++++++++++++++----
> drivers/gpu/drm/msm/adreno/adreno_device.c | 8 +++++
> drivers/gpu/drm/msm/adreno/adreno_gpu.c | 41 ++++++++++++++++--------
> drivers/gpu/drm/msm/adreno/adreno_gpu.h | 5 +++
> 4 files changed, 85 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> index ef20cb5..9ed3abe 100644
> --- a/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a4xx_gpu.c
> @@ -111,11 +111,17 @@ static void a4xx_enable_hwcg(struct msm_gpu *gpu)
> gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00000222);
> gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL_HLSQ , 0x00000000);
> gpu_write(gpu, REG_A4XX_RBBM_CLOCK_HYST_HLSQ, 0x00000000);
> - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00020000);
> - gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
> + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, 0x00220000);
> + /* Early A430's have a timing issue with SP/TP power collapse;
> + disabling HW clock gating prevents it. */
> + if (adreno_is_a430(adreno_gpu) && adreno_gpu->rev.patchid < 2)
> + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0);
> + else
> + gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL, 0xAAAAAAAA);
> gpu_write(gpu, REG_A4XX_RBBM_CLOCK_CTL2, 0);
> }
>
> +
> static void a4xx_me_init(struct msm_gpu *gpu)
> {
> struct msm_ringbuffer *ring = gpu->rb;
> @@ -150,7 +156,7 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
> uint32_t *ptr, len;
> int i, ret;
>
> - if (adreno_is_a4xx(adreno_gpu)) {
> + if (adreno_is_a420(adreno_gpu)) {
> gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT, 0x0001001F);
> gpu_write(gpu, REG_A4XX_VBIF_ABIT_SORT_CONF, 0x000000A4);
> gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
> @@ -159,6 +165,13 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
> gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
> gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
> gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
> + } else if (adreno_is_a430(adreno_gpu)) {
> + gpu_write(gpu, REG_A4XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000001);
> + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF0, 0x18181818);
> + gpu_write(gpu, REG_A4XX_VBIF_IN_RD_LIM_CONF1, 0x00000018);
> + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF0, 0x18181818);
> + gpu_write(gpu, REG_A4XX_VBIF_IN_WR_LIM_CONF1, 0x00000018);
> + gpu_write(gpu, REG_A4XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
> } else {
> BUG();
> }
> @@ -170,6 +183,10 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
> gpu_write(gpu, REG_A4XX_RBBM_SP_HYST_CNT, 0x10);
> gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL, 0x10);
>
> + if (adreno_is_a430(adreno_gpu)) {
> + gpu_write(gpu, REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2, 0x30);
> + }
> +
> /* Enable the RBBM error reporting bits */
> gpu_write(gpu, REG_A4XX_RBBM_AHB_CTL0, 0x00000001);
>
> @@ -192,6 +209,9 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
> /* Turn on performance counters: */
> gpu_write(gpu, REG_A4XX_RBBM_PERFCTR_CTL, 0x01);
>
> + if (adreno_is_a430(adreno_gpu))
> + gpu_write(gpu, REG_A4XX_UCHE_CACHE_WAYS_VFD, 0x07);
> +
> /* Disable L2 bypass to avoid UCHE out of bounds errors */
> gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_LO, 0xffff0000);
> gpu_write(gpu, REG_A4XX_UCHE_TRAP_BASE_HI, 0xffff0000);
> @@ -199,6 +219,15 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
> gpu_write(gpu, REG_A4XX_CP_DEBUG, (1 << 25) |
> (adreno_is_a420(adreno_gpu) ? (1 << 29) : 0));
>
> + /* On A430 enable SP regfile sleep for power savings */
> + /* TODO downstream does this for !420, so maybe applies for 405 too? */
> + if (!adreno_is_a420(adreno_gpu)) {
> + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0,
> + 0x00000441);
> + gpu_write(gpu, REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1,
> + 0x00000441);
> + }
> +
> a4xx_enable_hwcg(gpu);
>
> /*
> @@ -213,9 +242,11 @@ static int a4xx_hw_init(struct msm_gpu *gpu)
> gpu_write(gpu, REG_A4XX_RBBM_CLOCK_DELAY_HLSQ, val);
> }
>
> - ret = adreno_hw_init(gpu);
> - if (ret)
> - return ret;
hmm, it seems a bit wrong to never call adreno_hw_init() for a430
(esp. when below you have some changes in adreno_hw_init()).. am I
not seeing something, or did you miss a hunk?
> + if (!adreno_is_a430(adreno_gpu)) {
> + ret = adreno_hw_init(gpu);
> + if (ret)
> + return ret;
> + }
>
> /* setup access protection: */
> gpu_write(gpu, REG_A4XX_CP_PROTECT_CTRL, 0x00000007);
> @@ -326,6 +357,13 @@ static irqreturn_t a4xx_irq(struct msm_gpu *gpu)
> status = gpu_read(gpu, REG_A4XX_RBBM_INT_0_STATUS);
> DBG("%s: Int status %08x", gpu->name, status);
>
> + if (status & (1 << A4XX_INT_CP_REG_PROTECT_FAULT)) {
> + uint32_t reg = gpu_read(gpu, REG_A4XX_CP_PROTECT_STATUS);
> + printk("CP | Protected mode error| %s | addr=%x\n",
> + reg & (1 << 24) ? "WRITE" : "READ",
> + (reg & 0xFFFFF) >> 2);
> + }
> +
this looks pretty useful, but also unrelated to a430. Mind splitting
it out into it's own patch?
> gpu_write(gpu, REG_A4XX_RBBM_INT_CLEAR_CMD, status);
>
> msm_gpu_retire(gpu);
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_device.c b/drivers/gpu/drm/msm/adreno/adreno_device.c
> index 1ea2df5..43d5aaa 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_device.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_device.c
> @@ -73,6 +73,14 @@ static const struct adreno_info gpulist[] = {
> .pfpfw = "a420_pfp.fw",
> .gmem = (SZ_1M + SZ_512K),
> .init = a4xx_gpu_init,
> + }, {
> + .rev = ADRENO_REV(4, 3, 0, ANY_ID),
> + .revn = 430,
> + .name = "A430",
> + .pm4fw = "a420_pm4.fw",
> + .pfpfw = "a420_pfp.fw",
> + .gmem = (SZ_1M + SZ_512K),
> + .init = a4xx_gpu_init,
> },
> };
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index a3b54cc..33c96d0d 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -68,18 +68,15 @@ int adreno_hw_init(struct msm_gpu *gpu)
> adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_CNTL,
> /* size is log2(quad-words): */
> AXXX_CP_RB_CNTL_BUFSZ(ilog2(gpu->rb->size / 8)) |
> - AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)));
> + AXXX_CP_RB_CNTL_BLKSZ(ilog2(RB_BLKSIZE / 8)) |
> + (adreno_is_a430(adreno_gpu) ? (1 << 27) : 0));
So.. from the a3xx (apq8016) docs which I do have, bit 27 here is
'RB_NO_UPDATE - Disable writes of Read Pointer to memory'. And
actually looks like we already have AXXX_CP_RB_CNTL_NO_UPDATE in the
.xml.h so no need to open code it.
Downstream commit which makes this change mentions "Sometimes the RPTR
shadow memory is unreliable causing timeouts in adreno_idle(). Read
it directly from the register instead." This might be a bit easier to
understand in the git history if we split the logical equivalent of
downstream cdcc02a68649f9929e3f3a5a84bfcac5e480894e into one patch,
and then the remaining a430 changes as preceding patch. I think the
RB_NO_UPDATE change we could just make unconditional (although if you
want to take the safe route, make it conditional on a430 for now and
once I have a chance to test it on others I can flip it to
unconditional).
> /* Setup ringbuffer address: */
> adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_BASE, gpu->rb_iova);
> - adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
> - rbmemptr(adreno_gpu, rptr));
>
> - /* Setup scratch/timestamp: */
> - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_ADDR,
> - rbmemptr(adreno_gpu, fence));
> -
> - adreno_gpu_write(adreno_gpu, REG_ADRENO_SCRATCH_UMSK, 0x1);
> + if (!adreno_is_a430(adreno_gpu))
> + adreno_gpu_write(adreno_gpu, REG_ADRENO_CP_RB_RPTR_ADDR,
> + rbmemptr(adreno_gpu, rptr));
>
> return 0;
> }
> @@ -89,6 +86,16 @@ static uint32_t get_wptr(struct msm_ringbuffer *ring)
> return ring->cur - ring->start;
> }
>
> +/* Use this helper to read rptr, since a430 doesn't update rptr in memory */
> +static uint32_t get_rptr(struct adreno_gpu *adreno_gpu)
> +{
> + if (adreno_is_a430(adreno_gpu))
> + return adreno_gpu->memptrs->rptr = adreno_gpu_read(
> + adreno_gpu, REG_ADRENO_CP_RB_RPTR);
> + else
> + return adreno_gpu->memptrs->rptr;
> +}
> +
> uint32_t adreno_last_fence(struct msm_gpu *gpu)
> {
> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> @@ -137,7 +144,8 @@ int adreno_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit,
> if (priv->lastctx == ctx)
> break;
> case MSM_SUBMIT_CMD_BUF:
> - OUT_PKT3(ring, CP_INDIRECT_BUFFER_PFD, 2);
> + OUT_PKT3(ring, adreno_is_a430(adreno_gpu) ?
> + CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 2);
so far, other than a420 which I still need to test, I think we can
switch to 'prefetch enabled' version of the IB packet across the
board. But I think it's fine to leave it, I can either drop the hunk
if I've had a chance to test a420 or remove it later.
> OUT_RING(ring, submit->cmd[i].iova);
> OUT_RING(ring, submit->cmd[i].size);
> ibs++;
> @@ -216,9 +224,16 @@ void adreno_idle(struct msm_gpu *gpu)
> {
> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> uint32_t wptr = get_wptr(gpu->rb);
> + int ret;
>
> /* wait for CP to drain ringbuffer: */
> - if (spin_until(adreno_gpu->memptrs->rptr == wptr))
> + if (adreno_is_a430(adreno_gpu))
> + ret = spin_until((adreno_gpu->memptrs->rptr = adreno_gpu_read(
> + adreno_gpu, REG_ADRENO_CP_RB_RPTR)) == wptr);
> + else
> + ret = spin_until(adreno_gpu->memptrs->rptr == wptr);
I think just use get_rptr() here?
BR,
-R
> +
> + if (ret)
> DRM_ERROR("%s: timeout waiting to drain ringbuffer!\n", gpu->name);
>
> /* TODO maybe we need to reset GPU here to recover from hang? */
> @@ -237,7 +252,7 @@ void adreno_show(struct msm_gpu *gpu, struct seq_file *m)
>
> seq_printf(m, "fence: %d/%d\n", adreno_gpu->memptrs->fence,
> gpu->submitted_fence);
> - seq_printf(m, "rptr: %d\n", adreno_gpu->memptrs->rptr);
> + seq_printf(m, "rptr: %d\n", get_rptr(adreno_gpu));
> seq_printf(m, "wptr: %d\n", adreno_gpu->memptrs->wptr);
> seq_printf(m, "rb wptr: %d\n", get_wptr(gpu->rb));
>
> @@ -278,7 +293,7 @@ void adreno_dump_info(struct msm_gpu *gpu)
>
> printk("fence: %d/%d\n", adreno_gpu->memptrs->fence,
> gpu->submitted_fence);
> - printk("rptr: %d\n", adreno_gpu->memptrs->rptr);
> + printk("rptr: %d\n", get_rptr(adreno_gpu));
> printk("wptr: %d\n", adreno_gpu->memptrs->wptr);
> printk("rb wptr: %d\n", get_wptr(gpu->rb));
>
> @@ -313,7 +328,7 @@ static uint32_t ring_freewords(struct msm_gpu *gpu)
> struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
> uint32_t size = gpu->rb->size / 4;
> uint32_t wptr = get_wptr(gpu->rb);
> - uint32_t rptr = adreno_gpu->memptrs->rptr;
> + uint32_t rptr = get_rptr(adreno_gpu);
> return (rptr + (size - 1) - wptr) % size;
> }
>
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.h b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> index 0a312e9..c26aea1 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.h
> @@ -228,6 +228,11 @@ static inline int adreno_is_a420(struct adreno_gpu *gpu)
> return gpu->revn == 420;
> }
>
> +static inline int adreno_is_a430(struct adreno_gpu *gpu)
> +{
> + return gpu->revn == 430;
> +}
> +
> int adreno_get_param(struct msm_gpu *gpu, uint32_t param, uint64_t *value);
> int adreno_hw_init(struct msm_gpu *gpu);
> uint32_t adreno_last_fence(struct msm_gpu *gpu);
> --
> 2.1.4
>
> _______________________________________________
> dri-devel mailing list
> dri-devel at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/dri-devel
More information about the dri-devel
mailing list