[Freedreno] [RFC PATCH 4/5] freedreno: Basic register differences for A20x.

Ilia Mirkin imirkin at alum.mit.edu
Tue Aug 4 12:19:39 PDT 2015


On Tue, Aug 4, 2015 at 1:53 PM, Martin Fuzzey <mfuzzey at parkeon.com> wrote:
> A few register differences needed to get triangle-quad working.
>
> Signed-off-by: Martin Fuzzey <mfuzzey at parkeon.com>
> ---
>  src/gallium/drivers/freedreno/a2xx/fd2_draw.c |   64 ++++++++++++++++---------
>  src/gallium/drivers/freedreno/a2xx/fd2_emit.c |   15 +++++-
>  src/gallium/drivers/freedreno/a2xx/fd2_gmem.c |   24 +++++----
>  3 files changed, 70 insertions(+), 33 deletions(-)
>
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
> index f2efd5f..8cdca32 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
> @@ -95,26 +95,31 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> -       OUT_RING(ring, 0x0000003b);
> +       OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x2 but doesn't seem to matter */
>
>         OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
>         OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
>
>         OUT_WFI (ring);
>
> -       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> -       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> -       OUT_RING(ring, info->max_index);        /* VGT_MAX_VTX_INDX */
> -       OUT_RING(ring, info->min_index);        /* VGT_MIN_VTX_INDX */
> +       if (!is_a20x(ctx->screen)) {
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> +               OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> +               OUT_RING(ring, info->max_index);        /* VGT_MAX_VTX_INDX */
> +               OUT_RING(ring, info->min_index);        /* VGT_MIN_VTX_INDX */
> +       }

Is this necessary? It seems like setting the min/max vertex index
would be a good idea, especially given the availability of e.g.
glDrawRangeElements(). In many cases max_index will be ~0 though (i.e.
whenever a non-Range draw variant is used).

>
>         fd_draw_emit(ctx, ring, ctx->primtypes[info->mode],
>                                  IGNORE_VISIBILITY, info);
>
> -       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> -       OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
> -       OUT_RING(ring, 0x00000000);
> +       /* A20x: this doesn't hurt but isn't needed on A205 less pollution! */
> +       if (!is_a20x(ctx->screen)) {
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> +               OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
> +               OUT_RING(ring, 0x00000000);
>
> -       emit_cacheflush(ring);
> +               emit_cacheflush(ring);
> +       }
>  }
>
>
> @@ -125,10 +130,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>         struct fd2_context *fd2_ctx = fd2_context(ctx);
>         struct fd_ringbuffer *ring = ctx->ring;
>         struct pipe_framebuffer_state *fb = &ctx->framebuffer;
> -       uint32_t reg, colr = 0;
> -
> -       if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
> -               colr  = pack_rgba(fb->cbufs[0]->format, color->f);
> +       uint32_t reg;
>
>         /* emit generic state now: */
>         fd2_emit_state(ctx, ctx->dirty &
> @@ -145,17 +147,32 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> -       OUT_RING(ring, 0x0000028f);
> -
> +       OUT_RING(ring, 0x0000028f); /* A20x blob uses 0x2 but doesn't seem to matter */
>         fd2_program_emit(ring, &ctx->solid_prog);
>
>         OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
>         OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
>
> -       OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> -       OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
> -       OUT_RING(ring, colr);
> +       if (is_a20x(ctx->screen)) {
> +               /* On A205 REG_A2XX_CLEAR_COLOR doesn't exist... */
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 5);
> +               OUT_RING(ring, 0x00000480);

PS_CONST_BASE * 4 might be a bit clearer than the bare 0x00000480.

> +               OUT_RING(ring, color->ui[0]);
> +               OUT_RING(ring, color->ui[1]);
> +               OUT_RING(ring, color->ui[2]);
> +               OUT_RING(ring, color->ui[3]);
> +       } else {
> +               uint32_t colr = 0;
>
> +               if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
> +                       colr  = pack_rgba(fb->cbufs[0]->format, color->f);
> +
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> +               OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
> +               OUT_RING(ring, colr);

I'm actually a tad surprised this works on a220 -- the fd clear seems
to just use the solid_prog rather than a fast-clear path. I guess Rob
will know more.

> +       }
> +
> +       /* A20x: not done by the A205 blob but doesn't seem to hurt */
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
>         OUT_RING(ring, 0x00000084);
> @@ -259,14 +276,17 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>                 OUT_RING(ring, 0x0);
>         }
>
> -       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> -       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> -       OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
> -       OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
> +       if (!is_a20x(ctx->screen)) {
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> +               OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> +               OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
> +               OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
> +       }

Again, I'd leave this stuff in (but it does have to be all-or-nothing, of course).

>
>         fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
>                         DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
>
> +       /* A20x: not done by the A205 blob but doesn't seem to hurt */
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
>         OUT_RING(ring, 0x00000000);
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
> index cc0ed59..4be215f 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
> @@ -320,6 +320,17 @@ fd2_emit_setup(struct fd_context *ctx)
>  {
>         struct fd_ringbuffer *ring = ctx->ring;
>
> +       if (is_a20x(ctx->screen)) {
> +               /* On A205 gmem2mem hangs without this */
> +               OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
> +               OUT_RING(ring, /* Flags below from blob value 0x1c004046 */
> +                       A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
> +                       A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
> +                       A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
> +                       A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) |  // important
> +                       A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
> +       }
> +
>         OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
>         OUT_RING(ring, 0x00000002);
>
> @@ -338,6 +349,7 @@ fd2_emit_setup(struct fd_context *ctx)
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>         OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> +       /* A20x: blob uses 0x00ffffff here but doesn't seem to hurt */
>         OUT_RING(ring, 0xffffffff);        /* VGT_MAX_VTX_INDX */
>         OUT_RING(ring, 0x00000000);        /* VGT_MIN_VTX_INDX */
>
> @@ -347,7 +359,7 @@ fd2_emit_setup(struct fd_context *ctx)
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> -       OUT_RING(ring, 0x0000003b);
> +       OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x2 but doesn't seem to matter */
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
> @@ -374,6 +386,7 @@ fd2_emit_setup(struct fd_context *ctx)
>         OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
>         OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
>
> +       /* A20x: not done by the A205 blob but doesn't seem to hurt */
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
>         OUT_RING(ring, 0x88888888);
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
> index 982c9c2..8a333af 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
> @@ -85,10 +85,12 @@ emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base,
>
>         OUT_WFI (ring);
>
> -       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> -       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> -       OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
> -       OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
> +       if (!is_a20x(ctx->screen)) {
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> +               OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> +               OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
> +               OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
> +       }
>
>         fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
>                         DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
> @@ -115,7 +117,7 @@ fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> -       OUT_RING(ring, 0x0000028f);
> +       OUT_RING(ring, 0x0000028f); /* A20x blob uses 0x2 but doesn't seem to matter */
>
>         fd2_program_emit(ring, &ctx->solid_prog);
>
> @@ -207,10 +209,12 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
>         OUT_RING(ring, 0x00000000);
>         OUT_RING(ring, 0x00000200);
>
> -       OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> -       OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> -       OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
> -       OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
> +       if (!is_a20x(ctx->screen)) {
> +               OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> +               OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> +               OUT_RING(ring, 3);                 /* VGT_MAX_VTX_INDX */
> +               OUT_RING(ring, 0);                 /* VGT_MIN_VTX_INDX */
> +       }
>
>         fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
>                         DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
> @@ -253,7 +257,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
>
>         OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>         OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> -       OUT_RING(ring, 0x0000003b);
> +       OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x0 but doesn't seem to matter */
>
>         fd2_program_emit(ring, &ctx->blit_prog[0]);
>
>
> _______________________________________________
> Freedreno mailing list
> Freedreno at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/freedreno


More information about the Freedreno mailing list