[Freedreno] [RFC PATCH 4/5] freedreno: Basic register differences for A20x.
Ilia Mirkin
imirkin at alum.mit.edu
Tue Aug 4 12:19:39 PDT 2015
On Tue, Aug 4, 2015 at 1:53 PM, Martin Fuzzey <mfuzzey at parkeon.com> wrote:
> A few register differences needed to get triangle-quad working.
>
> Signed-off-by: Martin Fuzzey <mfuzzey at parkeon.com>
> ---
> src/gallium/drivers/freedreno/a2xx/fd2_draw.c | 64 ++++++++++++++++---------
> src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 15 +++++-
> src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 24 +++++----
> 3 files changed, 70 insertions(+), 33 deletions(-)
>
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
> index f2efd5f..8cdca32 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
> @@ -95,26 +95,31 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> - OUT_RING(ring, 0x0000003b);
> + OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x2 but doesn't seem to matter */
>
> OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
> OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
>
> OUT_WFI (ring);
>
> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> - OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
> - OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
> + if (!is_a20x(ctx->screen)) {
> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> + OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
> + OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
> + }
Is this necessary? It seems like setting the min/max vertex index
would be a good idea, especially given the availability of e.g.
glDrawRangeElements(). In many cases max_index will be ~0, though
(i.e. whenever a non-Range draw variant is used).
>
> fd_draw_emit(ctx, ring, ctx->primtypes[info->mode],
> IGNORE_VISIBILITY, info);
>
> - OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> - OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
> - OUT_RING(ring, 0x00000000);
> + /* A20x: this doesn't hurt but isn't needed on A205 less pollution! */
> + if (!is_a20x(ctx->screen)) {
> + OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> + OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
> + OUT_RING(ring, 0x00000000);
>
> - emit_cacheflush(ring);
> + emit_cacheflush(ring);
> + }
> }
>
>
> @@ -125,10 +130,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
> struct fd2_context *fd2_ctx = fd2_context(ctx);
> struct fd_ringbuffer *ring = ctx->ring;
> struct pipe_framebuffer_state *fb = &ctx->framebuffer;
> - uint32_t reg, colr = 0;
> -
> - if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
> - colr = pack_rgba(fb->cbufs[0]->format, color->f);
> + uint32_t reg;
>
> /* emit generic state now: */
> fd2_emit_state(ctx, ctx->dirty &
> @@ -145,17 +147,32 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> - OUT_RING(ring, 0x0000028f);
> -
> + OUT_RING(ring, 0x0000028f); /* A20x blob uses 0x2 but doesn't seem to matter */
> fd2_program_emit(ring, &ctx->solid_prog);
>
> OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
> OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
>
> - OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> - OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
> - OUT_RING(ring, colr);
> + if (is_a20x(ctx->screen)) {
> + /* On A205 REG_A2XX_CLEAR_COLOR doesn't exist... */
> + OUT_PKT3(ring, CP_SET_CONSTANT, 5);
> + OUT_RING(ring, 0x00000480);
Writing this as PS_CONST_BASE * 4 instead of the literal 0x00000480 might be a bit clearer.
> + OUT_RING(ring, color->ui[0]);
> + OUT_RING(ring, color->ui[1]);
> + OUT_RING(ring, color->ui[2]);
> + OUT_RING(ring, color->ui[3]);
> + } else {
> + uint32_t colr = 0;
>
> + if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
> + colr = pack_rgba(fb->cbufs[0]->format, color->f);
> +
> + OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> + OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
> + OUT_RING(ring, colr);
I'm actually a tad surprised this works on a220 -- the fd clear seems
to just use the solid_prog, not a dedicated fast-clear path. I guess
Rob will know more.
> + }
> +
> + /* A20x: not done by the A205 blob but doesn't seem to hurt */
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
> OUT_RING(ring, 0x00000084);
> @@ -259,14 +276,17 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
> OUT_RING(ring, 0x0);
> }
>
> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
> - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
> + if (!is_a20x(ctx->screen)) {
> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
> + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
> + }
Again, I'd leave this stuff in (but it does have to be all-or-nothing, of course).
>
> fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
> DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
>
> + /* A20x: not done by the A205 blob but doesn't seem to hurt */
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
> OUT_RING(ring, 0x00000000);
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
> index cc0ed59..4be215f 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
> @@ -320,6 +320,17 @@ fd2_emit_setup(struct fd_context *ctx)
> {
> struct fd_ringbuffer *ring = ctx->ring;
>
> + if (is_a20x(ctx->screen)) {
> + /* On A205 gmem2mem hangs without this */
> + OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
> + OUT_RING(ring, /* Flags below from blob value 0x1c004046 */
> + A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
> + A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
> + A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
> + A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) | // important
> + A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
> + }
> +
> OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
> OUT_RING(ring, 0x00000002);
>
> @@ -338,6 +349,7 @@ fd2_emit_setup(struct fd_context *ctx)
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> + /* A20x: blob uses 0x00ffffff here but doesn't seem to hurt */
> OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
> OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
>
> @@ -347,7 +359,7 @@ fd2_emit_setup(struct fd_context *ctx)
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> - OUT_RING(ring, 0x0000003b);
> + OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x2 but doesn't seem to matter */
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
> @@ -374,6 +386,7 @@ fd2_emit_setup(struct fd_context *ctx)
> OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
> OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
>
> + /* A20x: not done by the A205 blob but doesn't seem to hurt */
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
> OUT_RING(ring, 0x88888888);
> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
> index 982c9c2..8a333af 100644
> --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
> @@ -85,10 +85,12 @@ emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base,
>
> OUT_WFI (ring);
>
> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
> - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
> + if (!is_a20x(ctx->screen)) {
> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
> + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
> + }
>
> fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
> DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
> @@ -115,7 +117,7 @@ fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> - OUT_RING(ring, 0x0000028f);
> + OUT_RING(ring, 0x0000028f); /* A20x blob uses 0x2 but doesn't seem to matter */
>
> fd2_program_emit(ring, &ctx->solid_prog);
>
> @@ -207,10 +209,12 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
> OUT_RING(ring, 0x00000000);
> OUT_RING(ring, 0x00000200);
>
> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
> - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
> + if (!is_a20x(ctx->screen)) {
> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
> + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
> + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
> + }
>
> fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
> DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
> @@ -253,7 +257,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
>
> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
> - OUT_RING(ring, 0x0000003b);
> + OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x0 but doesn't seem to matter */
>
> fd2_program_emit(ring, &ctx->blit_prog[0]);
>
>
> _______________________________________________
> Freedreno mailing list
> Freedreno at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/freedreno
More information about the Freedreno
mailing list