[Freedreno] [RFC PATCH 4/5] freedreno: Basic register differences for A20x.
Rob Clark
robdclark at gmail.com
Tue Aug 4 12:26:41 PDT 2015
On Tue, Aug 4, 2015 at 3:19 PM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> On Tue, Aug 4, 2015 at 1:53 PM, Martin Fuzzey <mfuzzey at parkeon.com> wrote:
>> A few register differences needed to get triangle-quad working.
>>
>> Signed-off-by: Martin Fuzzey <mfuzzey at parkeon.com>
>> ---
>> src/gallium/drivers/freedreno/a2xx/fd2_draw.c | 64 ++++++++++++++++---------
>> src/gallium/drivers/freedreno/a2xx/fd2_emit.c | 15 +++++-
>> src/gallium/drivers/freedreno/a2xx/fd2_gmem.c | 24 +++++----
>> 3 files changed, 70 insertions(+), 33 deletions(-)
>>
>> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
>> index f2efd5f..8cdca32 100644
>> --- a/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
>> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_draw.c
>> @@ -95,26 +95,31 @@ fd2_draw_vbo(struct fd_context *ctx, const struct pipe_draw_info *info)
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
>> - OUT_RING(ring, 0x0000003b);
>> + OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x2 but doesn't seem to matter */
>>
>> OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
>> OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
>>
>> OUT_WFI (ring);
>>
>> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> - OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
>> - OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
>> + if (!is_a20x(ctx->screen)) {
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> + OUT_RING(ring, info->max_index); /* VGT_MAX_VTX_INDX */
>> + OUT_RING(ring, info->min_index); /* VGT_MIN_VTX_INDX */
>> + }
>
> Is this necessary? It seems like setting the min/max vertex index
> would be a good idea, esp given the availability of e.g.
> glDrawRangeElements(). In many cases max_index will be ~0 though (i.e.
> whenever a non-Range draw variant is used).
So according to:
https://github.com/freedreno/amd-gpu/blob/master/include/reg/yamato/22/yamato_offset.h#L107
which I believe is the kernel driver side code for a200 (before it was
called adreno), the a20x's should have the min/max_vtx_indx
registers.
Is it causing problems if you write those regs?
>>
>> fd_draw_emit(ctx, ring, ctx->primtypes[info->mode],
>> IGNORE_VISIBILITY, info);
>>
>> - OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> - OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
>> - OUT_RING(ring, 0x00000000);
>> + /* A20x: this doesn't hurt but isn't needed on A205 less pollution! */
>> + if (!is_a20x(ctx->screen)) {
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> + OUT_RING(ring, CP_REG(REG_A2XX_UNKNOWN_2010));
>> + OUT_RING(ring, 0x00000000);
>>
>> - emit_cacheflush(ring);
>> + emit_cacheflush(ring);
>> + }
>> }
>>
>>
>> @@ -125,10 +130,7 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>> struct fd2_context *fd2_ctx = fd2_context(ctx);
>> struct fd_ringbuffer *ring = ctx->ring;
>> struct pipe_framebuffer_state *fb = &ctx->framebuffer;
>> - uint32_t reg, colr = 0;
>> -
>> - if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
>> - colr = pack_rgba(fb->cbufs[0]->format, color->f);
>> + uint32_t reg;
>>
>> /* emit generic state now: */
>> fd2_emit_state(ctx, ctx->dirty &
>> @@ -145,17 +147,32 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
>> - OUT_RING(ring, 0x0000028f);
>> -
>> + OUT_RING(ring, 0x0000028f); /* A20x blob uses 0x2 but doesn't seem to matter */
>> fd2_program_emit(ring, &ctx->solid_prog);
>>
>> OUT_PKT0(ring, REG_A2XX_TC_CNTL_STATUS, 1);
>> OUT_RING(ring, A2XX_TC_CNTL_STATUS_L2_INVALIDATE);
>>
>> - OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> - OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
>> - OUT_RING(ring, colr);
>> + if (is_a20x(ctx->screen)) {
>> + /* On A205 REG_A2XX_CLEAR_COLOR doesn't exist... */
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 5);
>> + OUT_RING(ring, 0x00000480);
>
> PS_CONST_BASE * 4 might be a bit clearer.
>
>> + OUT_RING(ring, color->ui[0]);
>> + OUT_RING(ring, color->ui[1]);
>> + OUT_RING(ring, color->ui[2]);
>> + OUT_RING(ring, color->ui[3]);
>> + } else {
>> + uint32_t colr = 0;
>>
>> + if ((buffers & PIPE_CLEAR_COLOR) && fb->nr_cbufs)
>> + colr = pack_rgba(fb->cbufs[0]->format, color->f);
>> +
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> + OUT_RING(ring, CP_REG(REG_A2XX_CLEAR_COLOR));
>> + OUT_RING(ring, colr);
>
> I'm actually a tad surprised this works on a220 -- the fd clear seems
> to just use the solid_prog, not a fast clear thing. I guess Rob will
> know more.
Yeah, that was probably just cargo-culted from blob cmdstream traces.
The blob sometimes still writes regs related to some feature (like
fast-clear) even if that feature is disabled via conf file.
BR,
-R
>> + }
>> +
>> + /* A20x: not done by the A205 blob but doesn't seem to hurt */
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
>> OUT_RING(ring, 0x00000084);
>> @@ -259,14 +276,17 @@ fd2_clear(struct fd_context *ctx, unsigned buffers,
>> OUT_RING(ring, 0x0);
>> }
>>
>> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
>> - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
>> + if (!is_a20x(ctx->screen)) {
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
>> + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
>> + }
>
> Again I'd leave this stuff in (but it does have to be all-or-nothing of course).
>
>>
>> fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
>> DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
>>
>> + /* A20x: not done by the A205 blob but doesn't seem to hurt */
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_A220_RB_LRZ_VSC_CONTROL));
>> OUT_RING(ring, 0x00000000);
>> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
>> index cc0ed59..4be215f 100644
>> --- a/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
>> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_emit.c
>> @@ -320,6 +320,17 @@ fd2_emit_setup(struct fd_context *ctx)
>> {
>> struct fd_ringbuffer *ring = ctx->ring;
>>
>> + if (is_a20x(ctx->screen)) {
>> + /* On A205 gmem2mem hangs without this */
>> + OUT_PKT0(ring, REG_A2XX_RB_BC_CONTROL, 1);
>> + OUT_RING(ring, /* Flags below from blob value 0x1c004046 */
>> + A2XX_RB_BC_CONTROL_ACCUM_TIMEOUT_SELECT(3) |
>> + A2XX_RB_BC_CONTROL_DISABLE_LZ_NULL_ZCMD_DROP |
>> + A2XX_RB_BC_CONTROL_ENABLE_CRC_UPDATE |
>> + A2XX_RB_BC_CONTROL_ACCUM_DATA_FIFO_LIMIT(8) | // important
>> + A2XX_RB_BC_CONTROL_MEM_EXPORT_TIMEOUT_SELECT(3));
>> + }
>> +
>> OUT_PKT0(ring, REG_A2XX_TP0_CHICKEN, 1);
>> OUT_RING(ring, 0x00000002);
>>
>> @@ -338,6 +349,7 @@ fd2_emit_setup(struct fd_context *ctx)
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> + /* A20x: blob uses 0x00ffffff here but doesn't seem to hurt */
>> OUT_RING(ring, 0xffffffff); /* VGT_MAX_VTX_INDX */
>> OUT_RING(ring, 0x00000000); /* VGT_MIN_VTX_INDX */
>>
>> @@ -347,7 +359,7 @@ fd2_emit_setup(struct fd_context *ctx)
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
>> - OUT_RING(ring, 0x0000003b);
>> + OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x2 but doesn't seem to matter */
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_SQ_CONTEXT_MISC));
>> @@ -374,6 +386,7 @@ fd2_emit_setup(struct fd_context *ctx)
>> OUT_RING(ring, CP_REG(REG_A2XX_RB_MODECONTROL));
>> OUT_RING(ring, A2XX_RB_MODECONTROL_EDRAM_MODE(COLOR_DEPTH));
>>
>> + /* A20x: not done by the A205 blob but doesn't seem to hurt */
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_RB_SAMPLE_POS));
>> OUT_RING(ring, 0x88888888);
>> diff --git a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
>> index 982c9c2..8a333af 100644
>> --- a/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
>> +++ b/src/gallium/drivers/freedreno/a2xx/fd2_gmem.c
>> @@ -85,10 +85,12 @@ emit_gmem2mem_surf(struct fd_context *ctx, uint32_t base,
>>
>> OUT_WFI (ring);
>>
>> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
>> - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
>> + if (!is_a20x(ctx->screen)) {
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
>> + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
>> + }
>>
>> fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
>> DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
>> @@ -115,7 +117,7 @@ fd2_emit_tile_gmem2mem(struct fd_context *ctx, struct fd_tile *tile)
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
>> - OUT_RING(ring, 0x0000028f);
>> + OUT_RING(ring, 0x0000028f); /* A20x blob uses 0x2 but doesn't seem to matter */
>>
>> fd2_program_emit(ring, &ctx->solid_prog);
>>
>> @@ -207,10 +209,12 @@ emit_mem2gmem_surf(struct fd_context *ctx, uint32_t base,
>> OUT_RING(ring, 0x00000000);
>> OUT_RING(ring, 0x00000200);
>>
>> - OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> - OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> - OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
>> - OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
>> + if (!is_a20x(ctx->screen)) {
>> + OUT_PKT3(ring, CP_SET_CONSTANT, 3);
>> + OUT_RING(ring, CP_REG(REG_A2XX_VGT_MAX_VTX_INDX));
>> + OUT_RING(ring, 3); /* VGT_MAX_VTX_INDX */
>> + OUT_RING(ring, 0); /* VGT_MIN_VTX_INDX */
>> + }
>>
>> fd_draw(ctx, ring, DI_PT_RECTLIST, IGNORE_VISIBILITY,
>> DI_SRC_SEL_AUTO_INDEX, 3, 0, INDEX_SIZE_IGN, 0, 0, NULL);
>> @@ -253,7 +257,7 @@ fd2_emit_tile_mem2gmem(struct fd_context *ctx, struct fd_tile *tile)
>>
>> OUT_PKT3(ring, CP_SET_CONSTANT, 2);
>> OUT_RING(ring, CP_REG(REG_A2XX_VGT_VERTEX_REUSE_BLOCK_CNTL));
>> - OUT_RING(ring, 0x0000003b);
>> + OUT_RING(ring, 0x0000003b); /* A20x blob uses 0x0 but doesn't seem to matter */
>>
>> fd2_program_emit(ring, &ctx->blit_prog[0]);
>>
>>
>> _______________________________________________
>> Freedreno mailing list
>> Freedreno at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/freedreno
More information about the Freedreno
mailing list