[Mesa-dev] [PATCH] r600: initial attempt at gl_HelperInvocation (v3)

Roland Scheidegger sroland at vmware.com
Thu Feb 1 23:30:21 UTC 2018


Am 02.02.2018 um 00:23 schrieb Dave Airlie:
> On 2 February 2018 at 09:23, Dave Airlie <airlied at gmail.com> wrote:
>> On 2 February 2018 at 02:05, Roland Scheidegger <sroland at vmware.com> wrote:
>>> Am 01.02.2018 um 09:21 schrieb Dave Airlie:
>>>> From: Dave Airlie <airlied at redhat.com>
>>>>
>>>> This passes the CTS and piglit tests.
>>>>
>>>> This also disables sb for shaders using helper invocations until
>>>> sb no longer messes up the VPM flags.
>>>>
>>>> Thanks to Ilia and Glenn for advice, and Roland for working
>>>> out the working evergreen path.
>>>> ---
>>>>  src/gallium/drivers/r600/r600_asm.c    |   7 +-
>>>>  src/gallium/drivers/r600/r600_isa.c    |   1 +
>>>>  src/gallium/drivers/r600/r600_isa.h    |   5 +-
>>>>  src/gallium/drivers/r600/r600_shader.c | 113 +++++++++++++++++++++++++++++++++
>>>>  src/gallium/drivers/r600/r600_shader.h |   1 +
>>>>  src/gallium/drivers/r600/r600_sq.h     |   2 +
>>>>  6 files changed, 126 insertions(+), 3 deletions(-)
>>>>
>>>> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
>>>> index 21d069d..ec2d34e 100644
>>>> --- a/src/gallium/drivers/r600/r600_asm.c
>>>> +++ b/src/gallium/drivers/r600/r600_asm.c
>>>> @@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>>>>                               fprintf(stderr, "%04d %08X %08X  %s ", id, bc->bytecode[id],
>>>>                                               bc->bytecode[id + 1], cfop->name);
>>>>                               fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
>>>> -                             fprintf(stderr, "\n");
>>>> +                             if (cf->vpm)
>>>> +                                     fprintf(stderr, "VPM ");
>>>>                               if (cf->end_of_program)
>>>>                                       fprintf(stderr, "EOP ");
>>>> +                             fprintf(stderr, "\n");
>>>> +
>>>>                       } else if (cfop->flags & CF_EXP) {
>>>>                               int o = 0;
>>>>                               const char *exp_type[] = {"PIXEL", "POS  ", "PARAM"};
>>>> @@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>>>>                                       fprintf(stderr, "POP:%X ", cf->pop_count);
>>>>                               if (cf->count && (cfop->flags & CF_EMIT))
>>>>                                       fprintf(stderr, "STREAM%d ", cf->count);
>>>> +                             if (cf->vpm)
>>>> +                                     fprintf(stderr, "VPM ");
>>>>                               if (cf->end_of_program)
>>>>                                       fprintf(stderr, "EOP ");
>>>>                               fprintf(stderr, "\n");
>>>> diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c
>>>> index 2633cdc..611b370 100644
>>>> --- a/src/gallium/drivers/r600/r600_isa.c
>>>> +++ b/src/gallium/drivers/r600/r600_isa.c
>>>> @@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
>>>>               {"ALU_EXT",                       {   -1,   -1, 0x0C, 0x0C },  CF_CLAUSE | CF_ALU | CF_ALU_EXT  },
>>>>               {"ALU_CONTINUE",                  { 0x0D, 0x0D, 0x0D,   -1 },  CF_CLAUSE | CF_ALU  },
>>>>               {"ALU_BREAK",                     { 0x0E, 0x0E, 0x0E,   -1 },  CF_CLAUSE | CF_ALU  },
>>>> +             {"ALU_VALID_PIXEL_MODE",          {   -1,   -1,   -1, 0x0E },  CF_CLAUSE | CF_ALU  },
>>>>               {"ALU_ELSE_AFTER",                { 0x0F, 0x0F, 0x0F, 0x0F },  CF_CLAUSE | CF_ALU  },
>>>>               {"CF_NATIVE",                     { 0x00, 0x00, 0x00, 0x00 },  0  }
>>>>  };
>>>> diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
>>>> index f6e2697..fcaf1f7 100644
>>>> --- a/src/gallium/drivers/r600/r600_isa.h
>>>> +++ b/src/gallium/drivers/r600/r600_isa.h
>>>> @@ -646,10 +646,11 @@ struct cf_op_info
>>>>  #define CF_OP_ALU_EXT                      84
>>>>  #define CF_OP_ALU_CONTINUE                 85
>>>>  #define CF_OP_ALU_BREAK                    86
>>>> -#define CF_OP_ALU_ELSE_AFTER               87
>>>> +#define CF_OP_ALU_VALID_PIXEL_MODE         87
>>>> +#define CF_OP_ALU_ELSE_AFTER               88
>>>>
>>>>  /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
>>>> -#define CF_NATIVE                          88
>>>> +#define CF_NATIVE                          89
>>>>
>>>>  enum r600_chip_class {
>>>>       ISA_CC_R600,
>>>> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
>>>> index a462691..9388db9 100644
>>>> --- a/src/gallium/drivers/r600/r600_shader.c
>>>> +++ b/src/gallium/drivers/r600/r600_shader.c
>>>> @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
>>>>
>>>>       use_sb &= !shader->shader.uses_atomics;
>>>>       use_sb &= !shader->shader.uses_images;
>>>> +     use_sb &= !shader->shader.uses_helper_invocation;
>>>>
>>>>       /* Check if the bytecode has already been built. */
>>>>       if (!shader->shader.bc.bytecode) {
>>>> @@ -346,6 +347,7 @@ struct r600_shader_ctx {
>>>>       boolean                 clip_vertex_write;
>>>>       unsigned                cv_output;
>>>>       unsigned                edgeflag_output;
>>>> +     int                                     helper_invoc_reg;
>>>>       int                                     cs_block_size_reg;
>>>>       int                                     cs_grid_size_reg;
>>>>       bool cs_block_size_loaded, cs_grid_size_loaded;
>>>> @@ -1295,6 +1297,93 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
>>>>       return t1;
>>>>  }
>>>>
>>>> +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
>>>> +{
>>>> +     int r;
>>>> +     struct r600_bytecode_alu alu;
>>>> +
>>>> +     /* do a vtx fetch with wqm set on the vtx fetch */
>>>> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>>> +     alu.op = ALU_OP1_MOV;
>>>> +     alu.dst.sel = ctx->helper_invoc_reg;
>>>> +     alu.dst.chan = 0;
>>>> +     alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
>>>> +     alu.src[0].value = 0xffffffff;
>>>> +     alu.dst.write = 1;
>>>> +     alu.last = 1;
>>>> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
>>>> +     if (r)
>>>> +             return r;
>>>> +
>>>> +     /* do a vtx fetch in VPM mode */
>>>> +     struct r600_bytecode_vtx vtx;
>>>> +     memset(&vtx, 0, sizeof(vtx));
>>>> +     vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
>>>> +     vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
>>>> +     vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
>>>> +     vtx.src_gpr = 0;
>>>> +     vtx.mega_fetch_count = 16; /* no idea here really... */
>>>> +     vtx.dst_gpr = ctx->helper_invoc_reg;
>>>> +     vtx.dst_sel_x = 4;
>>>> +     vtx.dst_sel_y = 7;              /* SEL_Y */
>>>> +     vtx.dst_sel_z = 7;              /* SEL_Z */
>>>> +     vtx.dst_sel_w = 7;              /* SEL_W */
>>>> +     vtx.data_format = FMT_32;
>>>> +     if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
>>>> +             return r;
>>>> +     ctx->bc->cf_last->vpm = 1;
>>>> +
>>>> +     /* compare the result with 0 */
>>>> +     memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>>> +     alu.op = ALU_OP3_CNDE_INT;
>>>> +     alu.is_op3 = 1;
>>>> +     alu.dst.sel = ctx->helper_invoc_reg;
>>>> +     alu.dst.chan = 0;
>>>> +     alu.dst.write = 1;
>>>> +     alu.src[0].sel = ctx->helper_invoc_reg;
>>>> +     alu.src[0].chan = 0;
>>>> +     alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
>>>> +     alu.src[1].value = 0x0;
>>>> +     alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
>>>> +     alu.src[2].value = 0xffffffff;
>>>> +     alu.last = 1;
>>>> +     r = r600_bytecode_add_alu(ctx->bc, &alu);
>>>> +     if (r)
>>>> +             return r;
>>> I only realized this later: this ALU conditional is completely
>>> unnecessary, so just skip it...
> 
> It might be for this test, but I don't think it is in general. We want a
> boolean, which I think is 0 or 0xffffffff, not 0.0 or 1.0.

The initial ALU reg write was 0xffffffff, and the resinfo gives you back
a fixed 0, so the register already holds 0 or 0xffffffff and the
CNDE_INT changes nothing...

Roland



More information about the mesa-dev mailing list