[Mesa-dev] [PATCH] r600: initial attempt at gl_HelperInvocation (v3)
Dave Airlie
airlied at gmail.com
Thu Feb 1 23:23:50 UTC 2018
On 2 February 2018 at 09:23, Dave Airlie <airlied at gmail.com> wrote:
> On 2 February 2018 at 02:05, Roland Scheidegger <sroland at vmware.com> wrote:
>> Am 01.02.2018 um 09:21 schrieb Dave Airlie:
>>> From: Dave Airlie <airlied at redhat.com>
>>>
>>> This passes the CTS and piglit tests.
>>>
>>> This also disable sb for helper invocations until it doesn't
>>> mess up the VPM flags.
>>>
>>> Thanks to Ilia and Glenn for advice, and Roland for working
>>> out the working evergreen path.
>>> ---
>>> src/gallium/drivers/r600/r600_asm.c | 7 +-
>>> src/gallium/drivers/r600/r600_isa.c | 1 +
>>> src/gallium/drivers/r600/r600_isa.h | 5 +-
>>> src/gallium/drivers/r600/r600_shader.c | 113 +++++++++++++++++++++++++++++++++
>>> src/gallium/drivers/r600/r600_shader.h | 1 +
>>> src/gallium/drivers/r600/r600_sq.h | 2 +
>>> 6 files changed, 126 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
>>> index 21d069d..ec2d34e 100644
>>> --- a/src/gallium/drivers/r600/r600_asm.c
>>> +++ b/src/gallium/drivers/r600/r600_asm.c
>>> @@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>>> fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
>>> bc->bytecode[id + 1], cfop->name);
>>> fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
>>> - fprintf(stderr, "\n");
>>> + if (cf->vpm)
>>> + fprintf(stderr, "VPM ");
>>> if (cf->end_of_program)
>>> fprintf(stderr, "EOP ");
>>> + fprintf(stderr, "\n");
>>> +
>>> } else if (cfop->flags & CF_EXP) {
>>> int o = 0;
>>> const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
>>> @@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>>> fprintf(stderr, "POP:%X ", cf->pop_count);
>>> if (cf->count && (cfop->flags & CF_EMIT))
>>> fprintf(stderr, "STREAM%d ", cf->count);
>>> + if (cf->vpm)
>>> + fprintf(stderr, "VPM ");
>>> if (cf->end_of_program)
>>> fprintf(stderr, "EOP ");
>>> fprintf(stderr, "\n");
>>> diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c
>>> index 2633cdc..611b370 100644
>>> --- a/src/gallium/drivers/r600/r600_isa.c
>>> +++ b/src/gallium/drivers/r600/r600_isa.c
>>> @@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
>>> {"ALU_EXT", { -1, -1, 0x0C, 0x0C }, CF_CLAUSE | CF_ALU | CF_ALU_EXT },
>>> {"ALU_CONTINUE", { 0x0D, 0x0D, 0x0D, -1 }, CF_CLAUSE | CF_ALU },
>>> {"ALU_BREAK", { 0x0E, 0x0E, 0x0E, -1 }, CF_CLAUSE | CF_ALU },
>>> + {"ALU_VALID_PIXEL_MODE", { -1, -1, -1, 0x0E }, CF_CLAUSE | CF_ALU },
>>> {"ALU_ELSE_AFTER", { 0x0F, 0x0F, 0x0F, 0x0F }, CF_CLAUSE | CF_ALU },
>>> {"CF_NATIVE", { 0x00, 0x00, 0x00, 0x00 }, 0 }
>>> };
>>> diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
>>> index f6e2697..fcaf1f7 100644
>>> --- a/src/gallium/drivers/r600/r600_isa.h
>>> +++ b/src/gallium/drivers/r600/r600_isa.h
>>> @@ -646,10 +646,11 @@ struct cf_op_info
>>> #define CF_OP_ALU_EXT 84
>>> #define CF_OP_ALU_CONTINUE 85
>>> #define CF_OP_ALU_BREAK 86
>>> -#define CF_OP_ALU_ELSE_AFTER 87
>>> +#define CF_OP_ALU_VALID_PIXEL_MODE 87
>>> +#define CF_OP_ALU_ELSE_AFTER 88
>>>
>>> /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
>>> -#define CF_NATIVE 88
>>> +#define CF_NATIVE 89
>>>
>>> enum r600_chip_class {
>>> ISA_CC_R600,
>>> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
>>> index a462691..9388db9 100644
>>> --- a/src/gallium/drivers/r600/r600_shader.c
>>> +++ b/src/gallium/drivers/r600/r600_shader.c
>>> @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
>>>
>>> use_sb &= !shader->shader.uses_atomics;
>>> use_sb &= !shader->shader.uses_images;
>>> + use_sb &= !shader->shader.uses_helper_invocation;
>>>
>>> /* Check if the bytecode has already been built. */
>>> if (!shader->shader.bc.bytecode) {
>>> @@ -346,6 +347,7 @@ struct r600_shader_ctx {
>>> boolean clip_vertex_write;
>>> unsigned cv_output;
>>> unsigned edgeflag_output;
>>> + int helper_invoc_reg;
>>> int cs_block_size_reg;
>>> int cs_grid_size_reg;
>>> bool cs_block_size_loaded, cs_grid_size_loaded;
>>> @@ -1295,6 +1297,93 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
>>> return t1;
>>> }
>>>
>>> +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
>>> +{
>>> + int r;
>>> + struct r600_bytecode_alu alu;
>>> +
>>> + /* do a vtx fetch with wqm set on the vtx fetch */
>>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> + alu.op = ALU_OP1_MOV;
>>> + alu.dst.sel = ctx->helper_invoc_reg;
>>> + alu.dst.chan = 0;
>>> + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
>>> + alu.src[0].value = 0xffffffff;
>>> + alu.dst.write = 1;
>>> + alu.last = 1;
>>> + r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> + if (r)
>>> + return r;
>>> +
>>> + /* do a vtx fetch in VPM mode */
>>> + struct r600_bytecode_vtx vtx;
>>> + memset(&vtx, 0, sizeof(vtx));
>>> + vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
>>> + vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
>>> + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
>>> + vtx.src_gpr = 0;
>>> + vtx.mega_fetch_count = 16; /* no idea here really... */
>>> + vtx.dst_gpr = ctx->helper_invoc_reg;
>>> + vtx.dst_sel_x = 4;
>>> + vtx.dst_sel_y = 7; /* SEL_Y */
>>> + vtx.dst_sel_z = 7; /* SEL_Z */
>>> + vtx.dst_sel_w = 7; /* SEL_W */
>>> + vtx.data_format = FMT_32;
>>> + if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
>>> + return r;
>>> + ctx->bc->cf_last->vpm = 1;
>>> +
>>> + /* compare the result with 0 */
>>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> + alu.op = ALU_OP3_CNDE_INT;
>>> + alu.is_op3 = 1;
>>> + alu.dst.sel = ctx->helper_invoc_reg;
>>> + alu.dst.chan = 0;
>>> + alu.dst.write = 1;
>>> + alu.src[0].sel = ctx->helper_invoc_reg;
>>> + alu.src[0].chan = 0;
>>> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
>>> + alu.src[1].value = 0x0;
>>> + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
>>> + alu.src[2].value = 0xffffffff;
>>> + alu.last = 1;
>>> + r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> + if (r)
>>> + return r;
>> I realized this only later, this alu conditional is completely
>> unnecessary, just skip it...
It might be unnecessary for this test, but I don't think it is in general. We want a
boolean, which I think is 0 or 0xffffffff, not 0.0 or 1.0.
Dave.
More information about the mesa-dev
mailing list