[Mesa-dev] [PATCH] r600: initial attempt at gl_HelperInvocation (v3)
Dave Airlie
airlied at gmail.com
Thu Feb 1 23:23:03 UTC 2018
On 2 February 2018 at 02:05, Roland Scheidegger <sroland at vmware.com> wrote:
> Am 01.02.2018 um 09:21 schrieb Dave Airlie:
>> From: Dave Airlie <airlied at redhat.com>
>>
>> This passes the CTS and piglit tests.
>>
>> This also disables sb for helper invocations until sb no longer
>> messes up the VPM flags.
>>
>> Thanks to Ilia and Glenn for advice, and Roland for working
>> out the working evergreen path.
>> ---
>> src/gallium/drivers/r600/r600_asm.c | 7 +-
>> src/gallium/drivers/r600/r600_isa.c | 1 +
>> src/gallium/drivers/r600/r600_isa.h | 5 +-
>> src/gallium/drivers/r600/r600_shader.c | 113 +++++++++++++++++++++++++++++++++
>> src/gallium/drivers/r600/r600_shader.h | 1 +
>> src/gallium/drivers/r600/r600_sq.h | 2 +
>> 6 files changed, 126 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
>> index 21d069d..ec2d34e 100644
>> --- a/src/gallium/drivers/r600/r600_asm.c
>> +++ b/src/gallium/drivers/r600/r600_asm.c
>> @@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>> fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
>> bc->bytecode[id + 1], cfop->name);
>> fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
>> - fprintf(stderr, "\n");
>> + if (cf->vpm)
>> + fprintf(stderr, "VPM ");
>> if (cf->end_of_program)
>> fprintf(stderr, "EOP ");
>> + fprintf(stderr, "\n");
>> +
>> } else if (cfop->flags & CF_EXP) {
>> int o = 0;
>> const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
>> @@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
>> fprintf(stderr, "POP:%X ", cf->pop_count);
>> if (cf->count && (cfop->flags & CF_EMIT))
>> fprintf(stderr, "STREAM%d ", cf->count);
>> + if (cf->vpm)
>> + fprintf(stderr, "VPM ");
>> if (cf->end_of_program)
>> fprintf(stderr, "EOP ");
>> fprintf(stderr, "\n");
>> diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c
>> index 2633cdc..611b370 100644
>> --- a/src/gallium/drivers/r600/r600_isa.c
>> +++ b/src/gallium/drivers/r600/r600_isa.c
>> @@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
>> {"ALU_EXT", { -1, -1, 0x0C, 0x0C }, CF_CLAUSE | CF_ALU | CF_ALU_EXT },
>> {"ALU_CONTINUE", { 0x0D, 0x0D, 0x0D, -1 }, CF_CLAUSE | CF_ALU },
>> {"ALU_BREAK", { 0x0E, 0x0E, 0x0E, -1 }, CF_CLAUSE | CF_ALU },
>> + {"ALU_VALID_PIXEL_MODE", { -1, -1, -1, 0x0E }, CF_CLAUSE | CF_ALU },
>> {"ALU_ELSE_AFTER", { 0x0F, 0x0F, 0x0F, 0x0F }, CF_CLAUSE | CF_ALU },
>> {"CF_NATIVE", { 0x00, 0x00, 0x00, 0x00 }, 0 }
>> };
>> diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
>> index f6e2697..fcaf1f7 100644
>> --- a/src/gallium/drivers/r600/r600_isa.h
>> +++ b/src/gallium/drivers/r600/r600_isa.h
>> @@ -646,10 +646,11 @@ struct cf_op_info
>> #define CF_OP_ALU_EXT 84
>> #define CF_OP_ALU_CONTINUE 85
>> #define CF_OP_ALU_BREAK 86
>> -#define CF_OP_ALU_ELSE_AFTER 87
>> +#define CF_OP_ALU_VALID_PIXEL_MODE 87
>> +#define CF_OP_ALU_ELSE_AFTER 88
>>
>> /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
>> -#define CF_NATIVE 88
>> +#define CF_NATIVE 89
>>
>> enum r600_chip_class {
>> ISA_CC_R600,
>> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
>> index a462691..9388db9 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
>>
>> use_sb &= !shader->shader.uses_atomics;
>> use_sb &= !shader->shader.uses_images;
>> + use_sb &= !shader->shader.uses_helper_invocation;
>>
>> /* Check if the bytecode has already been built. */
>> if (!shader->shader.bc.bytecode) {
>> @@ -346,6 +347,7 @@ struct r600_shader_ctx {
>> boolean clip_vertex_write;
>> unsigned cv_output;
>> unsigned edgeflag_output;
>> + int helper_invoc_reg;
>> int cs_block_size_reg;
>> int cs_grid_size_reg;
>> bool cs_block_size_loaded, cs_grid_size_loaded;
>> @@ -1295,6 +1297,93 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
>> return t1;
>> }
>>
>> +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
>> +{
>> + int r;
>> + struct r600_bytecode_alu alu;
>> +
>> + /* do a vtx fetch with wqm set on the vtx fetch */
>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> + alu.op = ALU_OP1_MOV;
>> + alu.dst.sel = ctx->helper_invoc_reg;
>> + alu.dst.chan = 0;
>> + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
>> + alu.src[0].value = 0xffffffff;
>> + alu.dst.write = 1;
>> + alu.last = 1;
>> + r = r600_bytecode_add_alu(ctx->bc, &alu);
>> + if (r)
>> + return r;
>> +
>> + /* do a vtx fetch in VPM mode */
>> + struct r600_bytecode_vtx vtx;
>> + memset(&vtx, 0, sizeof(vtx));
>> + vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
>> + vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
>> + vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
>> + vtx.src_gpr = 0;
>> + vtx.mega_fetch_count = 16; /* no idea here really... */
>> + vtx.dst_gpr = ctx->helper_invoc_reg;
>> + vtx.dst_sel_x = 4;
>> + vtx.dst_sel_y = 7; /* SEL_Y */
>> + vtx.dst_sel_z = 7; /* SEL_Z */
>> + vtx.dst_sel_w = 7; /* SEL_W */
>> + vtx.data_format = FMT_32;
>> + if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
>> + return r;
>> + ctx->bc->cf_last->vpm = 1;
>> +
>> + /* compare the result with 0 */
>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> + alu.op = ALU_OP3_CNDE_INT;
>> + alu.is_op3 = 1;
>> + alu.dst.sel = ctx->helper_invoc_reg;
>> + alu.dst.chan = 0;
>> + alu.dst.write = 1;
>> + alu.src[0].sel = ctx->helper_invoc_reg;
>> + alu.src[0].chan = 0;
>> + alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
>> + alu.src[1].value = 0x0;
>> + alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
>> + alu.src[2].value = 0xffffffff;
>> + alu.last = 1;
>> + r = r600_bytecode_add_alu(ctx->bc, &alu);
>> + if (r)
>> + return r;
> I realized this only later: this ALU conditional is completely
> unnecessary, so just skip it...
>
> Other than that,
> Reviewed-by: Roland Scheidegger <sroland at vmware.com>
>
>
>
>> + return 0;
>> +}
>> +
>> +static int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
>> +{
>> + int r;
>> + struct r600_bytecode_alu alu;
>> +
>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> + alu.op = ALU_OP1_MOV;
>> + alu.dst.sel = ctx->helper_invoc_reg;
>> + alu.dst.chan = 0;
>> + alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
>> + alu.src[0].value = 0xffffffff;
>> + alu.dst.write = 1;
>> + alu.last = 1;
>> + r = r600_bytecode_add_alu(ctx->bc, &alu);
>> + if (r)
>> + return r;
>> +
>> + memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> + alu.op = ALU_OP1_MOV;
>> + alu.dst.sel = ctx->helper_invoc_reg;
>> + alu.dst.chan = 0;
>> + alu.src[0].sel = V_SQ_ALU_SRC_0;
>> + alu.dst.write = 1;
>> + alu.last = 1;
>> + r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE);
>> + if (r)
>> + return r;
>> +
>> + return ctx->helper_invoc_reg;
>> +}
>> +
>> static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
>> {
>> struct r600_bytecode_vtx vtx;
>> @@ -1458,6 +1547,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
>> r600_src->sel = load_block_grid_size(ctx, false);
>> } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
>> r600_src->sel = load_block_grid_size(ctx, true);
>> + } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) {
>> + r600_src->sel = ctx->helper_invoc_reg;
>> + r600_src->swizzle[0] = 0;
>> + r600_src->swizzle[1] = 0;
>> + r600_src->swizzle[2] = 0;
>> + r600_src->swizzle[3] = 0;
>> }
>> } else {
>> if (tgsi_src->Register.Indirect)
>> @@ -3120,6 +3215,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>> tgsi_scan_shader(tokens, &ctx.info);
>> shader->indirect_files = ctx.info.indirect_files;
>>
>> + shader->uses_helper_invocation = false;
>> shader->uses_doubles = ctx.info.uses_doubles;
>> shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
>> shader->nsys_inputs = 0;
>> @@ -3193,6 +3289,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>> ctx.clip_vertex_write = 0;
>> ctx.thread_id_gpr_loaded = false;
>>
>> + ctx.helper_invoc_reg = -1;
>> ctx.cs_block_size_reg = -1;
>> ctx.cs_grid_size_reg = -1;
>> ctx.cs_block_size_loaded = false;
>> @@ -3238,6 +3335,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>> ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
>> else
>> ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
>> +
>> + for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
>> + if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) {
>> + ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
>> + shader->uses_helper_invocation = true;
>> + }
>> + }
>> }
>> if (ctx.type == PIPE_SHADER_GEOMETRY) {
>> /* FIXME 1 would be enough in some cases (3 or less input vertices) */
>> @@ -3439,6 +3543,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>> if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
>> shader->nr_ps_max_color_exports = 8;
>>
>> + if (ctx.shader->uses_helper_invocation) {
>> + if (ctx.bc->chip_class == CAYMAN)
>> + r = cm_load_helper_invocation(&ctx);
>> + else
>> + r = eg_load_helper_invocation(&ctx);
>> + if (r)
>> + return r;
>> +
>> + }
>> if (ctx.fragcoord_input >= 0) {
>> if (ctx.bc->chip_class == CAYMAN) {
>> for (j = 0 ; j < 4; j++) {
>> diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
>> index 8444907..da96688 100644
>> --- a/src/gallium/drivers/r600/r600_shader.h
>> +++ b/src/gallium/drivers/r600/r600_shader.h
>> @@ -119,6 +119,7 @@ struct r600_shader {
>> boolean uses_doubles;
>> boolean uses_atomics;
>> boolean uses_images;
>> + boolean uses_helper_invocation;
>> uint8_t atomic_base;
>> uint8_t rat_base;
>> uint8_t image_size_const_offset;
>> diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
>> index f51ffcf..6b07dc1 100644
>> --- a/src/gallium/drivers/r600/r600_sq.h
>> +++ b/src/gallium/drivers/r600/r600_sq.h
>> @@ -198,6 +198,8 @@
>> #define EG_V_SQ_ALU_SRC_LDS_DIRECT_B 0x000000E0
>> #define EG_V_SQ_ALU_SRC_TIME_HI 0x000000E3
>> #define EG_V_SQ_ALU_SRC_TIME_LO 0x000000E4
>> +#define EG_V_SQ_ALU_SRC_MASK_HI 0x000000E5
>> +#define EG_V_SQ_ALU_SRC_MASK_LO 0x000000E6
>> #define EG_V_SQ_ALU_SRC_HW_WAVE_ID 0x000000E7
>> #define EG_V_SQ_ALU_SRC_SIMD_ID 0x000000E8
>> #define EG_V_SQ_ALU_SRC_SE_ID 0x000000E9
>>
>
More information about the mesa-dev
mailing list