[Mesa-dev] [PATCH] r600: initial attempt at gl_HelperInvocation (v3)
Dave Airlie
airlied at gmail.com
Thu Feb 1 08:21:56 UTC 2018
From: Dave Airlie <airlied at redhat.com>
This passes the CTS and piglit tests.
This also disable sb for helper invocations until it doesn't
mess up the VPM flags.
Thanks to Ilia and Glenn for advice, and Roland for working
out the working evergreen path.
---
src/gallium/drivers/r600/r600_asm.c | 7 +-
src/gallium/drivers/r600/r600_isa.c | 1 +
src/gallium/drivers/r600/r600_isa.h | 5 +-
src/gallium/drivers/r600/r600_shader.c | 113 +++++++++++++++++++++++++++++++++
src/gallium/drivers/r600/r600_shader.h | 1 +
src/gallium/drivers/r600/r600_sq.h | 2 +
6 files changed, 126 insertions(+), 3 deletions(-)
diff --git a/src/gallium/drivers/r600/r600_asm.c b/src/gallium/drivers/r600/r600_asm.c
index 21d069d..ec2d34e 100644
--- a/src/gallium/drivers/r600/r600_asm.c
+++ b/src/gallium/drivers/r600/r600_asm.c
@@ -2099,9 +2099,12 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
fprintf(stderr, "%04d %08X %08X %s ", id, bc->bytecode[id],
bc->bytecode[id + 1], cfop->name);
fprintf(stderr, "%d @%d ", cf->ndw / 4, cf->addr);
- fprintf(stderr, "\n");
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
+ fprintf(stderr, "\n");
+
} else if (cfop->flags & CF_EXP) {
int o = 0;
const char *exp_type[] = {"PIXEL", "POS ", "PARAM"};
@@ -2198,6 +2201,8 @@ void r600_bytecode_disasm(struct r600_bytecode *bc)
fprintf(stderr, "POP:%X ", cf->pop_count);
if (cf->count && (cfop->flags & CF_EMIT))
fprintf(stderr, "STREAM%d ", cf->count);
+ if (cf->vpm)
+ fprintf(stderr, "VPM ");
if (cf->end_of_program)
fprintf(stderr, "EOP ");
fprintf(stderr, "\n");
diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c
index 2633cdc..611b370 100644
--- a/src/gallium/drivers/r600/r600_isa.c
+++ b/src/gallium/drivers/r600/r600_isa.c
@@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
{"ALU_EXT", { -1, -1, 0x0C, 0x0C }, CF_CLAUSE | CF_ALU | CF_ALU_EXT },
{"ALU_CONTINUE", { 0x0D, 0x0D, 0x0D, -1 }, CF_CLAUSE | CF_ALU },
{"ALU_BREAK", { 0x0E, 0x0E, 0x0E, -1 }, CF_CLAUSE | CF_ALU },
+ {"ALU_VALID_PIXEL_MODE", { -1, -1, -1, 0x0E }, CF_CLAUSE | CF_ALU },
{"ALU_ELSE_AFTER", { 0x0F, 0x0F, 0x0F, 0x0F }, CF_CLAUSE | CF_ALU },
{"CF_NATIVE", { 0x00, 0x00, 0x00, 0x00 }, 0 }
};
diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
index f6e2697..fcaf1f7 100644
--- a/src/gallium/drivers/r600/r600_isa.h
+++ b/src/gallium/drivers/r600/r600_isa.h
@@ -646,10 +646,11 @@ struct cf_op_info
#define CF_OP_ALU_EXT 84
#define CF_OP_ALU_CONTINUE 85
#define CF_OP_ALU_BREAK 86
-#define CF_OP_ALU_ELSE_AFTER 87
+#define CF_OP_ALU_VALID_PIXEL_MODE 87
+#define CF_OP_ALU_ELSE_AFTER 88
/* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
-#define CF_NATIVE 88
+#define CF_NATIVE 89
enum r600_chip_class {
ISA_CC_R600,
diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
index a462691..9388db9 100644
--- a/src/gallium/drivers/r600/r600_shader.c
+++ b/src/gallium/drivers/r600/r600_shader.c
@@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
use_sb &= !shader->shader.uses_atomics;
use_sb &= !shader->shader.uses_images;
+ use_sb &= !shader->shader.uses_helper_invocation;
/* Check if the bytecode has already been built. */
if (!shader->shader.bc.bytecode) {
@@ -346,6 +347,7 @@ struct r600_shader_ctx {
boolean clip_vertex_write;
unsigned cv_output;
unsigned edgeflag_output;
+ int helper_invoc_reg;
int cs_block_size_reg;
int cs_grid_size_reg;
bool cs_block_size_loaded, cs_grid_size_loaded;
@@ -1295,6 +1297,93 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
return t1;
}
+static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
+{
+ int r;
+ struct r600_bytecode_alu alu;
+
+ /* do a vtx fetch with wqm set on the vtx fetch */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->helper_invoc_reg;
+ alu.dst.chan = 0;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0xffffffff;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ /* do a vtx fetch in VPM mode */
+ struct r600_bytecode_vtx vtx;
+ memset(&vtx, 0, sizeof(vtx));
+ vtx.op = FETCH_OP_GET_BUFFER_RESINFO;
+ vtx.buffer_id = R600_BUFFER_INFO_CONST_BUFFER;
+ vtx.fetch_type = SQ_VTX_FETCH_NO_INDEX_OFFSET;
+ vtx.src_gpr = 0;
+ vtx.mega_fetch_count = 16; /* no idea here really... */
+ vtx.dst_gpr = ctx->helper_invoc_reg;
+ vtx.dst_sel_x = 4;
+ vtx.dst_sel_y = 7; /* SEL_Y */
+ vtx.dst_sel_z = 7; /* SEL_Z */
+ vtx.dst_sel_w = 7; /* SEL_W */
+ vtx.data_format = FMT_32;
+ if ((r = r600_bytecode_add_vtx_tc(ctx->bc, &vtx)))
+ return r;
+ ctx->bc->cf_last->vpm = 1;
+
+ /* compare the result with 0 */
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP3_CNDE_INT;
+ alu.is_op3 = 1;
+ alu.dst.sel = ctx->helper_invoc_reg;
+ alu.dst.chan = 0;
+ alu.dst.write = 1;
+ alu.src[0].sel = ctx->helper_invoc_reg;
+ alu.src[0].chan = 0;
+ alu.src[1].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[1].value = 0x0;
+ alu.src[2].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[2].value = 0xffffffff;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+ return 0;
+}
+
+static int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
+{
+ int r;
+ struct r600_bytecode_alu alu;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->helper_invoc_reg;
+ alu.dst.chan = 0;
+ alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
+ alu.src[0].value = 0xffffffff;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu(ctx->bc, &alu);
+ if (r)
+ return r;
+
+ memset(&alu, 0, sizeof(struct r600_bytecode_alu));
+ alu.op = ALU_OP1_MOV;
+ alu.dst.sel = ctx->helper_invoc_reg;
+ alu.dst.chan = 0;
+ alu.src[0].sel = V_SQ_ALU_SRC_0;
+ alu.dst.write = 1;
+ alu.last = 1;
+ r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE);
+ if (r)
+ return r;
+
+ return ctx->helper_invoc_reg;
+}
+
static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
{
struct r600_bytecode_vtx vtx;
@@ -1458,6 +1547,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
r600_src->sel = load_block_grid_size(ctx, false);
} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
r600_src->sel = load_block_grid_size(ctx, true);
+ } else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) {
+ r600_src->sel = ctx->helper_invoc_reg;
+ r600_src->swizzle[0] = 0;
+ r600_src->swizzle[1] = 0;
+ r600_src->swizzle[2] = 0;
+ r600_src->swizzle[3] = 0;
}
} else {
if (tgsi_src->Register.Indirect)
@@ -3120,6 +3215,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
tgsi_scan_shader(tokens, &ctx.info);
shader->indirect_files = ctx.info.indirect_files;
+ shader->uses_helper_invocation = false;
shader->uses_doubles = ctx.info.uses_doubles;
shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
shader->nsys_inputs = 0;
@@ -3193,6 +3289,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.clip_vertex_write = 0;
ctx.thread_id_gpr_loaded = false;
+ ctx.helper_invoc_reg = -1;
ctx.cs_block_size_reg = -1;
ctx.cs_grid_size_reg = -1;
ctx.cs_block_size_loaded = false;
@@ -3238,6 +3335,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
else
ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
+
+ for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
+ if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) {
+ ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
+ shader->uses_helper_invocation = true;
+ }
+ }
}
if (ctx.type == PIPE_SHADER_GEOMETRY) {
/* FIXME 1 would be enough in some cases (3 or less input vertices) */
@@ -3439,6 +3543,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
shader->nr_ps_max_color_exports = 8;
+ if (ctx.shader->uses_helper_invocation) {
+ if (ctx.bc->chip_class == CAYMAN)
+ r = cm_load_helper_invocation(&ctx);
+ else
+ r = eg_load_helper_invocation(&ctx);
+ if (r)
+ return r;
+
+ }
if (ctx.fragcoord_input >= 0) {
if (ctx.bc->chip_class == CAYMAN) {
for (j = 0 ; j < 4; j++) {
diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
index 8444907..da96688 100644
--- a/src/gallium/drivers/r600/r600_shader.h
+++ b/src/gallium/drivers/r600/r600_shader.h
@@ -119,6 +119,7 @@ struct r600_shader {
boolean uses_doubles;
boolean uses_atomics;
boolean uses_images;
+ boolean uses_helper_invocation;
uint8_t atomic_base;
uint8_t rat_base;
uint8_t image_size_const_offset;
diff --git a/src/gallium/drivers/r600/r600_sq.h b/src/gallium/drivers/r600/r600_sq.h
index f51ffcf..6b07dc1 100644
--- a/src/gallium/drivers/r600/r600_sq.h
+++ b/src/gallium/drivers/r600/r600_sq.h
@@ -198,6 +198,8 @@
#define EG_V_SQ_ALU_SRC_LDS_DIRECT_B 0x000000E0
#define EG_V_SQ_ALU_SRC_TIME_HI 0x000000E3
#define EG_V_SQ_ALU_SRC_TIME_LO 0x000000E4
+#define EG_V_SQ_ALU_SRC_MASK_HI 0x000000E5
+#define EG_V_SQ_ALU_SRC_MASK_LO 0x000000E6
#define EG_V_SQ_ALU_SRC_HW_WAVE_ID 0x000000E7
#define EG_V_SQ_ALU_SRC_SIMD_ID 0x000000E8
#define EG_V_SQ_ALU_SRC_SE_ID 0x000000E9
--
2.9.5
More information about the mesa-dev
mailing list