[Mesa-dev] [PATCH] r600/cayman: initial attempt at gl_HelperInvocation (v2)

Thu Feb 1 03:43:47 UTC 2018

Looks good to me (albeit disabling sb is a shame).

Reviewed-by: Roland Scheidegger <sroland at vmware.com>
Am 01.02.2018 um 03:19 schrieb Dave Airlie:
> From: Dave Airlie <airlied at redhat.com>
> 
> This is a cayman-only patch; it doesn't appear that
> evergreen supports the ALU on VPM. I'll try and figure it out later.
> 
> All I can say for this patch is it passes the piglit test and
> the CTS tests.
> 
> This also disables sb for helper invocations until it can handle
> the special ALU clause.
> 
> I'd like to push this (evergreen is left as an exercise for the
> reader :-)
> 
> v2: move to using alu vpm mode, and just setting 0, -1.
> move calcs to top of pixel shader and store value.
> 
> Signed-off-by: Dave Airlie <airlied at redhat.com>
> ---
>  src/gallium/drivers/r600/r600_isa.c    |  1 +
>  src/gallium/drivers/r600/r600_isa.h    |  5 +--
>  src/gallium/drivers/r600/r600_shader.c | 64 ++++++++++++++++++++++++++++++++++
>  src/gallium/drivers/r600/r600_shader.h |  1 +
>  4 files changed, 69 insertions(+), 2 deletions(-)
> 
> diff --git a/src/gallium/drivers/r600/r600_isa.c b/src/gallium/drivers/r600/r600_isa.c
> index 2633cdcdb9..611b370bf5 100644
> --- a/src/gallium/drivers/r600/r600_isa.c
> +++ b/src/gallium/drivers/r600/r600_isa.c
> @@ -506,6 +506,7 @@ static const struct cf_op_info cf_op_table[] = {
>  		{"ALU_EXT",                       {   -1,   -1, 0x0C, 0x0C },  CF_CLAUSE | CF_ALU | CF_ALU_EXT  },
>  		{"ALU_CONTINUE",                  { 0x0D, 0x0D, 0x0D,   -1 },  CF_CLAUSE | CF_ALU  },
>  		{"ALU_BREAK",                     { 0x0E, 0x0E, 0x0E,   -1 },  CF_CLAUSE | CF_ALU  },
> +		{"ALU_VALID_PIXEL_MODE",          {   -1,   -1,   -1, 0x0E },  CF_CLAUSE | CF_ALU  },
>  		{"ALU_ELSE_AFTER",                { 0x0F, 0x0F, 0x0F, 0x0F },  CF_CLAUSE | CF_ALU  },
>  		{"CF_NATIVE",                     { 0x00, 0x00, 0x00, 0x00 },  0  }
>  };
> diff --git a/src/gallium/drivers/r600/r600_isa.h b/src/gallium/drivers/r600/r600_isa.h
> index f6e26976c5..fcaf1f766b 100644
> --- a/src/gallium/drivers/r600/r600_isa.h
> +++ b/src/gallium/drivers/r600/r600_isa.h
> @@ -646,10 +646,11 @@ struct cf_op_info
>  #define CF_OP_ALU_EXT                      84
>  #define CF_OP_ALU_CONTINUE                 85
>  #define CF_OP_ALU_BREAK                    86
> -#define CF_OP_ALU_ELSE_AFTER               87
> +#define CF_OP_ALU_VALID_PIXEL_MODE         87
> +#define CF_OP_ALU_ELSE_AFTER               88
>  
>  /* CF_NATIVE means that r600_bytecode_cf contains pre-encoded native data */
> -#define CF_NATIVE                          88
> +#define CF_NATIVE                          89
>  
>  enum r600_chip_class {
>  	ISA_CC_R600,
> diff --git a/src/gallium/drivers/r600/r600_shader.c b/src/gallium/drivers/r600/r600_shader.c
> index a462691f7a..54c67c7f83 100644
> --- a/src/gallium/drivers/r600/r600_shader.c
> +++ b/src/gallium/drivers/r600/r600_shader.c
> @@ -197,6 +197,7 @@ int r600_pipe_shader_create(struct pipe_context *ctx,
>  
>  	use_sb &= !shader->shader.uses_atomics;
>  	use_sb &= !shader->shader.uses_images;
> +	use_sb &= !shader->shader.uses_helper_invocation;
>  
>  	/* Check if the bytecode has already been built. */
>  	if (!shader->shader.bc.bytecode) {
> @@ -346,6 +347,7 @@ struct r600_shader_ctx {
>  	boolean                 clip_vertex_write;
>  	unsigned                cv_output;
>  	unsigned		edgeflag_output;
> +	int					helper_invoc_reg;
>  	int                                     cs_block_size_reg;
>  	int                                     cs_grid_size_reg;
>  	bool cs_block_size_loaded, cs_grid_size_loaded;
> @@ -1295,6 +1297,44 @@ static int load_sample_position(struct r600_shader_ctx *ctx, struct r600_shader_
>  	return t1;
>  }
>  
> +static int eg_load_helper_invocation(struct r600_shader_ctx *ctx)
> +{
> +	/* TODO eg support */
> +	return -1;
> +}
> +
> +static int cm_load_helper_invocation(struct r600_shader_ctx *ctx)
> +{
> +	int r;
> +
> +	struct r600_bytecode_alu alu;
> +
> +	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +	alu.op = ALU_OP1_MOV;
> +	alu.dst.sel = ctx->helper_invoc_reg;
> +	alu.dst.chan = 0;
> +	alu.src[0].sel = V_SQ_ALU_SRC_LITERAL;
> +	alu.src[0].value = 0xffffffff;
> +	alu.dst.write = 1;
> +	alu.last = 1;
> +	r = r600_bytecode_add_alu(ctx->bc, &alu);
> +	if (r)
> +		return r;
> +
> +	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
> +	alu.op = ALU_OP1_MOV;
> +	alu.dst.sel = ctx->helper_invoc_reg;
> +	alu.dst.chan = 0;
> +	alu.src[0].sel = V_SQ_ALU_SRC_0;
> +	alu.dst.write = 1;
> +	alu.last = 1;
> +	r = r600_bytecode_add_alu_type(ctx->bc, &alu, CF_OP_ALU_VALID_PIXEL_MODE);
> +	if (r)
> +		return r;
> +
> +	return 0;
> +}
> +
>  static int load_block_grid_size(struct r600_shader_ctx *ctx, bool load_block)
>  {
>  	struct r600_bytecode_vtx vtx;
> @@ -1458,6 +1498,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
>  			r600_src->sel = load_block_grid_size(ctx, false);
>  		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_BLOCK_SIZE) {
>  			r600_src->sel = load_block_grid_size(ctx, true);
> +		} else if (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] == TGSI_SEMANTIC_HELPER_INVOCATION) {
> +			r600_src->sel = ctx->helper_invoc_reg;
> +			r600_src->swizzle[0] = 0;
> +			r600_src->swizzle[1] = 0;
> +			r600_src->swizzle[2] = 0;
> +			r600_src->swizzle[3] = 0;
>  		}
>  	} else {
>  		if (tgsi_src->Register.Indirect)
> @@ -3120,6 +3166,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>  	tgsi_scan_shader(tokens, &ctx.info);
>  	shader->indirect_files = ctx.info.indirect_files;
>  
> +	shader->uses_helper_invocation = false;
>  	shader->uses_doubles = ctx.info.uses_doubles;
>  	shader->uses_atomics = ctx.info.file_mask[TGSI_FILE_HW_ATOMIC];
>  	shader->nsys_inputs = 0;
> @@ -3193,6 +3240,7 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>  	ctx.clip_vertex_write = 0;
>  	ctx.thread_id_gpr_loaded = false;
>  
> +	ctx.helper_invoc_reg = -1;
>  	ctx.cs_block_size_reg = -1;
>  	ctx.cs_grid_size_reg = -1;
>  	ctx.cs_block_size_loaded = false;
> @@ -3238,6 +3286,13 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>  			ctx.file_offset[TGSI_FILE_INPUT] = evergreen_gpr_count(&ctx);
>  		else
>  			ctx.file_offset[TGSI_FILE_INPUT] = allocate_system_value_inputs(&ctx, ctx.file_offset[TGSI_FILE_INPUT]);
> +
> +		for (i = 0; i < PIPE_MAX_SHADER_INPUTS; i++) {
> +			if (ctx.info.system_value_semantic_name[i] == TGSI_SEMANTIC_HELPER_INVOCATION) {
> +				ctx.helper_invoc_reg = ctx.file_offset[TGSI_FILE_INPUT]++;
> +				shader->uses_helper_invocation = true;
> +			}
> +		}
>  	}
>  	if (ctx.type == PIPE_SHADER_GEOMETRY) {
>  		/* FIXME 1 would be enough in some cases (3 or less input vertices) */
> @@ -3439,6 +3494,15 @@ static int r600_shader_from_tgsi(struct r600_context *rctx,
>  	if (shader->fs_write_all && rscreen->b.chip_class >= EVERGREEN)
>  		shader->nr_ps_max_color_exports = 8;
>  
> +	if (ctx.shader->uses_helper_invocation) {
> +		if (ctx.bc->chip_class == CAYMAN)
> +			r = cm_load_helper_invocation(&ctx);
> +		else
> +			r = eg_load_helper_invocation(&ctx);
> +		if (r)
> +			return r;
> +
> +	}
>  	if (ctx.fragcoord_input >= 0) {
>  		if (ctx.bc->chip_class == CAYMAN) {
>  			for (j = 0 ; j < 4; j++) {
> diff --git a/src/gallium/drivers/r600/r600_shader.h b/src/gallium/drivers/r600/r600_shader.h
> index 8444907883..da96688e54 100644
> --- a/src/gallium/drivers/r600/r600_shader.h
> +++ b/src/gallium/drivers/r600/r600_shader.h
> @@ -119,6 +119,7 @@ struct r600_shader {
>  	boolean			uses_doubles;
>  	boolean                 uses_atomics;
>  	boolean			uses_images;
> +	boolean			uses_helper_invocation;
>  	uint8_t                 atomic_base;
>  	uint8_t			rat_base;
>  	uint8_t                 image_size_const_offset;
>