[Mesa-dev] [PATCH] r600g: Support TGSI_SEMANTIC_HELPER_INVOCATION

Fri Nov 13 15:11:38 PST 2015

On Fri, 13 Nov 2015 18:57:28 +0100, Nicolai Hähnle <nhaehnle at gmail.com>  
wrote:

> On 13.11.2015 00:14, Glenn Kennard wrote:
>> Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
>> ---
>> Maybe there is a better way to check if a thread is a helper invocation?
>
> Is ctx->face_gpr guaranteed to be initialized when  
> load_helper_invocation is called?
>

allocate_system_value_inputs() sets that if needed, and is called before  
parsing any opcodes.

> Aside, I'm not sure I understand correctly what this is supposed to do.  
> The values you're querying are related to multi-sampling, but my  
> understanding has always been that helper invocations can also happen  
> without multi-sampling: you always want to process 2x2 quads of pixels  
> at a time to be able to compute derivatives for texture sampling. When  
> the boundary of primitive intersects such a quad, you get helper  
> invocations outside the primitive.
>

Non-MSAA buffers act just like 1 sample buffers with regards to the  
coverage mask supplied by the hardware, so helper invocations which have  
no coverage get a 0 for the mask value, and normal fragments get 1. Works  
with the piglit test case posted at least...

> Cheers,
> Nicolai
>
>>   src/gallium/drivers/r600/r600_shader.c | 83  
>> +++++++++++++++++++++++++++++-----
>>   1 file changed, 72 insertions(+), 11 deletions(-)
>>
>> diff --git a/src/gallium/drivers/r600/r600_shader.c  
>> b/src/gallium/drivers/r600/r600_shader.c
>> index 560197c..a227d78 100644
>> --- a/src/gallium/drivers/r600/r600_shader.c
>> +++ b/src/gallium/drivers/r600/r600_shader.c
>> @@ -530,7 +530,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
>>   	    name == TGSI_SEMANTIC_PSIZE ||
>>   	    name == TGSI_SEMANTIC_EDGEFLAG ||
>>   	    name == TGSI_SEMANTIC_FACE ||
>> -	    name == TGSI_SEMANTIC_SAMPLEMASK)
>> +	    name == TGSI_SEMANTIC_SAMPLEMASK ||
>> +	    name == TGSI_SEMANTIC_HELPER_INVOCATION)
>>   		index = 0;
>>   	else {
>>   		if (name == TGSI_SEMANTIC_GENERIC) {
>> @@ -734,7 +735,8 @@ static int tgsi_declaration(struct r600_shader_ctx  
>> *ctx)
>>   	case TGSI_FILE_SYSTEM_VALUE:
>>   		if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
>>   			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
>> -			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
>> +			d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS ||
>> +			d->Semantic.Name == TGSI_SEMANTIC_HELPER_INVOCATION) {
>>   			break; /* Already handled from allocate_system_value_inputs */
>>   		} else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
>>   			if (!ctx->native_integers) {
>> @@ -776,13 +778,14 @@ static int allocate_system_value_inputs(struct  
>> r600_shader_ctx *ctx, int gpr_off
>>   	struct {
>>   		boolean enabled;
>>   		int *reg;
>> -		unsigned name, alternate_name;
>> +		unsigned associated_semantics[3];
>>   	} inputs[2] = {
>> -		{ false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /* lives  
>> in Front Face GPR.z */
>> -
>> -		{ false, &ctx->fixed_pt_position_gpr, TGSI_SEMANTIC_SAMPLEID,  
>> TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in Fixed Point Position GPR.w  
>> */
>> +		{ false, &ctx->face_gpr, { TGSI_SEMANTIC_SAMPLEMASK /* lives in  
>> Front Face GPR.z */,
>> +			TGSI_SEMANTIC_HELPER_INVOCATION, ~0u } },
>> +		{ false, &ctx->fixed_pt_position_gpr, { TGSI_SEMANTIC_SAMPLEID  /*  
>> in Fixed Point Position GPR.w */,
>> +			TGSI_SEMANTIC_SAMPLEPOS, TGSI_SEMANTIC_HELPER_INVOCATION } }
>>   	};
>> -	int i, k, num_regs = 0;
>> +	int i, k, l, num_regs = 0;
>>
>>   	if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
>>   		return 0;
>> @@ -818,9 +821,11 @@ static int allocate_system_value_inputs(struct  
>> r600_shader_ctx *ctx, int gpr_off
>>   			struct tgsi_full_declaration *d = &parse.FullToken.FullDeclaration;
>>   			if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
>>   				for (k = 0; k < Elements(inputs); k++) {
>> -					if (d->Semantic.Name == inputs[k].name ||
>> -						d->Semantic.Name == inputs[k].alternate_name) {
>> -						inputs[k].enabled = true;
>> +					for (l = 0; l < 3; l++) {
>> +						if (d->Semantic.Name == inputs[k].associated_semantics[l]) {
>> +							inputs[k].enabled = true;
>> +							break;
>> +						}
>>   					}
>>   				}
>>   			}
>> @@ -832,7 +837,7 @@ static int allocate_system_value_inputs(struct  
>> r600_shader_ctx *ctx, int gpr_off
>>   	for (i = 0; i < Elements(inputs); i++) {
>>   		boolean enabled = inputs[i].enabled;
>>   		int *reg = inputs[i].reg;
>> -		unsigned name = inputs[i].name;
>> +		unsigned name = inputs[i].associated_semantics[0];
>>
>>   		if (enabled) {
>>   			int gpr = gpr_offset + num_regs++;
>> @@ -985,6 +990,56 @@ static int load_sample_position(struct  
>> r600_shader_ctx *ctx, struct r600_shader_
>>   	return t1;
>>   }
>>
>> +static int load_helper_invocation(struct r600_shader_ctx *ctx,
>> +	int mask_gpr, int mask_chan, int id_gpr, int id_chan)
>> +{
>> +	// sample (mask >> sampleid) & 1
>> +	struct r600_bytecode_alu alu;
>> +	int r, t = r600_get_temp(ctx);
>> +
>> +	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> +	alu.op = ALU_OP2_LSHR_INT;
>> +	alu.src[0].sel = mask_gpr;
>> +	alu.src[0].chan = mask_chan;
>> +	alu.src[1].sel = id_gpr;
>> +	alu.src[1].chan = id_chan;
>> +	alu.dst.sel = t;
>> +	alu.dst.chan = 0;
>> +	alu.dst.write = 1;
>> +	alu.last = 1;
>> +	r = r600_bytecode_add_alu(ctx->bc, &alu);
>> +	if (r)
>> +		return r;
>> +
>> +	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> +	alu.op = ALU_OP2_AND_INT;
>> +	alu.src[0].sel = t;
>> +	alu.src[0].chan = 0;
>> +	alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
>> +	alu.dst.sel = t;
>> +	alu.dst.chan = 0;
>> +	alu.dst.write = 1;
>> +	alu.last = 1;
>> +	r = r600_bytecode_add_alu(ctx->bc, &alu);
>> +	if (r)
>> +		return r;
>> +
>> +	memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>> +	alu.op = ALU_OP2_SUB_INT;
>> +	alu.src[0].sel = t;
>> +	alu.src[0].chan = 0;
>> +	alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
>> +	alu.dst.sel = t;
>> +	alu.dst.chan = 0;
>> +	alu.dst.write = 1;
>> +	alu.last = 1;
>> +	r = r600_bytecode_add_alu(ctx->bc, &alu);
>> +	if (r)
>> +		return r;
>> +
>> +	return t;
>> +}
>> +
>>   static void tgsi_src(struct r600_shader_ctx *ctx,
>>   		     const struct tgsi_full_src_register *tgsi_src,
>>   		     struct r600_shader_src *r600_src)
>> @@ -1048,6 +1103,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
>>   			r600_src->swizzle[2] = 3;
>>   			r600_src->swizzle[3] = 3;
>>   			r600_src->sel = 1;
>> +		} else if  
>> (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] ==  
>> TGSI_SEMANTIC_HELPER_INVOCATION) {
>> +			r600_src->swizzle[0] = 0;
>> +			r600_src->swizzle[1] = 0;
>> +			r600_src->swizzle[2] = 0;
>> +			r600_src->swizzle[3] = 0;
>> +			r600_src->sel = load_helper_invocation(ctx, ctx->face_gpr, 2,  
>> ctx->fixed_pt_position_gpr, 3);
>>   		}
>>   	} else {
>>   		if (tgsi_src->Register.Indirect)
>>