[Mesa-dev] [PATCH] r600g: Support TGSI_SEMANTIC_HELPER_INVOCATION

Nicolai Hähnle nhaehnle at gmail.com
Mon Nov 16 05:31:03 PST 2015


Hi Glenn,

On 14.11.2015 00:11, Glenn Kennard wrote:
> On Fri, 13 Nov 2015 18:57:28 +0100, Nicolai Hähnle <nhaehnle at gmail.com>
> wrote:
>
>> On 13.11.2015 00:14, Glenn Kennard wrote:
>>> Signed-off-by: Glenn Kennard <glenn.kennard at gmail.com>
>>> ---
>>> Maybe there is a better way to check if a thread is a helper invocation?
>>
>> Is ctx->face_gpr guaranteed to be initialized when
>> load_helper_invocation is called?
>>
>
> allocate_system_value_inputs() sets that if needed, and is called before
> parsing any opcodes.

Sorry, you're right, I missed the second change to the inputs array there.


>> Aside, I'm not sure I understand correctly what this is supposed to
>> do. The values you're querying are related to multi-sampling, but my
>> understanding has always been that helper invocations can also happen
>> without multi-sampling: you always want to process 2x2 quads of pixels
>> at a time to be able to compute derivatives for texture sampling. When
>> the boundary of primitive intersects such a quad, you get helper
>> invocations outside the primitive.
>>
>
> Non-MSAA buffers act just like 1 sample buffers with regards to the
> coverage mask supplied by the hardware, so helper invocations which have
> no coverage get a 0 for the mask value, and normal fragments get 1.
> Works with the piglit test case posted at least...

Here's why I'm still skeptical: According to the GLSL spec, the fragment 
shader is only run once per pixel by default, even when MSAA is enabled. 
_However_, if a shader statically accesses the SampleID, _then_ it must 
be run once per fragment. The way I understand it, your change forces 
the fragment shader to access SampleID, even when people ostensibly use 
HelperInvocation in the hope of optimizing something.

In the usual MSAA operation of only running the fragment shader once per 
pixel, HelperInvocation should be the same as SampleMask != 0, right? It 
seems like the right thing to do is to _not_ allocate the 
TGSI_SEMANTIC_SAMPLEID when TGSI_SEMANTIC_HELPER_INVOCATION is used, and 
then use different code paths in load_helper_invocation based on which 
of the source registers are actually there.

Cheers,
Nicolai

>
>> Cheers,
>> Nicolai
>>
>>>   src/gallium/drivers/r600/r600_shader.c | 83
>>> +++++++++++++++++++++++++++++-----
>>>   1 file changed, 72 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/src/gallium/drivers/r600/r600_shader.c
>>> b/src/gallium/drivers/r600/r600_shader.c
>>> index 560197c..a227d78 100644
>>> --- a/src/gallium/drivers/r600/r600_shader.c
>>> +++ b/src/gallium/drivers/r600/r600_shader.c
>>> @@ -530,7 +530,8 @@ static int r600_spi_sid(struct r600_shader_io * io)
>>>           name == TGSI_SEMANTIC_PSIZE ||
>>>           name == TGSI_SEMANTIC_EDGEFLAG ||
>>>           name == TGSI_SEMANTIC_FACE ||
>>> -        name == TGSI_SEMANTIC_SAMPLEMASK)
>>> +        name == TGSI_SEMANTIC_SAMPLEMASK ||
>>> +        name == TGSI_SEMANTIC_HELPER_INVOCATION)
>>>           index = 0;
>>>       else {
>>>           if (name == TGSI_SEMANTIC_GENERIC) {
>>> @@ -734,7 +735,8 @@ static int tgsi_declaration(struct
>>> r600_shader_ctx *ctx)
>>>       case TGSI_FILE_SYSTEM_VALUE:
>>>           if (d->Semantic.Name == TGSI_SEMANTIC_SAMPLEMASK ||
>>>               d->Semantic.Name == TGSI_SEMANTIC_SAMPLEID ||
>>> -            d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS) {
>>> +            d->Semantic.Name == TGSI_SEMANTIC_SAMPLEPOS ||
>>> +            d->Semantic.Name == TGSI_SEMANTIC_HELPER_INVOCATION) {
>>>               break; /* Already handled from
>>> allocate_system_value_inputs */
>>>           } else if (d->Semantic.Name == TGSI_SEMANTIC_INSTANCEID) {
>>>               if (!ctx->native_integers) {
>>> @@ -776,13 +778,14 @@ static int allocate_system_value_inputs(struct
>>> r600_shader_ctx *ctx, int gpr_off
>>>       struct {
>>>           boolean enabled;
>>>           int *reg;
>>> -        unsigned name, alternate_name;
>>> +        unsigned associated_semantics[3];
>>>       } inputs[2] = {
>>> -        { false, &ctx->face_gpr, TGSI_SEMANTIC_SAMPLEMASK, ~0u }, /*
>>> lives in Front Face GPR.z */
>>> -
>>> -        { false, &ctx->fixed_pt_position_gpr,
>>> TGSI_SEMANTIC_SAMPLEID, TGSI_SEMANTIC_SAMPLEPOS } /* SAMPLEID is in
>>> Fixed Point Position GPR.w */
>>> +        { false, &ctx->face_gpr, { TGSI_SEMANTIC_SAMPLEMASK /* lives
>>> in Front Face GPR.z */,
>>> +            TGSI_SEMANTIC_HELPER_INVOCATION, ~0u } },
>>> +        { false, &ctx->fixed_pt_position_gpr, {
>>> TGSI_SEMANTIC_SAMPLEID  /* in Fixed Point Position GPR.w */,
>>> +            TGSI_SEMANTIC_SAMPLEPOS, TGSI_SEMANTIC_HELPER_INVOCATION
>>> } }
>>>       };
>>> -    int i, k, num_regs = 0;
>>> +    int i, k, l, num_regs = 0;
>>>
>>>       if (tgsi_parse_init(&parse, ctx->tokens) != TGSI_PARSE_OK) {
>>>           return 0;
>>> @@ -818,9 +821,11 @@ static int allocate_system_value_inputs(struct
>>> r600_shader_ctx *ctx, int gpr_off
>>>               struct tgsi_full_declaration *d =
>>> &parse.FullToken.FullDeclaration;
>>>               if (d->Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
>>>                   for (k = 0; k < Elements(inputs); k++) {
>>> -                    if (d->Semantic.Name == inputs[k].name ||
>>> -                        d->Semantic.Name == inputs[k].alternate_name) {
>>> -                        inputs[k].enabled = true;
>>> +                    for (l = 0; l < 3; l++) {
>>> +                        if (d->Semantic.Name ==
>>> inputs[k].associated_semantics[l]) {
>>> +                            inputs[k].enabled = true;
>>> +                            break;
>>> +                        }
>>>                       }
>>>                   }
>>>               }
>>> @@ -832,7 +837,7 @@ static int allocate_system_value_inputs(struct
>>> r600_shader_ctx *ctx, int gpr_off
>>>       for (i = 0; i < Elements(inputs); i++) {
>>>           boolean enabled = inputs[i].enabled;
>>>           int *reg = inputs[i].reg;
>>> -        unsigned name = inputs[i].name;
>>> +        unsigned name = inputs[i].associated_semantics[0];
>>>
>>>           if (enabled) {
>>>               int gpr = gpr_offset + num_regs++;
>>> @@ -985,6 +990,56 @@ static int load_sample_position(struct
>>> r600_shader_ctx *ctx, struct r600_shader_
>>>       return t1;
>>>   }
>>>
>>> +static int load_helper_invocation(struct r600_shader_ctx *ctx,
>>> +    int mask_gpr, int mask_chan, int id_gpr, int id_chan)
>>> +{
>>> +    // sample (mask >> sampleid) & 1
>>> +    struct r600_bytecode_alu alu;
>>> +    int r, t = r600_get_temp(ctx);
>>> +
>>> +    memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> +    alu.op = ALU_OP2_LSHR_INT;
>>> +    alu.src[0].sel = mask_gpr;
>>> +    alu.src[0].chan = mask_chan;
>>> +    alu.src[1].sel = id_gpr;
>>> +    alu.src[1].chan = id_chan;
>>> +    alu.dst.sel = t;
>>> +    alu.dst.chan = 0;
>>> +    alu.dst.write = 1;
>>> +    alu.last = 1;
>>> +    r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> +    if (r)
>>> +        return r;
>>> +
>>> +    memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> +    alu.op = ALU_OP2_AND_INT;
>>> +    alu.src[0].sel = t;
>>> +    alu.src[0].chan = 0;
>>> +    alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
>>> +    alu.dst.sel = t;
>>> +    alu.dst.chan = 0;
>>> +    alu.dst.write = 1;
>>> +    alu.last = 1;
>>> +    r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> +    if (r)
>>> +        return r;
>>> +
>>> +    memset(&alu, 0, sizeof(struct r600_bytecode_alu));
>>> +    alu.op = ALU_OP2_SUB_INT;
>>> +    alu.src[0].sel = t;
>>> +    alu.src[0].chan = 0;
>>> +    alu.src[1].sel = V_SQ_ALU_SRC_1_INT;
>>> +    alu.dst.sel = t;
>>> +    alu.dst.chan = 0;
>>> +    alu.dst.write = 1;
>>> +    alu.last = 1;
>>> +    r = r600_bytecode_add_alu(ctx->bc, &alu);
>>> +    if (r)
>>> +        return r;
>>> +
>>> +    return t;
>>> +}
>>> +
>>>   static void tgsi_src(struct r600_shader_ctx *ctx,
>>>                const struct tgsi_full_src_register *tgsi_src,
>>>                struct r600_shader_src *r600_src)
>>> @@ -1048,6 +1103,12 @@ static void tgsi_src(struct r600_shader_ctx *ctx,
>>>               r600_src->swizzle[2] = 3;
>>>               r600_src->swizzle[3] = 3;
>>>               r600_src->sel = 1;
>>> +        } else if
>>> (ctx->info.system_value_semantic_name[tgsi_src->Register.Index] ==
>>> TGSI_SEMANTIC_HELPER_INVOCATION) {
>>> +            r600_src->swizzle[0] = 0;
>>> +            r600_src->swizzle[1] = 0;
>>> +            r600_src->swizzle[2] = 0;
>>> +            r600_src->swizzle[3] = 0;
>>> +            r600_src->sel = load_helper_invocation(ctx,
>>> ctx->face_gpr, 2, ctx->fixed_pt_position_gpr, 3);
>>>           }
>>>       } else {
>>>           if (tgsi_src->Register.Indirect)
>>>



More information about the mesa-dev mailing list