[Mesa-dev] [PATCH 1/2] tgsi/scan: add a new pass that analyzes tess factor writes (v2)

Nicolai Hähnle nhaehnle at gmail.com
Mon Sep 11 14:48:47 UTC 2017


On 11.09.2017 16:42, Marek Olšák wrote:
> On Mon, Sep 11, 2017 at 4:12 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> On 07.09.2017 20:26, Marek Olšák wrote:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> The pass tries to deduce whether tess factors are always written by
>>> all shader invocations.
>>>
>>> The implication for radeonsi is that it doesn't have to use a barrier
>>> near the end of TCS, and doesn't have to use LDS for passing the tess
>>> factors to the epilog.
>>>
>>> v2: Handle barriers and do the analysis pass for each code segment
>>>       surrounded by barriers separately, and AND results from all
>>>       such segments writing tess factors. The change is trivial in the main
>>>       switch statement.
>>>
>>>       Also, the result is renamed to "tessfactors_are_def_in_all_invocs"
>>>       to make the name accurate.
>>> ---
>>>    src/gallium/auxiliary/tgsi/tgsi_scan.c | 224 +++++++++++++++++++++++++++++++++
>>>    src/gallium/auxiliary/tgsi/tgsi_scan.h |  11 ++
>>>    2 files changed, 235 insertions(+)
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> index db87ce3..b893289 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> @@ -930,10 +930,234 @@ tgsi_scan_arrays(const struct tgsi_token *tokens,
>>>                      array->writemask |= dst->Register.WriteMask;
>>>                }
>>>             }
>>>          }
>>>       }
>>>         tgsi_parse_free(&parse);
>>>         return;
>>>    }
>>> +
>>> +static void
>>> +check_no_subroutines(const struct tgsi_full_instruction *inst)
>>> +{
>>> +   switch (inst->Instruction.Opcode) {
>>> +   case TGSI_OPCODE_BGNSUB:
>>> +   case TGSI_OPCODE_ENDSUB:
>>> +   case TGSI_OPCODE_CAL:
>>> +      unreachable("subroutines unhandled");
>>> +   }
>>> +}
>>> +
>>> +static unsigned
>>> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info,
>>> +                              const struct tgsi_full_instruction *inst)
>>> +{
>>> +   unsigned writemask = 0;
>>> +
>>> +   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
>>> +      const struct tgsi_full_dst_register *dst = &inst->Dst[i];
>>> +
>>> +      if (dst->Register.File == TGSI_FILE_OUTPUT &&
>>> +          !dst->Register.Indirect) {
>>> +         unsigned name = info->output_semantic_name[dst->Register.Index];
>>> +
>>> +         if (name == TGSI_SEMANTIC_TESSINNER)
>>> +            writemask |= dst->Register.WriteMask;
>>> +         else if (name == TGSI_SEMANTIC_TESSOUTER)
>>> +            writemask |= dst->Register.WriteMask << 4;
>>> +      }
>>> +   }
>>> +   return writemask;
>>> +}
>>> +
>>> +static unsigned
>>> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>>> +                               struct tgsi_parse_context *parse,
>>> +                               unsigned end_opcode)
>>> +{
>>> +   struct tgsi_full_instruction *inst;
>>> +   unsigned writemask = 0;
>>> +
>>> +   do {
>>> +      tgsi_parse_token(parse);
>>> +      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>>> +      inst = &parse->FullToken.FullInstruction;
>>> +      check_no_subroutines(inst);
>>> +
>>> +      /* Recursively process nested blocks. */
>>> +      switch (inst->Instruction.Opcode) {
>>> +      case TGSI_OPCODE_IF:
>>> +      case TGSI_OPCODE_UIF:
>>> +         writemask |=
>>> +            get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDIF);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BGNLOOP:
>>> +         writemask |=
>>> +            get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BARRIER:
>>> +         unreachable("nested BARRIER is illegal");
>>> +         continue;
>>> +      }
>>> +
>>> +      writemask |= get_inst_tessfactor_writemask(info, inst);
>>> +   } while (inst->Instruction.Opcode != end_opcode);
>>> +
>>> +   return writemask;
>>> +}
>>> +
>>> +static void
>>> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>>> +                                  struct tgsi_parse_context *parse,
>>> +                                  unsigned *upper_block_tf_writemask,
>>> +                                  unsigned *cond_block_tf_writemask)
>>> +{
>>> +   struct tgsi_full_instruction *inst;
>>> +   unsigned then_tessfactor_writemask = 0;
>>> +   unsigned else_tessfactor_writemask = 0;
>>> +   bool is_then = true;
>>> +
>>> +   do {
>>> +      tgsi_parse_token(parse);
>>> +      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>>> +      inst = &parse->FullToken.FullInstruction;
>>> +      check_no_subroutines(inst);
>>> +
>>> +      switch (inst->Instruction.Opcode) {
>>> +      case TGSI_OPCODE_ELSE:
>>> +         is_then = false;
>>> +         continue;
>>> +
>>> +      /* Recursively process nested blocks. */
>>> +      case TGSI_OPCODE_IF:
>>> +      case TGSI_OPCODE_UIF:
>>> +         get_if_block_tessfactor_writemask(info, parse,
>>> +                                           is_then ? &then_tessfactor_writemask :
>>> +                                                     &else_tessfactor_writemask,
>>> +                                           cond_block_tf_writemask);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BGNLOOP:
>>> +         *cond_block_tf_writemask |=
>>> +            get_block_tessfactor_writemask(info, parse, TGSI_OPCODE_ENDLOOP);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BARRIER:
>>> +         unreachable("nested BARRIER is illegal");
>>> +         continue;
>>> +      }
>>> +
>>> +      /* Process an instruction in the current block. */
>>> +      unsigned writemask = get_inst_tessfactor_writemask(info, inst);
>>> +
>>> +      if (writemask) {
>>> +         if (is_then)
>>> +            then_tessfactor_writemask |= writemask;
>>> +         else
>>> +            else_tessfactor_writemask |= writemask;
>>> +      }
>>> +   } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
>>> +
>>> +   if (then_tessfactor_writemask || else_tessfactor_writemask) {
>>> +      /* If both statements write the same tess factor channels,
>>> +       * we can say that the upper block writes them too. */
>>> +      *upper_block_tf_writemask |= then_tessfactor_writemask &
>>> +                                   else_tessfactor_writemask;
>>> +      *cond_block_tf_writemask |= then_tessfactor_writemask |
>>> +                                  else_tessfactor_writemask;
>>> +   }
>>> +}
>>> +
>>> +void
>>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>>> +                    const struct tgsi_shader_info *info,
>>> +                    struct tgsi_tessctrl_info *out)
>>> +{
>>> +   memset(out, 0, sizeof(*out));
>>> +
>>> +   if (info->processor != PIPE_SHADER_TESS_CTRL)
>>> +      return;
>>> +
>>> +   struct tgsi_parse_context parse;
>>> +   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
>>> +      debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n");
>>> +      return;
>>> +   }
>>> +
>>> +   /* The pass works as follows:
>>> +    * If all codepaths write tess factors, we can say that all invocations
>>> +    * define tess factors.
>>> +    *
>>> +    * Each tess factor channel is tracked separately.
>>> +    */
>>> +   unsigned main_block_tf_writemask = 0; /* if main block writes tess factors */
>>> +   unsigned cond_block_tf_writemask = 0; /* if cond block writes tess factors */
>>> +
>>> +   /* Initial value = true. Here the pass will accumulate results from multiple
>>> +    * segments surrounded by barriers. If tess factors aren't written at all,
>>> +    * it's a shader bug and we don't care if this will be true.
>>> +    */
>>> +   out->tessfactors_are_def_in_all_invocs = true;
>>> +
>>> +   while (!tgsi_parse_end_of_tokens(&parse)) {
>>> +      tgsi_parse_token(&parse);
>>> +
>>> +      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
>>> +         continue;
>>> +
>>> +      struct tgsi_full_instruction *inst = &parse.FullToken.FullInstruction;
>>> +      check_no_subroutines(inst);
>>> +
>>> +      /* Process nested blocks. */
>>> +      switch (inst->Instruction.Opcode) {
>>> +      case TGSI_OPCODE_IF:
>>> +      case TGSI_OPCODE_UIF:
>>> +         get_if_block_tessfactor_writemask(info, &parse,
>>> +                                           &main_block_tf_writemask,
>>> +                                           &cond_block_tf_writemask);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BGNLOOP:
>>> +         cond_block_tf_writemask |=
>>> +            get_block_tessfactor_writemask(info, &parse, TGSI_OPCODE_ENDIF);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BARRIER:
>>> +         /* The following case must be prevented:
>>> +          *    gl_TessLevelInner = ...;
>>> +          *    barrier();
>>> +          *    if (gl_InvocationID == 1)
>>> +          *       gl_TessLevelInner = ...;
>>> +          *
>>> +          * If you consider disjoint code segments separated by barriers, each
>>> +          * such segment that writes tess factor channels should write the same
>>> +          * channels in all codepaths within that segment.
>>> +          */
>>> +         if (main_block_tf_writemask || cond_block_tf_writemask) {
>>> +            /* Accumulate the result: */
>>> +            out->tessfactors_are_def_in_all_invocs &=
>>> +               main_block_tf_writemask &&
>>> +               !(cond_block_tf_writemask & ~main_block_tf_writemask);
>>
>>
>> Could this be just the following:
>>
>>     out->tessfactors_are_def_in_all_invocs &=
>>        !(cond_block_tf_writemask & ~main_block_tf_writemask);
>>
>> (And the same below after the loop)
> 
> Do you mean that "main_block_tf_writemask &&" is redundant and always
> evaluates the same as the other part of the condition?

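The two parts can evaluate differently, but inside the enclosing
"if (main_block_tf_writemask || cond_block_tf_writemask)", whenever the
second part is true the first one must be true as well. So the second
part alone decides the condition.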

Basically, the condition we want for the optimization can only be broken 
if "cond_block_tf_writemask & ~main_block_tf_writemask" happens, so that 
part of the condition is the only thing that matters.

> 
>>
>> Point being, what if you have a barrier() and no assignment to tessfactors
>> afterwards.
> 
> This point doesn't make sense - the conditional block assures that
> code segments separated by barriers without assignments to tess
> factors are ignored.

Ah yes, of course.

Cheers,
Nicolai

> 
> Marek
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list