[Mesa-dev] [PATCH 1/9] tgsi/scan: add a new pass that analyzes tess factor writes

Marek Olšák maraeo at gmail.com
Thu Sep 7 10:55:30 UTC 2017


We can also say that if gl_TessLevel* is written multiple times, then one
of these must be true:
- there must not be a barrier between the writes
- gl_TessLevel* writes aren't inside conditional blocks

I see that these games use a barrier in TCS:
- Hitman
- Grid Autosport
- Tomb Raider

All constraints are true for those games. If we enforce only this one, it
should be enough: "there must not be a barrier between the writes"

Marek


On Thu, Sep 7, 2017 at 9:13 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 06.09.2017 19:03, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> The pass tries to deduce whether tess factors are always written by
>> invocation 0 (at least).
>>
>> The implication for radeonsi is that it doesn't have to use a barrier
>> near the end of TCS, and doesn't have to use LDS for passing the tess
>> factors to the epilog.
>
>
> What about the following shader snippet, which is particularly nasty on
> purpose:
>
>   gl_TessLevelInner = ...;
>
>   barrier();
>
>   if (gl_InvocationID == 1) {
>     gl_TessLevelInner = ...;
>   }
>
> The final gl_TessLevelInner must be the one written by invocation 1.
>
> The point is, this would be a nice improvement, but it requires us to check
> that whenever tess factors are written, then that path includes invocation
> 0.
>
> (The written value itself might depend on the invocation ID, but in that
> case, we can do whatever we like, which means we can just take the value
> written by invocation 0.)
>
> I don't think we can determine this easily with the tools we have, and the
> effort is likely not worth it.
>
> Although you could do a very simplistic approximation of "tess factors are
> only written outside of control flow", which is probably reasonably common.
> (Uniform control flow would also work -- but that would require going to the
> LLVM level).
>
> Cheers,
> Nicolai
>
>
>
>> ---
>>   src/gallium/auxiliary/tgsi/tgsi_scan.c | 188
>> +++++++++++++++++++++++++++++++++
>>   src/gallium/auxiliary/tgsi/tgsi_scan.h |  11 ++
>>   2 files changed, 199 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> index db87ce3..612a8c6 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> @@ -930,10 +930,198 @@ tgsi_scan_arrays(const struct tgsi_token *tokens,
>>                     array->writemask |= dst->Register.WriteMask;
>>               }
>>            }
>>         }
>>      }
>>        tgsi_parse_free(&parse);
>>        return;
>>   }
>> +
>> +static void
>> +check_no_subroutines(const struct tgsi_full_instruction *inst)
>> +{
>> +   switch (inst->Instruction.Opcode) {
>> +   case TGSI_OPCODE_BGNSUB:
>> +   case TGSI_OPCODE_ENDSUB:
>> +   case TGSI_OPCODE_CAL:
>> +      unreachable("subroutines unhandled");
>> +   }
>> +}
>> +
>> +static unsigned
>> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info,
>> +                              const struct tgsi_full_instruction *inst)
>> +{
>> +   unsigned writemask = 0;
>> +
>> +   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
>> +      const struct tgsi_full_dst_register *dst = &inst->Dst[i];
>> +
>> +      if (dst->Register.File == TGSI_FILE_OUTPUT &&
>> +          !dst->Register.Indirect) {
>> +         unsigned name = info->output_semantic_name[dst->Register.Index];
>> +
>> +         if (name == TGSI_SEMANTIC_TESSINNER)
>> +            writemask |= dst->Register.WriteMask;
>> +         else if (name == TGSI_SEMANTIC_TESSOUTER)
>> +            writemask |= dst->Register.WriteMask << 4;
>> +      }
>> +   }
>> +   return writemask;
>> +}
>> +
>> +static unsigned
>> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>> +                               struct tgsi_parse_context *parse,
>> +                               unsigned end_opcode)
>> +{
>> +   struct tgsi_full_instruction *inst;
>> +   unsigned writemask = 0;
>> +
>> +   do {
>> +      tgsi_parse_token(parse);
>> +      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>> +      inst = &parse->FullToken.FullInstruction;
>> +      check_no_subroutines(inst);
>> +
>> +      /* Recursively process nested blocks. */
>> +      switch (inst->Instruction.Opcode) {
>> +      case TGSI_OPCODE_IF:
>> +      case TGSI_OPCODE_UIF:
>> +         writemask |=
>> +            get_block_tessfactor_writemask(info, parse,
>> TGSI_OPCODE_ENDIF);
>> +         continue;
>> +
>> +      case TGSI_OPCODE_BGNLOOP:
>> +         writemask |=
>> +            get_block_tessfactor_writemask(info, parse,
>> TGSI_OPCODE_ENDLOOP);
>> +         continue;
>> +      }
>> +
>> +      writemask |= get_inst_tessfactor_writemask(info, inst);
>> +   } while (inst->Instruction.Opcode != end_opcode);
>> +
>> +   return writemask;
>> +}
>> +
>> +static void
>> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>> +                                  struct tgsi_parse_context *parse,
>> +                                  unsigned *upper_block_tf_writemask,
>> +                                  unsigned *cond_block_tf_writemask)
>> +{
>> +   struct tgsi_full_instruction *inst;
>> +   unsigned then_tessfactor_writemask = 0;
>> +   unsigned else_tessfactor_writemask = 0;
>> +   bool is_then = true;
>> +
>> +   do {
>> +      tgsi_parse_token(parse);
>> +      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>> +      inst = &parse->FullToken.FullInstruction;
>> +      check_no_subroutines(inst);
>> +
>> +      switch (inst->Instruction.Opcode) {
>> +      case TGSI_OPCODE_ELSE:
>> +         is_then = false;
>> +         continue;
>> +
>> +      /* Recursively process nested blocks. */
>> +      case TGSI_OPCODE_IF:
>> +      case TGSI_OPCODE_UIF:
>> +         get_if_block_tessfactor_writemask(info, parse,
>> +                                           is_then ?
>> &then_tessfactor_writemask :
>> +
>> &else_tessfactor_writemask,
>> +                                           cond_block_tf_writemask);
>> +         continue;
>> +
>> +      case TGSI_OPCODE_BGNLOOP:
>> +         *cond_block_tf_writemask |=
>> +            get_block_tessfactor_writemask(info, parse,
>> TGSI_OPCODE_ENDLOOP);
>> +         continue;
>> +      }
>> +
>> +      /* Process an instruction in the current block. */
>> +      unsigned writemask = get_inst_tessfactor_writemask(info, inst);
>> +
>> +      if (writemask) {
>> +         if (is_then)
>> +            then_tessfactor_writemask |= writemask;
>> +         else
>> +            else_tessfactor_writemask |= writemask;
>> +      }
>> +   } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
>> +
>> +   if (then_tessfactor_writemask || else_tessfactor_writemask) {
>> +      /* If both statements write the same tess factor channels,
>> +       * we can say that the upper block writes them too. */
>> +      *upper_block_tf_writemask |= then_tessfactor_writemask &
>> +                                   else_tessfactor_writemask;
>> +      *cond_block_tf_writemask |= then_tessfactor_writemask |
>> +                                  else_tessfactor_writemask;
>> +   }
>> +}
>> +
>> +void
>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>> +                    const struct tgsi_shader_info *info,
>> +                    struct tgsi_tessctrl_info *out)
>> +{
>> +   memset(out, 0, sizeof(*out));
>> +
>> +   if (info->processor != PIPE_SHADER_TESS_CTRL)
>> +      return;
>> +
>> +   struct tgsi_parse_context parse;
>> +   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
>> +      debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n");
>> +      return;
>> +   }
>> +
>> +   /* The pass works as follows:
>> +    * If all codepaths write tess factors, we can say that all
>> invocations
>> +    * define tess factors, therefore invocation 0 defines tess factors.
>> +    *
>> +    * Each tess factor channel is tracked separately.
>> +    */
>> +   unsigned main_block_tf_writemask = 0;
>> +   unsigned cond_block_tf_writemask = 0;
>> +
>> +   while (!tgsi_parse_end_of_tokens(&parse)) {
>> +      tgsi_parse_token(&parse);
>> +
>> +      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
>> +         continue;
>> +
>> +      struct tgsi_full_instruction *inst =
>> &parse.FullToken.FullInstruction;
>> +      check_no_subroutines(inst);
>> +
>> +      /* Process nested blocks. */
>> +      switch (inst->Instruction.Opcode) {
>> +      case TGSI_OPCODE_IF:
>> +      case TGSI_OPCODE_UIF:
>> +         get_if_block_tessfactor_writemask(info, &parse,
>> +                                           &main_block_tf_writemask,
>> +                                           &cond_block_tf_writemask);
>> +         continue;
>> +
>> +      case TGSI_OPCODE_BGNLOOP:
>> +         cond_block_tf_writemask |=
>> +            get_block_tessfactor_writemask(info, &parse,
>> TGSI_OPCODE_ENDIF);
>> +         continue;
>> +      }
>> +
>> +      main_block_tf_writemask |= get_inst_tessfactor_writemask(info,
>> inst);
>> +   }
>> +
>> +   /* If there is a conditional block that writes a tess factor channel
>> that
>> +    * the main block doesn't write, we can't say (we're not sure) that
>> +    * invocation 0 writes it.
>> +    */
>> +   out->invoc0_tessfactors_are_def =
>> +      main_block_tf_writemask &&
>> +      !(cond_block_tf_writemask & ~main_block_tf_writemask);
>> +
>> +   tgsi_parse_free(&parse);
>> +}
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h
>> b/src/gallium/auxiliary/tgsi/tgsi_scan.h
>> index 857434f..ff64789 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
>> @@ -181,30 +181,41 @@ struct tgsi_array_info
>>      /** Whether an array with this ID was declared. */
>>      bool declared;
>>        /** The OR of all writemasks used to write to this array. */
>>      ubyte writemask;
>>        /** The range with which the array was declared. */
>>      struct tgsi_declaration_range range;
>>   };
>>   +struct tgsi_tessctrl_info
>> +{
>> +   /** Whether all codepaths of invocation 0 write tess factors. */
>> +   bool invoc0_tessfactors_are_def;
>> +};
>> +
>>   extern void
>>   tgsi_scan_shader(const struct tgsi_token *tokens,
>>                    struct tgsi_shader_info *info);
>>     void
>>   tgsi_scan_arrays(const struct tgsi_token *tokens,
>>                    unsigned file,
>>                    unsigned max_array_id,
>>                    struct tgsi_array_info *arrays);
>>   +void
>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>> +                    const struct tgsi_shader_info *info,
>> +                    struct tgsi_tessctrl_info *out);
>> +
>>   static inline bool
>>   tgsi_is_bindless_image_file(unsigned file)
>>   {
>>      return file != TGSI_FILE_IMAGE &&
>>             file != TGSI_FILE_MEMORY &&
>>             file != TGSI_FILE_BUFFER;
>>   }
>>     #ifdef __cplusplus
>>   } // extern "C"
>>
>
>
> --
> Lerne, wie die Welt wirklich ist,
> Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list