[Mesa-dev] [PATCH 1/9] tgsi/scan: add a new pass that analyzes tess factor writes

Nicolai Hähnle nhaehnle at gmail.com
Thu Sep 7 12:24:13 UTC 2017


On 07.09.2017 14:23, Nicolai Hähnle wrote:
> On 07.09.2017 12:55, Marek Olšák wrote:
>> We can also say if gl_TessLevel* is written multiple times, then one
>> of these must be true:
>> - there must not be a barrier between the writes
>> - gl_TessLevel* writes aren't inside conditional blocks
>>
>> I see that these games use a barrier in TCS:
>> - Hitman
>> - Grid Autosport
>> - Tomb Raider
>>
>> All constraints are true for those games. If we do this, it should be
>> enough: "there must not be a barrier between the writes"
> 
> Right, if there are no barriers between the writes, we can always assume 
> that the writes from invocation 0 land last (obviously we still need to 
> know that invocation 0 did write, but your patch already checks that).

There's an annoying subtlety though: the case where there are tess factor
writes *before* a barrier and then tess factor reads after the barrier.
But that should be easy enough to check.

Cheers,
Nicolai

> 
> Cheers,
> Nicolai
> 
> 
>>
>> Marek
>>
>>
>> On Thu, Sep 7, 2017 at 9:13 AM, Nicolai Hähnle <nhaehnle at gmail.com> 
>> wrote:
>>> On 06.09.2017 19:03, Marek Olšák wrote:
>>>>
>>>> From: Marek Olšák <marek.olsak at amd.com>
>>>>
>>>> The pass tries to deduce whether tess factors are always written by
>>>> invocation 0 (at least).
>>>>
>>>> The implication for radeonsi is that it doesn't have to use a barrier
>>>> near the end of TCS, and doesn't have to use LDS for passing the tess
>>>> factors to the epilog.
>>>
>>>
>>> What about the following shader snippet, which is particularly nasty on
>>> purpose:
>>>
>>>    gl_TessLevelInner = ...;
>>>
>>>    barrier();
>>>
>>>    if (gl_InvocationID == 1) {
>>>      gl_TessLevelInner = ...;
>>>    }
>>>
>>> The final gl_TessLevelInner must be the one written by invocation 1.
>>>
>>> The point is, this would be a nice improvement, but it requires us to
>>> check that whenever tess factors are written, the path that writes
>>> them includes invocation 0.
>>>
>>> (The written value itself might depend on the invocation ID, but in that
>>> case, we can do whatever we like, which means we can just take the value
>>> written by invocation 0.)
>>>
>>> I don't think we can determine this easily with the tools we have, and
>>> the effort is likely not worth it.
>>>
>>> Although you could do a very simplistic approximation of "tess factors
>>> are only written outside of control flow", which is probably reasonably
>>> common. (Uniform control flow would also work -- but that would require
>>> going to the LLVM level).
>>>
>>> Cheers,
>>> Nicolai
>>>
>>>
>>>
>>>> ---
>>>>    src/gallium/auxiliary/tgsi/tgsi_scan.c | 188
>>>> +++++++++++++++++++++++++++++++++
>>>>    src/gallium/auxiliary/tgsi/tgsi_scan.h |  11 ++
>>>>    2 files changed, 199 insertions(+)
>>>>
>>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>>> b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>>> index db87ce3..612a8c6 100644
>>>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>>> @@ -930,10 +930,198 @@ tgsi_scan_arrays(const struct tgsi_token 
>>>> *tokens,
>>>>                      array->writemask |= dst->Register.WriteMask;
>>>>                }
>>>>             }
>>>>          }
>>>>       }
>>>>         tgsi_parse_free(&parse);
>>>>         return;
>>>>    }
>>>> +
>>>> +static void
>>>> +check_no_subroutines(const struct tgsi_full_instruction *inst)
>>>> +{
>>>> +   switch (inst->Instruction.Opcode) {
>>>> +   case TGSI_OPCODE_BGNSUB:
>>>> +   case TGSI_OPCODE_ENDSUB:
>>>> +   case TGSI_OPCODE_CAL:
>>>> +      unreachable("subroutines unhandled");
>>>> +   }
>>>> +}
>>>> +
>>>> +static unsigned
>>>> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info,
>>>> +                              const struct tgsi_full_instruction 
>>>> *inst)
>>>> +{
>>>> +   unsigned writemask = 0;
>>>> +
>>>> +   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
>>>> +      const struct tgsi_full_dst_register *dst = &inst->Dst[i];
>>>> +
>>>> +      if (dst->Register.File == TGSI_FILE_OUTPUT &&
>>>> +          !dst->Register.Indirect) {
>>>> +         unsigned name = 
>>>> info->output_semantic_name[dst->Register.Index];
>>>> +
>>>> +         if (name == TGSI_SEMANTIC_TESSINNER)
>>>> +            writemask |= dst->Register.WriteMask;
>>>> +         else if (name == TGSI_SEMANTIC_TESSOUTER)
>>>> +            writemask |= dst->Register.WriteMask << 4;
>>>> +      }
>>>> +   }
>>>> +   return writemask;
>>>> +}
>>>> +
>>>> +static unsigned
>>>> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>>>> +                               struct tgsi_parse_context *parse,
>>>> +                               unsigned end_opcode)
>>>> +{
>>>> +   struct tgsi_full_instruction *inst;
>>>> +   unsigned writemask = 0;
>>>> +
>>>> +   do {
>>>> +      tgsi_parse_token(parse);
>>>> +      assert(parse->FullToken.Token.Type == 
>>>> TGSI_TOKEN_TYPE_INSTRUCTION);
>>>> +      inst = &parse->FullToken.FullInstruction;
>>>> +      check_no_subroutines(inst);
>>>> +
>>>> +      /* Recursively process nested blocks. */
>>>> +      switch (inst->Instruction.Opcode) {
>>>> +      case TGSI_OPCODE_IF:
>>>> +      case TGSI_OPCODE_UIF:
>>>> +         writemask |=
>>>> +            get_block_tessfactor_writemask(info, parse,
>>>> TGSI_OPCODE_ENDIF);
>>>> +         continue;
>>>> +
>>>> +      case TGSI_OPCODE_BGNLOOP:
>>>> +         writemask |=
>>>> +            get_block_tessfactor_writemask(info, parse,
>>>> TGSI_OPCODE_ENDLOOP);
>>>> +         continue;
>>>> +      }
>>>> +
>>>> +      writemask |= get_inst_tessfactor_writemask(info, inst);
>>>> +   } while (inst->Instruction.Opcode != end_opcode);
>>>> +
>>>> +   return writemask;
>>>> +}
>>>> +
>>>> +static void
>>>> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>>>> +                                  struct tgsi_parse_context *parse,
>>>> +                                  unsigned *upper_block_tf_writemask,
>>>> +                                  unsigned *cond_block_tf_writemask)
>>>> +{
>>>> +   struct tgsi_full_instruction *inst;
>>>> +   unsigned then_tessfactor_writemask = 0;
>>>> +   unsigned else_tessfactor_writemask = 0;
>>>> +   bool is_then = true;
>>>> +
>>>> +   do {
>>>> +      tgsi_parse_token(parse);
>>>> +      assert(parse->FullToken.Token.Type == 
>>>> TGSI_TOKEN_TYPE_INSTRUCTION);
>>>> +      inst = &parse->FullToken.FullInstruction;
>>>> +      check_no_subroutines(inst);
>>>> +
>>>> +      switch (inst->Instruction.Opcode) {
>>>> +      case TGSI_OPCODE_ELSE:
>>>> +         is_then = false;
>>>> +         continue;
>>>> +
>>>> +      /* Recursively process nested blocks. */
>>>> +      case TGSI_OPCODE_IF:
>>>> +      case TGSI_OPCODE_UIF:
>>>> +         get_if_block_tessfactor_writemask(info, parse,
>>>> +                                           is_then ?
>>>> &then_tessfactor_writemask :
>>>> +
>>>> &else_tessfactor_writemask,
>>>> +                                           cond_block_tf_writemask);
>>>> +         continue;
>>>> +
>>>> +      case TGSI_OPCODE_BGNLOOP:
>>>> +         *cond_block_tf_writemask |=
>>>> +            get_block_tessfactor_writemask(info, parse,
>>>> TGSI_OPCODE_ENDLOOP);
>>>> +         continue;
>>>> +      }
>>>> +
>>>> +      /* Process an instruction in the current block. */
>>>> +      unsigned writemask = get_inst_tessfactor_writemask(info, inst);
>>>> +
>>>> +      if (writemask) {
>>>> +         if (is_then)
>>>> +            then_tessfactor_writemask |= writemask;
>>>> +         else
>>>> +            else_tessfactor_writemask |= writemask;
>>>> +      }
>>>> +   } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
>>>> +
>>>> +   if (then_tessfactor_writemask || else_tessfactor_writemask) {
>>>> +      /* If both statements write the same tess factor channels,
>>>> +       * we can say that the upper block writes them too. */
>>>> +      *upper_block_tf_writemask |= then_tessfactor_writemask &
>>>> +                                   else_tessfactor_writemask;
>>>> +      *cond_block_tf_writemask |= then_tessfactor_writemask |
>>>> +                                  else_tessfactor_writemask;
>>>> +   }
>>>> +}
>>>> +
>>>> +void
>>>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>>>> +                    const struct tgsi_shader_info *info,
>>>> +                    struct tgsi_tessctrl_info *out)
>>>> +{
>>>> +   memset(out, 0, sizeof(*out));
>>>> +
>>>> +   if (info->processor != PIPE_SHADER_TESS_CTRL)
>>>> +      return;
>>>> +
>>>> +   struct tgsi_parse_context parse;
>>>> +   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
>>>> +      debug_printf("tgsi_parse_init() failed in 
>>>> tgsi_scan_arrays()!\n");
>>>> +      return;
>>>> +   }
>>>> +
>>>> +   /* The pass works as follows:
>>>> +    * If all codepaths write tess factors, we can say that all
>>>> invocations
>>>> +    * define tess factors, therefore invocation 0 defines tess 
>>>> factors.
>>>> +    *
>>>> +    * Each tess factor channel is tracked separately.
>>>> +    */
>>>> +   unsigned main_block_tf_writemask = 0;
>>>> +   unsigned cond_block_tf_writemask = 0;
>>>> +
>>>> +   while (!tgsi_parse_end_of_tokens(&parse)) {
>>>> +      tgsi_parse_token(&parse);
>>>> +
>>>> +      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
>>>> +         continue;
>>>> +
>>>> +      struct tgsi_full_instruction *inst =
>>>> &parse.FullToken.FullInstruction;
>>>> +      check_no_subroutines(inst);
>>>> +
>>>> +      /* Process nested blocks. */
>>>> +      switch (inst->Instruction.Opcode) {
>>>> +      case TGSI_OPCODE_IF:
>>>> +      case TGSI_OPCODE_UIF:
>>>> +         get_if_block_tessfactor_writemask(info, &parse,
>>>> +                                           &main_block_tf_writemask,
>>>> +                                           &cond_block_tf_writemask);
>>>> +         continue;
>>>> +
>>>> +      case TGSI_OPCODE_BGNLOOP:
>>>> +         cond_block_tf_writemask |=
>>>> +            get_block_tessfactor_writemask(info, &parse,
>>>> TGSI_OPCODE_ENDIF);
>>>> +         continue;
>>>> +      }
>>>> +
>>>> +      main_block_tf_writemask |= get_inst_tessfactor_writemask(info,
>>>> inst);
>>>> +   }
>>>> +
>>>> +   /* If there is a conditional block that writes a tess factor 
>>>> channel
>>>> that
>>>> +    * the main block doesn't write, we can't say (we're not sure) that
>>>> +    * invocation 0 writes it.
>>>> +    */
>>>> +   out->invoc0_tessfactors_are_def =
>>>> +      main_block_tf_writemask &&
>>>> +      !(cond_block_tf_writemask & ~main_block_tf_writemask);
>>>> +
>>>> +   tgsi_parse_free(&parse);
>>>> +}
>>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>>> b/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>>> index 857434f..ff64789 100644
>>>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>>> @@ -181,30 +181,41 @@ struct tgsi_array_info
>>>>       /** Whether an array with this ID was declared. */
>>>>       bool declared;
>>>>         /** The OR of all writemasks used to write to this array. */
>>>>       ubyte writemask;
>>>>         /** The range with which the array was declared. */
>>>>       struct tgsi_declaration_range range;
>>>>    };
>>>>    +struct tgsi_tessctrl_info
>>>> +{
>>>> +   /** Whether all codepaths of invocation 0 write tess factors. */
>>>> +   bool invoc0_tessfactors_are_def;
>>>> +};
>>>> +
>>>>    extern void
>>>>    tgsi_scan_shader(const struct tgsi_token *tokens,
>>>>                     struct tgsi_shader_info *info);
>>>>      void
>>>>    tgsi_scan_arrays(const struct tgsi_token *tokens,
>>>>                     unsigned file,
>>>>                     unsigned max_array_id,
>>>>                     struct tgsi_array_info *arrays);
>>>>    +void
>>>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>>>> +                    const struct tgsi_shader_info *info,
>>>> +                    struct tgsi_tessctrl_info *out);
>>>> +
>>>>    static inline bool
>>>>    tgsi_is_bindless_image_file(unsigned file)
>>>>    {
>>>>       return file != TGSI_FILE_IMAGE &&
>>>>              file != TGSI_FILE_MEMORY &&
>>>>              file != TGSI_FILE_BUFFER;
>>>>    }
>>>>      #ifdef __cplusplus
>>>>    } // extern "C"
>>>>
>>>
>>>
>>> -- 
>>> Lerne, wie die Welt wirklich ist,
>>> Aber vergiss niemals, wie sie sein sollte.
> 
> 


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list