[Mesa-dev] [PATCH 1/9] tgsi/scan: add a new pass that analyzes tess factor writes

Nicolai Hähnle nhaehnle at gmail.com
Thu Sep 7 12:23:02 UTC 2017


On 07.09.2017 12:55, Marek Olšák wrote:
> We can also say if gl_TessLevel* is written multiple times, then one
> of these must be true:
> - there must not be a barrier between the writes
> - gl_TessLevel* writes aren't inside conditional blocks
> 
> I see that these games use a barrier in TCS:
> - Hitman
> - Grid Autosport
> - Tomb Raider
> 
> All constraints are true for those games. If we do this, it should be
> enough: "there must not be a barrier between the writes"

Right, if there are no barriers between the writes, we can always assume 
that the writes from invocation 0 land last (obviously we still need to 
know that invocation 0 did write, but your patch already checks that).

Cheers,
Nicolai


> 
> Marek
> 
> 
> On Thu, Sep 7, 2017 at 9:13 AM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
>> On 06.09.2017 19:03, Marek Olšák wrote:
>>>
>>> From: Marek Olšák <marek.olsak at amd.com>
>>>
>>> The pass tries to deduce whether tess factors are always written by
>>> invocation 0 (at least).
>>>
>>> The implication for radeonsi is that it doesn't have to use a barrier
>>> near the end of TCS, and doesn't have to use LDS for passing the tess
>>> factors to the epilog.
>>
>>
>> What about the following shader snippet, which is particularly nasty on
>> purpose:
>>
>>    gl_TessLevelInner = ...;
>>
>>    barrier();
>>
>>    if (gl_InvocationID == 1) {
>>      gl_TessLevelInner = ...;
>>    }
>>
>> The final gl_TessLevelInner must be the one written by invocation 1.
>>
>> The point is, this would be a nice improvement, but it requires us to check
>> that whenever tess factors are written, the code path that writes them is
>> also taken by invocation 0.
>>
>> (The written value itself might depend on the invocation ID, but in that
>> case, we can do whatever we like, which means we can just take the value
>> written by invocation 0.)
>>
>> I don't think we can determine this easily with the tools we have, and the
>> effort is likely not worth it.
>>
>> Although you could do a very simplistic approximation of "tess factors are
>> only written outside of control flow", which is probably reasonably common.
>> (Uniform control flow would also work -- but that would require going to the
>> LLVM level).
>>
>> Cheers,
>> Nicolai
>>
>>
>>
>>> ---
>>>    src/gallium/auxiliary/tgsi/tgsi_scan.c | 188
>>> +++++++++++++++++++++++++++++++++
>>>    src/gallium/auxiliary/tgsi/tgsi_scan.h |  11 ++
>>>    2 files changed, 199 insertions(+)
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> index db87ce3..612a8c6 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>>> @@ -930,10 +930,198 @@ tgsi_scan_arrays(const struct tgsi_token *tokens,
>>>                      array->writemask |= dst->Register.WriteMask;
>>>                }
>>>             }
>>>          }
>>>       }
>>>         tgsi_parse_free(&parse);
>>>         return;
>>>    }
>>> +
>>> +static void
>>> +check_no_subroutines(const struct tgsi_full_instruction *inst)
>>> +{
>>> +   switch (inst->Instruction.Opcode) {
>>> +   case TGSI_OPCODE_BGNSUB:
>>> +   case TGSI_OPCODE_ENDSUB:
>>> +   case TGSI_OPCODE_CAL:
>>> +      unreachable("subroutines unhandled");
>>> +   }
>>> +}
>>> +
>>> +static unsigned
>>> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info,
>>> +                              const struct tgsi_full_instruction *inst)
>>> +{
>>> +   unsigned writemask = 0;
>>> +
>>> +   for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
>>> +      const struct tgsi_full_dst_register *dst = &inst->Dst[i];
>>> +
>>> +      if (dst->Register.File == TGSI_FILE_OUTPUT &&
>>> +          !dst->Register.Indirect) {
>>> +         unsigned name = info->output_semantic_name[dst->Register.Index];
>>> +
>>> +         if (name == TGSI_SEMANTIC_TESSINNER)
>>> +            writemask |= dst->Register.WriteMask;
>>> +         else if (name == TGSI_SEMANTIC_TESSOUTER)
>>> +            writemask |= dst->Register.WriteMask << 4;
>>> +      }
>>> +   }
>>> +   return writemask;
>>> +}
>>> +
>>> +static unsigned
>>> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>>> +                               struct tgsi_parse_context *parse,
>>> +                               unsigned end_opcode)
>>> +{
>>> +   struct tgsi_full_instruction *inst;
>>> +   unsigned writemask = 0;
>>> +
>>> +   do {
>>> +      tgsi_parse_token(parse);
>>> +      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>>> +      inst = &parse->FullToken.FullInstruction;
>>> +      check_no_subroutines(inst);
>>> +
>>> +      /* Recursively process nested blocks. */
>>> +      switch (inst->Instruction.Opcode) {
>>> +      case TGSI_OPCODE_IF:
>>> +      case TGSI_OPCODE_UIF:
>>> +         writemask |=
>>> +            get_block_tessfactor_writemask(info, parse,
>>> TGSI_OPCODE_ENDIF);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BGNLOOP:
>>> +         writemask |=
>>> +            get_block_tessfactor_writemask(info, parse,
>>> TGSI_OPCODE_ENDLOOP);
>>> +         continue;
>>> +      }
>>> +
>>> +      writemask |= get_inst_tessfactor_writemask(info, inst);
>>> +   } while (inst->Instruction.Opcode != end_opcode);
>>> +
>>> +   return writemask;
>>> +}
>>> +
>>> +static void
>>> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>>> +                                  struct tgsi_parse_context *parse,
>>> +                                  unsigned *upper_block_tf_writemask,
>>> +                                  unsigned *cond_block_tf_writemask)
>>> +{
>>> +   struct tgsi_full_instruction *inst;
>>> +   unsigned then_tessfactor_writemask = 0;
>>> +   unsigned else_tessfactor_writemask = 0;
>>> +   bool is_then = true;
>>> +
>>> +   do {
>>> +      tgsi_parse_token(parse);
>>> +      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>>> +      inst = &parse->FullToken.FullInstruction;
>>> +      check_no_subroutines(inst);
>>> +
>>> +      switch (inst->Instruction.Opcode) {
>>> +      case TGSI_OPCODE_ELSE:
>>> +         is_then = false;
>>> +         continue;
>>> +
>>> +      /* Recursively process nested blocks. */
>>> +      case TGSI_OPCODE_IF:
>>> +      case TGSI_OPCODE_UIF:
>>> +         get_if_block_tessfactor_writemask(info, parse,
>>> +                                           is_then ?
>>> &then_tessfactor_writemask :
>>> +
>>> &else_tessfactor_writemask,
>>> +                                           cond_block_tf_writemask);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BGNLOOP:
>>> +         *cond_block_tf_writemask |=
>>> +            get_block_tessfactor_writemask(info, parse,
>>> TGSI_OPCODE_ENDLOOP);
>>> +         continue;
>>> +      }
>>> +
>>> +      /* Process an instruction in the current block. */
>>> +      unsigned writemask = get_inst_tessfactor_writemask(info, inst);
>>> +
>>> +      if (writemask) {
>>> +         if (is_then)
>>> +            then_tessfactor_writemask |= writemask;
>>> +         else
>>> +            else_tessfactor_writemask |= writemask;
>>> +      }
>>> +   } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
>>> +
>>> +   if (then_tessfactor_writemask || else_tessfactor_writemask) {
>>> +      /* If both statements write the same tess factor channels,
>>> +       * we can say that the upper block writes them too. */
>>> +      *upper_block_tf_writemask |= then_tessfactor_writemask &
>>> +                                   else_tessfactor_writemask;
>>> +      *cond_block_tf_writemask |= then_tessfactor_writemask |
>>> +                                  else_tessfactor_writemask;
>>> +   }
>>> +}
>>> +
>>> +void
>>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>>> +                    const struct tgsi_shader_info *info,
>>> +                    struct tgsi_tessctrl_info *out)
>>> +{
>>> +   memset(out, 0, sizeof(*out));
>>> +
>>> +   if (info->processor != PIPE_SHADER_TESS_CTRL)
>>> +      return;
>>> +
>>> +   struct tgsi_parse_context parse;
>>> +   if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
>>> +      debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n");
>>> +      return;
>>> +   }
>>> +
>>> +   /* The pass works as follows:
>>> +    * If all codepaths write tess factors, we can say that all
>>> invocations
>>> +    * define tess factors, therefore invocation 0 defines tess factors.
>>> +    *
>>> +    * Each tess factor channel is tracked separately.
>>> +    */
>>> +   unsigned main_block_tf_writemask = 0;
>>> +   unsigned cond_block_tf_writemask = 0;
>>> +
>>> +   while (!tgsi_parse_end_of_tokens(&parse)) {
>>> +      tgsi_parse_token(&parse);
>>> +
>>> +      if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
>>> +         continue;
>>> +
>>> +      struct tgsi_full_instruction *inst =
>>> &parse.FullToken.FullInstruction;
>>> +      check_no_subroutines(inst);
>>> +
>>> +      /* Process nested blocks. */
>>> +      switch (inst->Instruction.Opcode) {
>>> +      case TGSI_OPCODE_IF:
>>> +      case TGSI_OPCODE_UIF:
>>> +         get_if_block_tessfactor_writemask(info, &parse,
>>> +                                           &main_block_tf_writemask,
>>> +                                           &cond_block_tf_writemask);
>>> +         continue;
>>> +
>>> +      case TGSI_OPCODE_BGNLOOP:
>>> +         cond_block_tf_writemask |=
>>> +            get_block_tessfactor_writemask(info, &parse,
>>> TGSI_OPCODE_ENDIF);
>>> +         continue;
>>> +      }
>>> +
>>> +      main_block_tf_writemask |= get_inst_tessfactor_writemask(info,
>>> inst);
>>> +   }
>>> +
>>> +   /* If there is a conditional block that writes a tess factor channel
>>> that
>>> +    * the main block doesn't write, we can't say (we're not sure) that
>>> +    * invocation 0 writes it.
>>> +    */
>>> +   out->invoc0_tessfactors_are_def =
>>> +      main_block_tf_writemask &&
>>> +      !(cond_block_tf_writemask & ~main_block_tf_writemask);
>>> +
>>> +   tgsi_parse_free(&parse);
>>> +}
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>> b/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>> index 857434f..ff64789 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h
>>> @@ -181,30 +181,41 @@ struct tgsi_array_info
>>>       /** Whether an array with this ID was declared. */
>>>       bool declared;
>>>         /** The OR of all writemasks used to write to this array. */
>>>       ubyte writemask;
>>>         /** The range with which the array was declared. */
>>>       struct tgsi_declaration_range range;
>>>    };
>>>    +struct tgsi_tessctrl_info
>>> +{
>>> +   /** Whether all codepaths of invocation 0 write tess factors. */
>>> +   bool invoc0_tessfactors_are_def;
>>> +};
>>> +
>>>    extern void
>>>    tgsi_scan_shader(const struct tgsi_token *tokens,
>>>                     struct tgsi_shader_info *info);
>>>      void
>>>    tgsi_scan_arrays(const struct tgsi_token *tokens,
>>>                     unsigned file,
>>>                     unsigned max_array_id,
>>>                     struct tgsi_array_info *arrays);
>>>    +void
>>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>>> +                    const struct tgsi_shader_info *info,
>>> +                    struct tgsi_tessctrl_info *out);
>>> +
>>>    static inline bool
>>>    tgsi_is_bindless_image_file(unsigned file)
>>>    {
>>>       return file != TGSI_FILE_IMAGE &&
>>>              file != TGSI_FILE_MEMORY &&
>>>              file != TGSI_FILE_BUFFER;
>>>    }
>>>      #ifdef __cplusplus
>>>    } // extern "C"
>>>
>>
>>
>> --
>> Lerne, wie die Welt wirklich ist,
>> Aber vergiss niemals, wie sie sein sollte.


-- 
Lerne, wie die Welt wirklich ist,
Aber vergiss niemals, wie sie sein sollte.


More information about the mesa-dev mailing list