[Mesa-dev] [PATCH 1/2] tgsi/scan: add a new pass that analyzes tess factor writes (v2)
Marek Olšák
maraeo at gmail.com
Mon Sep 11 14:42:50 UTC 2017
On Mon, Sep 11, 2017 at 4:12 PM, Nicolai Hähnle <nhaehnle at gmail.com> wrote:
> On 07.09.2017 20:26, Marek Olšák wrote:
>>
>> From: Marek Olšák <marek.olsak at amd.com>
>>
>> The pass tries to deduce whether tess factors are always written by
>> all shader invocations.
>>
>> The implication for radeonsi is that it doesn't have to use a barrier
>> near the end of TCS, and doesn't have to use LDS for passing the tess
>> factors to the epilog.
>>
>> v2: Handle barriers and do the analysis pass for each code segment
>> surrounded by barriers separately, and AND results from all
>> such segments writing tess factors. The change is trivial in the main
>> switch statement.
>>
>> Also, the result is renamed to "tessfactors_are_def_in_all_invocs"
>> to make the name accurate.
>> ---
>> src/gallium/auxiliary/tgsi/tgsi_scan.c | 224
>> +++++++++++++++++++++++++++++++++
>> src/gallium/auxiliary/tgsi/tgsi_scan.h | 11 ++
>> 2 files changed, 235 insertions(+)
>>
>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> index db87ce3..b893289 100644
>> --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c
>> @@ -930,10 +930,234 @@ tgsi_scan_arrays(const struct tgsi_token *tokens,
>> array->writemask |= dst->Register.WriteMask;
>> }
>> }
>> }
>> }
>> tgsi_parse_free(&parse);
>> return;
>> }
>> +
>> +static void
>> +check_no_subroutines(const struct tgsi_full_instruction *inst)
>> +{
>> + switch (inst->Instruction.Opcode) {
>> + case TGSI_OPCODE_BGNSUB:
>> + case TGSI_OPCODE_ENDSUB:
>> + case TGSI_OPCODE_CAL:
>> + unreachable("subroutines unhandled");
>> + }
>> +}
>> +
>> +static unsigned
>> +get_inst_tessfactor_writemask(const struct tgsi_shader_info *info,
>> + const struct tgsi_full_instruction *inst)
>> +{
>> + unsigned writemask = 0;
>> +
>> + for (unsigned i = 0; i < inst->Instruction.NumDstRegs; i++) {
>> + const struct tgsi_full_dst_register *dst = &inst->Dst[i];
>> +
>> + if (dst->Register.File == TGSI_FILE_OUTPUT &&
>> + !dst->Register.Indirect) {
>> + unsigned name = info->output_semantic_name[dst->Register.Index];
>> +
>> + if (name == TGSI_SEMANTIC_TESSINNER)
>> + writemask |= dst->Register.WriteMask;
>> + else if (name == TGSI_SEMANTIC_TESSOUTER)
>> + writemask |= dst->Register.WriteMask << 4;
>> + }
>> + }
>> + return writemask;
>> +}
>> +
>> +static unsigned
>> +get_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>> + struct tgsi_parse_context *parse,
>> + unsigned end_opcode)
>> +{
>> + struct tgsi_full_instruction *inst;
>> + unsigned writemask = 0;
>> +
>> + do {
>> + tgsi_parse_token(parse);
>> + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>> + inst = &parse->FullToken.FullInstruction;
>> + check_no_subroutines(inst);
>> +
>> + /* Recursively process nested blocks. */
>> + switch (inst->Instruction.Opcode) {
>> + case TGSI_OPCODE_IF:
>> + case TGSI_OPCODE_UIF:
>> + writemask |=
>> + get_block_tessfactor_writemask(info, parse,
>> TGSI_OPCODE_ENDIF);
>> + continue;
>> +
>> + case TGSI_OPCODE_BGNLOOP:
>> + writemask |=
>> + get_block_tessfactor_writemask(info, parse,
>> TGSI_OPCODE_ENDLOOP);
>> + continue;
>> +
>> + case TGSI_OPCODE_BARRIER:
>> + unreachable("nested BARRIER is illegal");
>> + continue;
>> + }
>> +
>> + writemask |= get_inst_tessfactor_writemask(info, inst);
>> + } while (inst->Instruction.Opcode != end_opcode);
>> +
>> + return writemask;
>> +}
>> +
>> +static void
>> +get_if_block_tessfactor_writemask(const struct tgsi_shader_info *info,
>> + struct tgsi_parse_context *parse,
>> + unsigned *upper_block_tf_writemask,
>> + unsigned *cond_block_tf_writemask)
>> +{
>> + struct tgsi_full_instruction *inst;
>> + unsigned then_tessfactor_writemask = 0;
>> + unsigned else_tessfactor_writemask = 0;
>> + bool is_then = true;
>> +
>> + do {
>> + tgsi_parse_token(parse);
>> + assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
>> + inst = &parse->FullToken.FullInstruction;
>> + check_no_subroutines(inst);
>> +
>> + switch (inst->Instruction.Opcode) {
>> + case TGSI_OPCODE_ELSE:
>> + is_then = false;
>> + continue;
>> +
>> + /* Recursively process nested blocks. */
>> + case TGSI_OPCODE_IF:
>> + case TGSI_OPCODE_UIF:
>> + get_if_block_tessfactor_writemask(info, parse,
>> + is_then ?
>> &then_tessfactor_writemask :
>> +
>> &else_tessfactor_writemask,
>> + cond_block_tf_writemask);
>> + continue;
>> +
>> + case TGSI_OPCODE_BGNLOOP:
>> + *cond_block_tf_writemask |=
>> + get_block_tessfactor_writemask(info, parse,
>> TGSI_OPCODE_ENDLOOP);
>> + continue;
>> +
>> + case TGSI_OPCODE_BARRIER:
>> + unreachable("nested BARRIER is illegal");
>> + continue;
>> + }
>> +
>> + /* Process an instruction in the current block. */
>> + unsigned writemask = get_inst_tessfactor_writemask(info, inst);
>> +
>> + if (writemask) {
>> + if (is_then)
>> + then_tessfactor_writemask |= writemask;
>> + else
>> + else_tessfactor_writemask |= writemask;
>> + }
>> + } while (inst->Instruction.Opcode != TGSI_OPCODE_ENDIF);
>> +
>> + if (then_tessfactor_writemask || else_tessfactor_writemask) {
>> + /* If both statements write the same tess factor channels,
>> + * we can say that the upper block writes them too. */
>> + *upper_block_tf_writemask |= then_tessfactor_writemask &
>> + else_tessfactor_writemask;
>> + *cond_block_tf_writemask |= then_tessfactor_writemask |
>> + else_tessfactor_writemask;
>> + }
>> +}
>> +
>> +void
>> +tgsi_scan_tess_ctrl(const struct tgsi_token *tokens,
>> + const struct tgsi_shader_info *info,
>> + struct tgsi_tessctrl_info *out)
>> +{
>> + memset(out, 0, sizeof(*out));
>> +
>> + if (info->processor != PIPE_SHADER_TESS_CTRL)
>> + return;
>> +
>> + struct tgsi_parse_context parse;
>> + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) {
>> + debug_printf("tgsi_parse_init() failed in tgsi_scan_arrays()!\n");
>> + return;
>> + }
>> +
>> + /* The pass works as follows:
>> + * If all codepaths write tess factors, we can say that all
>> invocations
>> + * define tess factors.
>> + *
>> + * Each tess factor channel is tracked separately.
>> + */
>> + unsigned main_block_tf_writemask = 0; /* if main block writes tess
>> factors */
>> + unsigned cond_block_tf_writemask = 0; /* if cond block writes tess
>> factors */
>> +
>> + /* Initial value = true. Here the pass will accumulate results from
>> multiple
>> + * segments surrounded by barriers. If tess factors aren't written at
>> all,
>> + * it's a shader bug and we don't care if this will be true.
>> + */
>> + out->tessfactors_are_def_in_all_invocs = true;
>> +
>> + while (!tgsi_parse_end_of_tokens(&parse)) {
>> + tgsi_parse_token(&parse);
>> +
>> + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION)
>> + continue;
>> +
>> + struct tgsi_full_instruction *inst =
>> &parse.FullToken.FullInstruction;
>> + check_no_subroutines(inst);
>> +
>> + /* Process nested blocks. */
>> + switch (inst->Instruction.Opcode) {
>> + case TGSI_OPCODE_IF:
>> + case TGSI_OPCODE_UIF:
>> + get_if_block_tessfactor_writemask(info, &parse,
>> + &main_block_tf_writemask,
>> + &cond_block_tf_writemask);
>> + continue;
>> +
>> + case TGSI_OPCODE_BGNLOOP:
>> + cond_block_tf_writemask |=
>> + get_block_tessfactor_writemask(info, &parse,
>> TGSI_OPCODE_ENDIF);
>> + continue;
>> +
>> + case TGSI_OPCODE_BARRIER:
>> + /* The following case must be prevented:
>> + * gl_TessLevelInner = ...;
>> + * barrier();
>> + * if (gl_InvocationID == 1)
>> + * gl_TessLevelInner = ...;
>> + *
>> + * If you consider disjoint code segments separated by barriers,
>> each
>> + * such segment that writes tess factor channels should write
>> the same
>> + * channels in all codepaths within that segment.
>> + */
>> + if (main_block_tf_writemask || cond_block_tf_writemask) {
>> + /* Accumulate the result: */
>> + out->tessfactors_are_def_in_all_invocs &=
>> + main_block_tf_writemask &&
>> + !(cond_block_tf_writemask & ~main_block_tf_writemask);
>
>
> Could this be just the following:
>
> out->tessfactors_are_def_in_all_invocs &=
> !(cond_block_tf_writemask & ~main_block_tf_writemask);
>
> (And the same below after the loop)
Do you mean that "main_block_tf_writemask &&" is redundant and always
evaluates the same as the other part of the condition?
>
> Point being, what if you have a barrier() and no assignment to tessfactors
> afterwards?
This point doesn't make sense - the enclosing conditional
"if (main_block_tf_writemask || cond_block_tf_writemask)" ensures that
code segments separated by barriers that contain no assignments to tess
factors are ignored.
Marek
More information about the mesa-dev
mailing list