[Mesa-dev] [PATCH] tgsi: lowering support for alpha test

Sat Dec 20 07:27:16 PST 2014

Am 20.12.2014 um 02:02 schrieb Rob Clark:
> On Fri, Dec 19, 2014 at 5:35 PM, Roland Scheidegger <sroland at vmware.com> wrote:
>> I'm not a fan of using semantics for consts (fwiw I think it would
>> probably be a good idea to use different namespaces for system values
>> and inputs too but that's another topic).
>> I don't quite share the concerns about possibly indirectly addressing
>> such a const though (because if const buffers are used this isn't a
>> problem).
> 
> yeah, I guess I'll juggle things around so driver could pass in which
> const slot it wants to use..
> 
>> As for the implementation, it is a bit lacking though I don't know if
>> you care. Because you are supposed to do that in the format of the
>> (first) render target. And your results won't match that very well
>> (especially a problem probably if you use exact comparisons). llvmpipe
>> has some code to do exactly that (though only works for 8bit unorm
>> alpha). I suppose there could be different ways to do it (at least for
>> most functions except equal/not equal you could probably get away with
>> adjusting the alpha ref value sligthly to account for the rounding you'd
>> get when converting to render target output - I'd guess for the exact
>> comparisons you could do something similar possibly, use a range and
>> combine two comparisons).
> 
> hmm, sigh... I guess the issue is that we end up doing the comparsion
> at too high a precision?
Indeed (might also do the comparison against unclamped values which can
be a problem too).

> well, in general, my aim when it comes to doing gl things on gles hw
> is to maximize usefulness.. and what I seem to see a lot is alpharef
> values like 0.5 (or other suitably round values), so when it comes
> down to being able to render correctly whatever random gl game, the
> current approach seems sufficient.  Maybe there is room for a strict
> mode, at the expense of a few extra instructions and extra const slot
> for the edge cases?
I think you would not really require extra const slots (just some more
arithmetic for some functions, otherwise it may be possible to adjust the
ref value based on the alpha test function to account for the assumed
rounding you'd get when converting to the unorm values later, though it
possibly might not quite be 100% exact solution).
I don't know if apps rely on this, but at least with the equal function
it's obvious that sloppy behavior is quite broken, your exact float
comparison might never match depending on the shader.
(That said, a sloppy solution is just fine by me, this is all optional
code.)

Roland


> BR,
> -R
> 
>> Roland
>>
>> Am 19.12.2014 um 20:11 schrieb Rob Clark:
>>> From: Rob Clark <robclark at freedesktop.org>
>>>
>>> This emulates alpha-test with a compare + KILL_IF.  The alpha-ref value
>>> is passed to the shader via constant tagged with new ALPHAREF semantic.
>>> For example:
>>>
>>>   FRAG
>>>   PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
>>>   DCL IN[0], COLOR, COLOR
>>>   DCL OUT[0], COLOR
>>>     0: MOV OUT[0], IN[0]
>>>     1: END
>>>
>>> with alpha-func PIPE_FUNC_LESS, becomes:
>>>
>>>   FRAG
>>>   PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
>>>   DCL IN[0], COLOR, COLOR
>>>   DCL OUT[0], COLOR
>>>   IMM[0] FLT32 {    0.0000,     1.0000,   128.0000,     0.0000}
>>>   DCL TEMP[0]
>>>   DCL TEMP[1]
>>>   DCL CONST[0], ALPHAREF
>>>     0: MOV TEMP[1], IN[0]
>>>     1: SLT TEMP[0].x, TEMP[1].wwww, CONST[0].xxxx
>>>     2: KILL_IF TEMP[0].xxxx
>>>     3: MOV OUT[0], TEMP[1]
>>>     4: END
>>>
>>> Signed-off-by: Rob Clark <robclark at freedesktop.org>
>>> ---
>>>  src/gallium/auxiliary/tgsi/tgsi_lowering.c | 170 ++++++++++++++++++++++++++++-
>>>  src/gallium/auxiliary/tgsi/tgsi_lowering.h |   7 ++
>>>  src/gallium/auxiliary/tgsi/tgsi_strings.c  |   1 +
>>>  src/gallium/include/pipe/p_shader_tokens.h |   3 +-
>>>  4 files changed, 177 insertions(+), 4 deletions(-)
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
>>> index dee6c41..4f1da29 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
>>> @@ -37,15 +37,22 @@ struct tgsi_lowering_context {
>>>     struct tgsi_transform_context base;
>>>     const struct tgsi_lowering_config *config;
>>>     struct tgsi_shader_info *info;
>>> +
>>> +   bool alpha_test;
>>> +   unsigned colorout;
>>> +   unsigned colortmp;   /* tmp register to hold original color out */
>>> +   unsigned alpharef;   /* const slot used to hold alpharef value */
>>> +
>>>     unsigned two_side_colors;
>>>     unsigned two_side_idx[PIPE_MAX_SHADER_INPUTS];
>>>     unsigned color_base;  /* base register for chosen COLOR/BCOLOR's */
>>>     int face_idx;
>>> +
>>>     unsigned numtmp;
>>>     struct {
>>>        struct tgsi_full_src_register src;
>>>        struct tgsi_full_dst_register dst;
>>> -   } tmp[2];
>>> +   } tmp[3];
>>>  #define A 0
>>>  #define B 1
>>>     struct tgsi_full_src_register imm;
>>> @@ -1146,6 +1153,128 @@ transform_samp(struct tgsi_transform_context *tctx,
>>>     return 0;
>>>  }
>>>
>>> +/* Alpha-test emulation:
>>> + * Redirects writes to COLOR output to a temporary and appends
>>> + * extra instructions to do conditional kill based on output
>>> + * alpha value
>>> + */
>>> +#define ALPHATEST_GROW (                        \
>>> +      3 +         /* CONST[], ALPHAREF */       \
>>> +      NINST(2) +  /* SEQ/SNE/SLE/SGT/SLT */     \
>>> +      NINST(1) +  /* KILL_IF */                 \
>>> +      NINST(1)    /* MOV */                     \
>>> +      )
>>> +#define ALPHATEST_TMP  1
>>> +
>>> +static void
>>> +emit_alphatest_decls(struct tgsi_transform_context *tctx)
>>> +{
>>> +   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
>>> +   struct tgsi_full_declaration decl;
>>> +
>>> +   /* NOTE: the temporaries we need are handled in emit_decls() */
>>> +
>>> +   decl = tgsi_default_full_declaration();
>>> +   decl.Declaration.File = TGSI_FILE_CONSTANT;
>>> +   decl.Declaration.Semantic = true;
>>> +   decl.Range.First = decl.Range.Last = ctx->alpharef;
>>> +   decl.Semantic.Name = TGSI_SEMANTIC_ALPHAREF;
>>> +   decl.Semantic.Index = 0;
>>> +   tctx->emit_declaration(tctx, &decl);
>>> +}
>>> +
>>> +static void
>>> +emit_alphatest_instrs(struct tgsi_transform_context *tctx)
>>> +{
>>> +   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
>>> +   struct tgsi_full_instruction new_inst;
>>> +   int c = ctx->colortmp;
>>> +
>>> +   if (ctx->config->alpha_func == PIPE_FUNC_NEVER) {
>>> +      /* KILL */
>>> +      new_inst = tgsi_default_full_instruction();
>>> +      new_inst.Instruction.Opcode = TGSI_OPCODE_KILL;
>>> +      new_inst.Instruction.NumDstRegs = 0;
>>> +      new_inst.Instruction.NumSrcRegs = 0;
>>> +      tctx->emit_instruction(tctx, &new_inst);
>>> +   } else {
>>> +      unsigned opcode;
>>> +
>>> +      switch (ctx->config->alpha_func) {
>>> +      case PIPE_FUNC_LESS:
>>> +         opcode = TGSI_OPCODE_SLT;
>>> +         break;
>>> +      case PIPE_FUNC_EQUAL:
>>> +         opcode = TGSI_OPCODE_SEQ;
>>> +         break;
>>> +      case PIPE_FUNC_LEQUAL:
>>> +         opcode = TGSI_OPCODE_SLE;
>>> +         break;
>>> +      case PIPE_FUNC_GREATER:
>>> +         opcode = TGSI_OPCODE_SGT;
>>> +         break;
>>> +      case PIPE_FUNC_NOTEQUAL:
>>> +         opcode = TGSI_OPCODE_SNE;
>>> +         break;
>>> +      case PIPE_FUNC_GEQUAL:
>>> +         opcode = TGSI_OPCODE_SGE;
>>> +         break;
>>> +      default:
>>> +         assert(0);
>>> +         return;
>>> +      }
>>> +
>>> +      /* SEQ/SNE/SGE/SLE/SGT/SLT tmpA.x, tmpColor.w, alpharef.x */
>>> +      new_inst = tgsi_default_full_instruction();
>>> +      new_inst.Instruction.Opcode = opcode;
>>> +      new_inst.Instruction.NumDstRegs = 1;
>>> +      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
>>> +      new_inst.Instruction.NumSrcRegs = 2;
>>> +      reg_src(&new_inst.Src[0], &ctx->tmp[c].src, SWIZ(W, W, W, W));
>>> +      new_inst.Src[1].Register.File = TGSI_FILE_CONSTANT;
>>> +      new_inst.Src[1].Register.Index = ctx->alpharef;
>>> +      new_inst.Src[1].Register.SwizzleX = TGSI_SWIZZLE_X;
>>> +      new_inst.Src[1].Register.SwizzleY = TGSI_SWIZZLE_X;
>>> +      new_inst.Src[1].Register.SwizzleZ = TGSI_SWIZZLE_X;
>>> +      new_inst.Src[1].Register.SwizzleW = TGSI_SWIZZLE_X;
>>> +      tctx->emit_instruction(tctx, &new_inst);
>>> +
>>> +      /* KILL_IF tmpA.x */
>>> +      new_inst = tgsi_default_full_instruction();
>>> +      new_inst.Instruction.Opcode = TGSI_OPCODE_KILL_IF;
>>> +      new_inst.Instruction.NumDstRegs = 0;
>>> +      new_inst.Instruction.NumSrcRegs = 1;
>>> +      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(X, _, _, _));
>>> +      tctx->emit_instruction(tctx, &new_inst);
>>> +   }
>>> +
>>> +   /* MOV OUT[color], tmpColor */
>>> +   /* (would be nice if we could create_mov() here..) */
>>> +   new_inst = tgsi_default_full_instruction();
>>> +   new_inst.Instruction.Opcode = TGSI_OPCODE_MOV;
>>> +   new_inst.Instruction.NumDstRegs = 1;
>>> +   new_inst.Dst[0].Register.File  = TGSI_FILE_OUTPUT;
>>> +   new_inst.Dst[0].Register.Index = ctx->colorout;
>>> +   new_inst.Dst[0].Register.WriteMask = TGSI_WRITEMASK_XYZW;
>>> +   new_inst.Instruction.NumSrcRegs = 1;
>>> +   reg_src(&new_inst.Src[0], &ctx->tmp[c].src, SWIZ(X, Y, Z, W));
>>> +   tctx->emit_instruction(tctx, &new_inst);
>>> +}
>>> +
>>> +static void
>>> +rename_color_outputs(struct tgsi_lowering_context *ctx,
>>> +                    struct tgsi_full_instruction *inst)
>>> +{
>>> +   unsigned i;
>>> +   for (i = 0; i < inst->Instruction.NumDstRegs; i++) {
>>> +      struct tgsi_dst_register *dst = &inst->Dst[i].Register;
>>> +      if ((dst->File == TGSI_FILE_OUTPUT) &&
>>> +          (dst->Index == ctx->colorout)) {
>>> +         *dst = ctx->tmp[ctx->colortmp].dst.Register;
>>> +      }
>>> +   }
>>> +}
>>> +
>>>  /* Two-sided color emulation:
>>>   * For each COLOR input, create a corresponding BCOLOR input, plus
>>>   * CMP instruction to select front or back color based on FACE
>>> @@ -1288,6 +1417,9 @@ emit_decls(struct tgsi_transform_context *tctx)
>>>
>>>     if (ctx->two_side_colors)
>>>        emit_twoside(tctx);
>>> +
>>> +   if (ctx->alpha_test)
>>> +      emit_alphatest_decls(tctx);
>>>  }
>>>
>>>  static void
>>> @@ -1307,7 +1439,6 @@ rename_color_inputs(struct tgsi_lowering_context *ctx,
>>>           }
>>>        }
>>>     }
>>> -
>>>  }
>>>
>>>  static void
>>> @@ -1327,6 +1458,12 @@ transform_instr(struct tgsi_transform_context *tctx,
>>>     if (ctx->two_side_colors)
>>>        rename_color_inputs(ctx, inst);
>>>
>>> +   /* if emulating alpha-test, we need to re-write some dst
>>> +    * registers:
>>> +    */
>>> +   if (ctx->alpha_test)
>>> +      rename_color_outputs(ctx, inst);
>>> +
>>>     switch (inst->Instruction.Opcode) {
>>>     case TGSI_OPCODE_DST:
>>>        if (!ctx->config->lower_DST)
>>> @@ -1406,6 +1543,10 @@ transform_instr(struct tgsi_transform_context *tctx,
>>>        if (transform_samp(tctx, inst))
>>>           goto skip;
>>>        break;
>>> +   case TGSI_OPCODE_END:
>>> +      if (ctx->alpha_test)
>>> +         emit_alphatest_instrs(tctx);
>>> +      goto skip;   /* emit the actual END instruction itself */
>>>     default:
>>>     skip:
>>>        tctx->emit_instruction(tctx, inst);
>>> @@ -1452,6 +1593,20 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
>>>        }
>>>     }
>>>
>>> +   if ((info->processor == TGSI_PROCESSOR_FRAGMENT) &&
>>> +       config->lower_alpha_test &&
>>> +       (config->alpha_func != PIPE_FUNC_ALWAYS)) {
>>> +      int i;
>>> +      ctx.alpha_test = true;
>>> +      for (i = 0; i < info->file_max[TGSI_FILE_OUTPUT]; i++) {
>>> +         /* TODO not sure what to do in case of MRT */
>>> +         if (info->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) {
>>> +             ctx.colorout = i;
>>> +             break;
>>> +         }
>>> +      }
>>> +   }
>>> +
>>>     ctx.saturate = config->saturate_r | config->saturate_s | config->saturate_t;
>>>
>>>  #define OPCS(x) ((config->lower_ ## x) ? info->opcode_count[TGSI_OPCODE_ ## x] : 0)
>>> @@ -1472,7 +1627,8 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
>>>           OPCS(DP2A) ||
>>>           OPCS(TXP) ||
>>>           ctx.two_side_colors ||
>>> -         ctx.saturate))
>>> +         ctx.saturate ||
>>> +         ctx.alpha_test))
>>>        return NULL;
>>>
>>>  #if 0  /* debug */
>>> @@ -1555,6 +1711,14 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
>>>        numtmp = MAX2(numtmp, SAMP_TMP);
>>>     }
>>>
>>> +   if (ctx.alpha_test) {
>>> +      newlen += ALPHATEST_GROW;
>>> +      numtmp = MAX2(numtmp, ALPHATEST_TMP);
>>> +      /* and one more tmp to hold temporary color output: */
>>> +      ctx.colortmp = numtmp++;
>>> +      ctx.alpharef = info->file_max[TGSI_FILE_CONSTANT] + 1;
>>> +   }
>>> +
>>>     /* specifically don't include two_side_colors temps in the count: */
>>>     ctx.numtmp = numtmp;
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
>>> index 52c204f..3a95c82 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
>>> @@ -69,6 +69,13 @@ struct tgsi_lowering_config
>>>     unsigned lower_DP2:1;
>>>     unsigned lower_DP2A:1;
>>>
>>> +   /* If lowering alpha-test, a constant w/ the semantic
>>> +    * ALPHAREF is inserted.  The driver should pass in the
>>> +    * actual alpha-ref value via this constant (.x coord)
>>> +    */
>>> +   unsigned lower_alpha_test:1;
>>> +   unsigned alpha_func:3;    /* PIPE_FUNC_x */
>>> +
>>>     /* bitmask of (1 << TGSI_TEXTURE_type): */
>>>     unsigned lower_TXP;
>>>
>>> diff --git a/src/gallium/auxiliary/tgsi/tgsi_strings.c b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> index bd97544..b696838 100644
>>> --- a/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> +++ b/src/gallium/auxiliary/tgsi/tgsi_strings.c
>>> @@ -88,6 +88,7 @@ const char *tgsi_semantic_names[TGSI_SEMANTIC_COUNT] =
>>>     "INVOCATIONID",
>>>     "VERTEXID_NOBASE",
>>>     "BASEVERTEX",
>>> +   "ALPHAREF",
>>>  };
>>>
>>>  const char *tgsi_texture_names[TGSI_TEXTURE_COUNT] =
>>> diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
>>> index 442b67b..bf54c3e 100644
>>> --- a/src/gallium/include/pipe/p_shader_tokens.h
>>> +++ b/src/gallium/include/pipe/p_shader_tokens.h
>>> @@ -178,7 +178,8 @@ struct tgsi_declaration_interp
>>>  #define TGSI_SEMANTIC_INVOCATIONID 27
>>>  #define TGSI_SEMANTIC_VERTEXID_NOBASE 28
>>>  #define TGSI_SEMANTIC_BASEVERTEX 29
>>> -#define TGSI_SEMANTIC_COUNT      30 /**< number of semantic values */
>>> +#define TGSI_SEMANTIC_ALPHAREF   30
>>> +#define TGSI_SEMANTIC_COUNT      31 /**< number of semantic values */
>>>
>>>  struct tgsi_declaration_semantic
>>>  {
>>>
>>