[Mesa-dev] [PATCH 1/4] tgsi/lowering: add support for lowering FLR and CEIL

Thu Apr 14 06:15:45 UTC 2016

2016-04-14 0:42 GMT+02:00 Rob Clark <robdclark at gmail.com>:
> From: Russell King <rmk at arm.linux.org.uk>
>
> Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD
> instructions for GPUs that support FRC but not FLR or CEIL.  Since
> these lowerings use FRC, it is invalid to ask for FLR or CEIL to be lowered
> along with FRC, so add an assert to catch this invalid configuration.
>
> We also need to deal with FLR instructions emitted by the lowering
> code.  Fix these up with the FRC+SUB equivalent when FLR lowering is
> enabled.
>
> Signed-off-by: Russell King <rmk at arm.linux.org.uk>
> Reviewed-by: Rob Clark <robdclark at gmail.com>
> ---
>  src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 +++++++++++++++++++++++++----
>  src/gallium/auxiliary/tgsi/tgsi_lowering.h |   2 +
>  2 files changed, 149 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> index 0ffd855..b2dd37e 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx,
>   *  dst.w = 1.0
>   *
>   * ; needs: 1 tmp, imm{1.0}
> - * FLR tmpA.x, src.x
> + * if (lowering FLR) {
> + *   FRC tmpA.x, src.x
> + *   SUB tmpA.x, src.x, tmpA.x
> + * } else {
> + *   FLR tmpA.x, src.x
> + * }
>   * EX2 tmpA.y, src.x
>   * SUB dst.y, src.x, tmpA.x
>   * EX2 dst.x, tmpA.x
>   * MOV dst.z, tmpA.y
>   * MOV dst.w, imm{1.0}
>   */
> -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
> +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
>                 NINST(1)+ NINST(1) - OINST(1))
>  #define EXP_TMP  1
>  static void
> @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx,
>     struct tgsi_full_instruction new_inst;
>
>     if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
> -      /* FLR tmpA.x, src.x */
> -      new_inst = tgsi_default_full_instruction();
> -      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> -      new_inst.Instruction.NumDstRegs = 1;
> -      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> -      new_inst.Instruction.NumSrcRegs = 1;
> -      reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> -      tctx->emit_instruction(tctx, &new_inst);
> +      if (ctx->config->lower_FLR) {
> +         /* FRC tmpA.x, src.x */
> +         new_inst = tgsi_default_full_instruction();
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> +         new_inst.Instruction.NumDstRegs = 1;
> +         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> +         new_inst.Instruction.NumSrcRegs = 1;
> +         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> +         tctx->emit_instruction(tctx, &new_inst);
> +
> +         /* SUB tmpA.x, src.x, tmpA.x */
> +         new_inst = tgsi_default_full_instruction();
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> +         new_inst.Instruction.NumDstRegs = 1;
> +         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> +         new_inst.Instruction.NumSrcRegs = 2;
> +         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> +         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
> +         tctx->emit_instruction(tctx, &new_inst);
> +     } else {
> +         /* FLR tmpA.x, src.x */
> +         new_inst = tgsi_default_full_instruction();
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> +         new_inst.Instruction.NumDstRegs = 1;
> +         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> +         new_inst.Instruction.NumSrcRegs = 1;
> +         reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> +         tctx->emit_instruction(tctx, &new_inst);
> +      }
>     }
>
>     if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
> @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx,
>   *
>   * ; needs: 1 tmp, imm{1.0}
>   * LG2 tmpA.x, |src.x|
> - * FLR tmpA.y, tmpA.x
> + * if (lowering FLR) {
> + *   FRC tmpA.y, tmpA.x
> + *   SUB tmpA.y, tmpA.x, tmpA.y
> + * } else {
> + *   FLR tmpA.y, tmpA.x
> + * }
>   * EX2 tmpA.z, tmpA.y
>   * RCP tmpA.z, tmpA.z
>   * MUL dst.y, |src.x|, tmpA.z
>   * MOV dst.xz, tmpA.yx
>   * MOV dst.w, imm{1.0}
>   */
> -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
> +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
>                 NINST(2) + NINST(1) + NINST(1) - OINST(1))
>  #define LOG_TMP  1
>  static void
> @@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx,
>     }
>
>     if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
> -      /* FLR tmpA.y, tmpA.x */
> -      new_inst = tgsi_default_full_instruction();
> -      new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> -      new_inst.Instruction.NumDstRegs = 1;
> -      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> -      new_inst.Instruction.NumSrcRegs = 1;
> -      reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> -      tctx->emit_instruction(tctx, &new_inst);
> +      if (ctx->config->lower_FLR) {
> +         /* FRC tmpA.y, tmpA.x */
> +         new_inst = tgsi_default_full_instruction();
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> +         new_inst.Instruction.NumDstRegs = 1;
> +         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> +         new_inst.Instruction.NumSrcRegs = 1;
> +         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> +         tctx->emit_instruction(tctx, &new_inst);
> +
> +         /* SUB tmpA.y, tmpA.x, tmpA.y */
> +         new_inst = tgsi_default_full_instruction();
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> +         new_inst.Instruction.NumDstRegs = 1;
> +         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> +         new_inst.Instruction.NumSrcRegs = 2;
> +         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> +         reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
> +         tctx->emit_instruction(tctx, &new_inst);
> +      } else {
> +         /* FLR tmpA.y, tmpA.x */
> +         new_inst = tgsi_default_full_instruction();
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> +         new_inst.Instruction.NumDstRegs = 1;
> +         reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> +         new_inst.Instruction.NumSrcRegs = 1;
> +         reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> +         tctx->emit_instruction(tctx, &new_inst);
> +      }
>     }
>
>     if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
> @@ -1005,6 +1057,58 @@ transform_dotp(struct tgsi_transform_context *tctx,
>     }
>  }
>
> +/* FLR - floor, CEIL - ceil
> + * ; needs: 1 tmp
> + * if (CEIL) {
> + *   FRC tmpA, -src
> + *   ADD dst, src, tmpA
> + * } else {
> + *   FRC tmpA, src
> + *   SUB dst, src, tmpA
> + * }
> + */
> +#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
> +#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
> +#define FLR_TMP 1
> +#define CEIL_TMP 1
> +static void
> +transform_flr_ceil(struct tgsi_transform_context *tctx,
> +                   struct tgsi_full_instruction *inst)
> +{
> +   struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
> +   struct tgsi_full_dst_register *dst  = &inst->Dst[0];
> +   struct tgsi_full_src_register *src0 = &inst->Src[0];
> +   struct tgsi_full_instruction new_inst;
> +   unsigned opcode = inst->Instruction.Opcode;
> +
> +   if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
> +      /* FLR: FRC tmpA, src  CEIL: FRC tmpA, -src */
> +      new_inst = tgsi_default_full_instruction();
> +      new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> +      new_inst.Instruction.NumDstRegs = 1;
> +      reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
> +      new_inst.Instruction.NumSrcRegs = 1;
> +      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
> +
> +      if (opcode == TGSI_OPCODE_CEIL)
> +         new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
> +      tctx->emit_instruction(tctx, &new_inst);
> +
> +      /* FLR: SUB dst, src, tmpA  CEIL: ADD dst, src, tmpA */
> +      new_inst = tgsi_default_full_instruction();
> +      if (opcode == TGSI_OPCODE_CEIL)
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
> +      else
> +         new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> +      new_inst.Instruction.NumDstRegs = 1;
> +      reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
> +      new_inst.Instruction.NumSrcRegs = 2;
> +      reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
> +      reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
> +      tctx->emit_instruction(tctx, &new_inst);
> +   }
> +}
> +
>  /* Inserts a MOV_SAT for the needed components of tex coord.  Note that
>   * in the case of TXP, the clamping must happen *after* projection, so
>   * we need to lower TXP to TEX.
> @@ -1401,6 +1505,16 @@ transform_instr(struct tgsi_transform_context *tctx,
>           goto skip;
>        transform_dotp(tctx, inst);
>        break;
> +   case TGSI_OPCODE_FLR:
> +      if (!ctx->config->lower_FLR)
> +         goto skip;
> +      transform_flr_ceil(tctx, inst);
> +      break;
> +   case TGSI_OPCODE_CEIL:
> +      if (!ctx->config->lower_CEIL)
> +         goto skip;
> +      transform_flr_ceil(tctx, inst);
> +      break;
>     case TGSI_OPCODE_TEX:
>     case TGSI_OPCODE_TXP:
>     case TGSI_OPCODE_TXB:
> @@ -1432,6 +1546,9 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
>     /* sanity check in case limit is ever increased: */
>     STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
>
> +   /* sanity check the lowering */
> +   assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
> +
>     memset(&ctx, 0, sizeof(ctx));
>     ctx.base.transform_instruction = transform_instr;
>     ctx.info = info;
> @@ -1473,6 +1590,8 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
>           OPCS(DPH) ||
>           OPCS(DP2) ||
>           OPCS(DP2A) ||
> +         OPCS(FLR) ||
> +         OPCS(CEIL) ||
>           OPCS(TXP) ||
>           ctx.two_side_colors ||
>           ctx.saturate))
> @@ -1541,6 +1660,14 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
>        newlen += DP2A_GROW * OPCS(DP2A);
>        numtmp = MAX2(numtmp, DOTP_TMP);
>     }
> +   if (OPCS(FLR)) {
> +      newlen += FLR_GROW * OPCS(FLR);
> +      numtmp = MAX2(numtmp, FLR_TMP);
> +   }
> +   if (OPCS(CEIL)) {
> +      newlen += CEIL_GROW * OPCS(CEIL);
> +      numtmp = MAX2(numtmp, CEIL_TMP);
> +   }
>     if (ctx.saturate || config->lower_TXP) {
>        int n = 0;
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
> index 52c204f..a96d85d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
> @@ -68,6 +68,8 @@ struct tgsi_lowering_config
>     unsigned lower_DPH:1;
>     unsigned lower_DP2:1;
>     unsigned lower_DP2A:1;
> +   unsigned lower_FLR:1;
> +   unsigned lower_CEIL:1;
>
>     /* bitmask of (1 << TGSI_TEXTURE_type): */
>     unsigned lower_TXP;
> --
> 2.5.5
>

The series is

Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>