[Mesa-dev] [PATCH 1/4] tgsi/lowering: add support for lowering FLR and CEIL
Christian Gmeiner
christian.gmeiner at gmail.com
Thu Apr 14 06:15:45 UTC 2016
2016-04-14 0:42 GMT+02:00 Rob Clark <robdclark at gmail.com>:
> From: Russell King <rmk at arm.linux.org.uk>
>
> Add support for lowering FLR and CEIL to FRC/SUB and FRC/ADD
> instructions for GPUs that support FRC but not FLR or CEIL. Since
> these use FRC, it is invalid to ask for FLR or CEIL to be lowered
> along with FRC, so add an assert to catch this invalid configuration.
>
> We also need to deal with FLR instructions emitted by the lowering
> code. Fix these up with the FRC+SUB equivalent when FLR lowering is
> enabled.
>
> Signed-off-by: Russell King <rmk at arm.linux.org.uk>
> Reviewed-by: Rob Clark <robdclark at gmail.com>
> ---
> src/gallium/auxiliary/tgsi/tgsi_lowering.c | 167 +++++++++++++++++++++++++----
> src/gallium/auxiliary/tgsi/tgsi_lowering.h | 2 +
> 2 files changed, 149 insertions(+), 20 deletions(-)
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.c b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> index 0ffd855..b2dd37e 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.c
> @@ -676,14 +676,19 @@ transform_lit(struct tgsi_transform_context *tctx,
> * dst.w = 1.0
> *
> * ; needs: 1 tmp, imm{1.0}
> - * FLR tmpA.x, src.x
> + * if (lowering FLR) {
> + * FRC tmpA.x, src.x
> + * SUB tmpA.x, src.x, tmpA.x
> + * } else {
> + * FLR tmpA.x, src.x
> + * }
> * EX2 tmpA.y, src.x
> * SUB dst.y, src.x, tmpA.x
> * EX2 dst.x, tmpA.x
> * MOV dst.z, tmpA.y
> * MOV dst.w, imm{1.0}
> */
> -#define EXP_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + \
> +#define EXP_GROW (NINST(1) + NINST(2) + NINST(1) + NINST(2) + NINST(1) + \
> NINST(1)+ NINST(1) - OINST(1))
> #define EXP_TMP 1
> static void
> @@ -696,14 +701,35 @@ transform_exp(struct tgsi_transform_context *tctx,
> struct tgsi_full_instruction new_inst;
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
> - /* FLR tmpA.x, src.x */
> - new_inst = tgsi_default_full_instruction();
> - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> - new_inst.Instruction.NumDstRegs = 1;
> - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> - new_inst.Instruction.NumSrcRegs = 1;
> - reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> - tctx->emit_instruction(tctx, &new_inst);
> + if (ctx->config->lower_FLR) {
> + /* FRC tmpA.x, src.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> +
> + /* SUB tmpA.x, src.x, tmpA.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> + new_inst.Instruction.NumSrcRegs = 2;
> + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, _, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> + } else {
> + /* FLR tmpA.x, src.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_X);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], src, SWIZ(X, _, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> + }
> }
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_Z) {
> @@ -771,14 +797,19 @@ transform_exp(struct tgsi_transform_context *tctx,
> *
> * ; needs: 1 tmp, imm{1.0}
> * LG2 tmpA.x, |src.x|
> - * FLR tmpA.y, tmpA.x
> + * if (lowering FLR) {
> + * FRC tmpA.y, tmpA.x
> + * SUB tmpA.y, tmpA.x, tmpA.y
> + * } else {
> + * FLR tmpA.y, tmpA.x
> + * }
> * EX2 tmpA.z, tmpA.y
> * RCP tmpA.z, tmpA.z
> * MUL dst.y, |src.x|, tmpA.z
> * MOV dst.xz, tmpA.yx
> * MOV dst.w, imm{1.0}
> */
> -#define LOG_GROW (NINST(1) + NINST(1) + NINST(1) + NINST(1) + \
> +#define LOG_GROW (NINST(1) + NINST(1) + NINST(2) + NINST(1) + NINST(1) + \
> NINST(2) + NINST(1) + NINST(1) - OINST(1))
> #define LOG_TMP 1
> static void
> @@ -803,14 +834,35 @@ transform_log(struct tgsi_transform_context *tctx,
> }
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_XY) {
> - /* FLR tmpA.y, tmpA.x */
> - new_inst = tgsi_default_full_instruction();
> - new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> - new_inst.Instruction.NumDstRegs = 1;
> - reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> - new_inst.Instruction.NumSrcRegs = 1;
> - reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> - tctx->emit_instruction(tctx, &new_inst);
> + if (ctx->config->lower_FLR) {
> + /* FRC tmpA.y, tmpA.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> +
> + /* SUB tmpA.y, tmpA.x, tmpA.y */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> + new_inst.Instruction.NumSrcRegs = 2;
> + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(_, Y, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> + } else {
> + /* FLR tmpA.y, tmpA.x */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FLR;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_Y);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], &ctx->tmp[A].src, SWIZ(_, X, _, _));
> + tctx->emit_instruction(tctx, &new_inst);
> + }
> }
>
> if (dst->Register.WriteMask & TGSI_WRITEMASK_Y) {
> @@ -1005,6 +1057,58 @@ transform_dotp(struct tgsi_transform_context *tctx,
> }
> }
>
> +/* FLR - floor, CEIL - ceil
> + * ; needs: 1 tmp
> + * if (CEIL) {
> + * FRC tmpA, -src
> + * ADD dst, src, tmpA
> + * } else {
> + * FRC tmpA, src
> + * SUB dst, src, tmpA
> + * }
> + */
> +#define FLR_GROW (NINST(1) + NINST(2) - OINST(1))
> +#define CEIL_GROW (NINST(1) + NINST(2) - OINST(1))
> +#define FLR_TMP 1
> +#define CEIL_TMP 1
> +static void
> +transform_flr_ceil(struct tgsi_transform_context *tctx,
> + struct tgsi_full_instruction *inst)
> +{
> + struct tgsi_lowering_context *ctx = tgsi_lowering_context(tctx);
> + struct tgsi_full_dst_register *dst = &inst->Dst[0];
> + struct tgsi_full_src_register *src0 = &inst->Src[0];
> + struct tgsi_full_instruction new_inst;
> + unsigned opcode = inst->Instruction.Opcode;
> +
> + if (dst->Register.WriteMask & TGSI_WRITEMASK_XYZW) {
> + /* FLR: FRC tmpA, src CEIL: FRC tmpA, -src */
> + new_inst = tgsi_default_full_instruction();
> + new_inst.Instruction.Opcode = TGSI_OPCODE_FRC;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], &ctx->tmp[A].dst, TGSI_WRITEMASK_XYZW);
> + new_inst.Instruction.NumSrcRegs = 1;
> + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
> +
> + if (opcode == TGSI_OPCODE_CEIL)
> + new_inst.Src[0].Register.Negate = !new_inst.Src[0].Register.Negate;
> + tctx->emit_instruction(tctx, &new_inst);
> +
> + /* FLR: SUB dst, src, tmpA CEIL: ADD dst, src, tmpA */
> + new_inst = tgsi_default_full_instruction();
> + if (opcode == TGSI_OPCODE_CEIL)
> + new_inst.Instruction.Opcode = TGSI_OPCODE_ADD;
> + else
> + new_inst.Instruction.Opcode = TGSI_OPCODE_SUB;
> + new_inst.Instruction.NumDstRegs = 1;
> + reg_dst(&new_inst.Dst[0], dst, TGSI_WRITEMASK_XYZW);
> + new_inst.Instruction.NumSrcRegs = 2;
> + reg_src(&new_inst.Src[0], src0, SWIZ(X, Y, Z, W));
> + reg_src(&new_inst.Src[1], &ctx->tmp[A].src, SWIZ(X, Y, Z, W));
> + tctx->emit_instruction(tctx, &new_inst);
> + }
> +}
> +
> /* Inserts a MOV_SAT for the needed components of tex coord. Note that
> * in the case of TXP, the clamping must happen *after* projection, so
> * we need to lower TXP to TEX.
> @@ -1401,6 +1505,16 @@ transform_instr(struct tgsi_transform_context *tctx,
> goto skip;
> transform_dotp(tctx, inst);
> break;
> + case TGSI_OPCODE_FLR:
> + if (!ctx->config->lower_FLR)
> + goto skip;
> + transform_flr_ceil(tctx, inst);
> + break;
> + case TGSI_OPCODE_CEIL:
> + if (!ctx->config->lower_CEIL)
> + goto skip;
> + transform_flr_ceil(tctx, inst);
> + break;
> case TGSI_OPCODE_TEX:
> case TGSI_OPCODE_TXP:
> case TGSI_OPCODE_TXB:
> @@ -1432,6 +1546,9 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
> /* sanity check in case limit is ever increased: */
> STATIC_ASSERT((sizeof(config->saturate_s) * 8) >= PIPE_MAX_SAMPLERS);
>
> + /* sanity check the lowering */
> + assert(!(config->lower_FRC && (config->lower_FLR || config->lower_CEIL)));
> +
> memset(&ctx, 0, sizeof(ctx));
> ctx.base.transform_instruction = transform_instr;
> ctx.info = info;
> @@ -1473,6 +1590,8 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
> OPCS(DPH) ||
> OPCS(DP2) ||
> OPCS(DP2A) ||
> + OPCS(FLR) ||
> + OPCS(CEIL) ||
> OPCS(TXP) ||
> ctx.two_side_colors ||
> ctx.saturate))
> @@ -1541,6 +1660,14 @@ tgsi_transform_lowering(const struct tgsi_lowering_config *config,
> newlen += DP2A_GROW * OPCS(DP2A);
> numtmp = MAX2(numtmp, DOTP_TMP);
> }
> + if (OPCS(FLR)) {
> + newlen += FLR_GROW * OPCS(FLR);
> + numtmp = MAX2(numtmp, FLR_TMP);
> + }
> + if (OPCS(CEIL)) {
> + newlen += CEIL_GROW * OPCS(CEIL);
> + numtmp = MAX2(numtmp, CEIL_TMP);
> + }
> if (ctx.saturate || config->lower_TXP) {
> int n = 0;
>
> diff --git a/src/gallium/auxiliary/tgsi/tgsi_lowering.h b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
> index 52c204f..a96d85d 100644
> --- a/src/gallium/auxiliary/tgsi/tgsi_lowering.h
> +++ b/src/gallium/auxiliary/tgsi/tgsi_lowering.h
> @@ -68,6 +68,8 @@ struct tgsi_lowering_config
> unsigned lower_DPH:1;
> unsigned lower_DP2:1;
> unsigned lower_DP2A:1;
> + unsigned lower_FLR:1;
> + unsigned lower_CEIL:1;
>
> /* bitmask of (1 << TGSI_TEXTURE_type): */
> unsigned lower_TXP;
> --
> 2.5.5
>
The series is
Reviewed-by: Christian Gmeiner <christian.gmeiner at gmail.com>
More information about the mesa-dev
mailing list