[Freedreno] [PATCH] freedreno/ir3: fix UCMP handling
Rob Clark
robdclark at gmail.com
Thu Sep 11 04:24:50 PDT 2014
On Thu, Sep 11, 2014 at 1:58 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
> UCMP does not require a compare, only a select.
>
> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
> ---
>
> Not sure if I need to do some sort of get_unconst here... looked at
> instr_cat3_t, but can't quite tell.
yeah, you do.. cat3 cannot seem to handle a const in 2nd src (src1)
(cat2 cannot handle 2 const's, but the 1 const can be in either position, fwiw)
> Ideally this stage of the compiler wouldn't have to worry about that, and just
> stick everything into registers + mov's, and then have an SSA pass be able to
> propagate things into place when allowed...
currently we just figure out where extra mov's are due to
-ETOOMANYCONST in the frontend. I guess we could have an extra pass
to do that, but not really sure that this would add any benefit (other
than moving a small bit of complexity from the frontend to somewhere
else
BR,
-R
> src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 73 ++++++++++++------------
> 1 file changed, 36 insertions(+), 37 deletions(-)
>
> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
> index cd6d230..35a97ce 100644
> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
> @@ -1475,9 +1475,6 @@ trans_cmp(const struct instr_translater *t,
> * USLT(a,b) = (a < b) ? ~0 : 0
> * cmps.u32.lt dst, a, b
> *
> - * UCMP(a,b,c) = (a < 0) ? b : c
> - * cmps.u32.lt tmp0, a, {0}
> - * sel.b16 dst, b, tmp0, c
> */
> static void
> trans_icmp(const struct instr_translater *t,
> @@ -1486,8 +1483,9 @@ trans_icmp(const struct instr_translater *t,
> {
> struct ir3_instruction *instr;
> struct tgsi_dst_register *dst = get_dst(ctx, inst);
> - struct tgsi_src_register constval0;
> - struct tgsi_src_register *a0, *a1, *a2;
> + struct tgsi_dst_register tmp_dst;
> + struct tgsi_src_register *tmp_src;
> + struct tgsi_src_register *a0, *a1;
> unsigned condition;
>
> a0 = &inst->Src[0].Register; /* a */
> @@ -1508,12 +1506,6 @@ trans_icmp(const struct instr_translater *t,
> case TGSI_OPCODE_USLT:
> condition = IR3_COND_LT;
> break;
> - case TGSI_OPCODE_UCMP:
> - get_immediate(ctx, &constval0, 0);
> - a0 = &inst->Src[0].Register; /* a */
> - a1 = &constval0; /* {0} */
> - condition = IR3_COND_LT;
> - break;
>
> default:
> compile_assert(ctx, 0);
> @@ -1523,37 +1515,44 @@ trans_icmp(const struct instr_translater *t,
> if (is_const(a0) && is_const(a1))
> a0 = get_unconst(ctx, a0);
>
> - if (t->tgsi_opc == TGSI_OPCODE_UCMP) {
> - struct tgsi_dst_register tmp_dst;
> - struct tgsi_src_register *tmp_src;
> - tmp_src = get_internal_temp(ctx, &tmp_dst);
> - /* cmps.u32.lt tmp, a0, a1 */
> - instr = instr_create(ctx, 2, t->opc);
> - instr->cat2.condition = condition;
> - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
> + tmp_src = get_internal_temp(ctx, &tmp_dst);
> + /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
> + instr = instr_create(ctx, 2, t->opc);
> + instr->cat2.condition = condition;
> + vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>
> - a1 = &inst->Src[1].Register;
> - a2 = &inst->Src[2].Register;
> - /* sel.{b32,b16} dst, src2, tmp, src1 */
> - instr = instr_create(ctx, 3, OPC_SEL_B32);
> - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
> - } else {
> - struct tgsi_dst_register tmp_dst;
> - struct tgsi_src_register *tmp_src;
> - tmp_src = get_internal_temp(ctx, &tmp_dst);
> - /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
> - instr = instr_create(ctx, 2, t->opc);
> - instr->cat2.condition = condition;
> - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
> + /* absneg.s dst, (neg)tmp */
> + instr = instr_create(ctx, 2, OPC_ABSNEG_S);
> + vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
>
> - /* absneg.s dst, (neg)tmp */
> - instr = instr_create(ctx, 2, OPC_ABSNEG_S);
> - vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
> - }
> put_dst(ctx, inst, dst);
> }
>
> /*
> + * UCMP(a,b,c) = a ? b : c
> + * sel.b16 dst, c, a, b
> + */
> +static void
> +trans_ucmp(const struct instr_translater *t,
> + struct ir3_compile_context *ctx,
> + struct tgsi_full_instruction *inst)
> +{
> + struct ir3_instruction *instr;
> + struct tgsi_dst_register *dst = get_dst(ctx, inst);
> + struct tgsi_src_register *a0, *a1, *a2;
> +
> + a0 = &inst->Src[0].Register; /* a */
> + a1 = &inst->Src[1].Register; /* b */
> + a2 = &inst->Src[2].Register; /* c */
> +
> + /* sel.{b32,b16} dst, src2, src0, src1 */
> + instr = instr_create(ctx, 3, OPC_SEL_B32);
> + vectorize(ctx, instr, dst, 3, a2, 0, a0, 0, a1, 0);
> + put_dst(ctx, inst, dst);
> +}
> +
> +
> +/*
> * Conditional / Flow control
> */
>
> @@ -2111,7 +2110,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
> INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
> INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
> INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
> - INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U),
> + INSTR(UCMP, trans_ucmp),
> INSTR(IF, trans_if, .opc = OPC_CMPS_F),
> INSTR(UIF, trans_if, .opc = OPC_CMPS_U),
> INSTR(ELSE, trans_else),
> --
> 1.8.5.5
>
More information about the Freedreno
mailing list