[Freedreno] [PATCH] freedreno/ir3: fix UCMP handling

Ilia Mirkin imirkin at alum.mit.edu
Thu Sep 11 06:35:11 PDT 2014


On Thu, Sep 11, 2014 at 7:24 AM, Rob Clark <robdclark at gmail.com> wrote:
> On Thu, Sep 11, 2014 at 1:58 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
>> UCMP does not require a compare, only a select.
>>
>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>> ---
>>
>> Not sure if I need to do some sort of get_unconst here... looked at
>> instr_cat3_t, but can't quite tell.
>
> yeah, you do.. cat3 cannot seem to handle a const in 2nd src (src1)

If you say so...

union PACKED {
struct PACKED {
uint32_t src1         : 11;
uint32_t must_be_zero1: 2;
uint32_t src2_c       : 1;
uint32_t src1_neg     : 1;
uint32_t src2_r       : 1;
};

Sure looks like it can handle src2_c [those structs appear to be
1-indexed for src counts]

Although I guess src1 *and* src2 can't be const.

>
> (cat2 cannot handle 2 const's, but the 1 const can be in either position, fwiw)
>
>> Ideally this stage of the compiler wouldn't have to worry about that, and just
>> stick everything into registers + mov's, and then have an SSA pass be able to
>> propagate things into place when allowed...
>
> currently we just figure out where extra mov's are due to
> -ETOOMANYCONST in the frontend.  I guess we could have an extra pass
> to do that, but not really sure that this would add any benefit (other
> than moving a small bit of complexity from the frontend to somewhere
> else

You do copy propagation anyways... might as well make your life easier
and not worry about it at all in the frontend by always using a mov.
At least that's what we do in nouveau. Anyways, I'll resend this with
the same thing that instr_cat3 does (I noticed its existence after I
sent the patch, but I can't use it directly due to argument order).

>
> BR,
> -R
>
>>  src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 73 ++++++++++++------------
>>  1 file changed, 36 insertions(+), 37 deletions(-)
>>
>> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
>> index cd6d230..35a97ce 100644
>> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
>> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
>> @@ -1475,9 +1475,6 @@ trans_cmp(const struct instr_translater *t,
>>   * USLT(a,b) = (a < b) ? ~0 : 0
>>   *   cmps.u32.lt dst, a, b
>>   *
>> - * UCMP(a,b,c) = (a < 0) ? b : c
>> - *   cmps.u32.lt tmp0, a, {0}
>> - *   sel.b16 dst, b, tmp0, c
>>   */
>>  static void
>>  trans_icmp(const struct instr_translater *t,
>> @@ -1486,8 +1483,9 @@ trans_icmp(const struct instr_translater *t,
>>  {
>>         struct ir3_instruction *instr;
>>         struct tgsi_dst_register *dst = get_dst(ctx, inst);
>> -       struct tgsi_src_register constval0;
>> -       struct tgsi_src_register *a0, *a1, *a2;
>> +       struct tgsi_dst_register tmp_dst;
>> +       struct tgsi_src_register *tmp_src;
>> +       struct tgsi_src_register *a0, *a1;
>>         unsigned condition;
>>
>>         a0 = &inst->Src[0].Register;  /* a */
>> @@ -1508,12 +1506,6 @@ trans_icmp(const struct instr_translater *t,
>>         case TGSI_OPCODE_USLT:
>>                 condition = IR3_COND_LT;
>>                 break;
>> -       case TGSI_OPCODE_UCMP:
>> -               get_immediate(ctx, &constval0, 0);
>> -               a0 = &inst->Src[0].Register;  /* a */
>> -               a1 = &constval0;              /* {0} */
>> -               condition = IR3_COND_LT;
>> -               break;
>>
>>         default:
>>                 compile_assert(ctx, 0);
>> @@ -1523,37 +1515,44 @@ trans_icmp(const struct instr_translater *t,
>>         if (is_const(a0) && is_const(a1))
>>                 a0 = get_unconst(ctx, a0);
>>
>> -       if (t->tgsi_opc == TGSI_OPCODE_UCMP) {
>> -               struct tgsi_dst_register tmp_dst;
>> -               struct tgsi_src_register *tmp_src;
>> -               tmp_src = get_internal_temp(ctx, &tmp_dst);
>> -               /* cmps.u32.lt tmp, a0, a1 */
>> -               instr = instr_create(ctx, 2, t->opc);
>> -               instr->cat2.condition = condition;
>> -               vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>> +       tmp_src = get_internal_temp(ctx, &tmp_dst);
>> +       /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
>> +       instr = instr_create(ctx, 2, t->opc);
>> +       instr->cat2.condition = condition;
>> +       vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>>
>> -               a1 = &inst->Src[1].Register;
>> -               a2 = &inst->Src[2].Register;
>> -               /* sel.{b32,b16} dst, src2, tmp, src1 */
>> -               instr = instr_create(ctx, 3, OPC_SEL_B32);
>> -               vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
>> -       } else {
>> -               struct tgsi_dst_register tmp_dst;
>> -               struct tgsi_src_register *tmp_src;
>> -               tmp_src = get_internal_temp(ctx, &tmp_dst);
>> -               /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
>> -               instr = instr_create(ctx, 2, t->opc);
>> -               instr->cat2.condition = condition;
>> -               vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>> +       /* absneg.s dst, (neg)tmp */
>> +       instr = instr_create(ctx, 2, OPC_ABSNEG_S);
>> +       vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
>>
>> -               /* absneg.s dst, (neg)tmp */
>> -               instr = instr_create(ctx, 2, OPC_ABSNEG_S);
>> -               vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
>> -       }
>>         put_dst(ctx, inst, dst);
>>  }
>>
>>  /*
>> + * UCMP(a,b,c) = a ? b : c
>> + *   sel.b16 dst, c, a, b
>> + */
>> +static void
>> +trans_ucmp(const struct instr_translater *t,
>> +               struct ir3_compile_context *ctx,
>> +               struct tgsi_full_instruction *inst)
>> +{
>> +       struct ir3_instruction *instr;
>> +       struct tgsi_dst_register *dst = get_dst(ctx, inst);
>> +       struct tgsi_src_register *a0, *a1, *a2;
>> +
>> +       a0 = &inst->Src[0].Register;  /* a */
>> +       a1 = &inst->Src[1].Register;  /* b */
>> +       a2 = &inst->Src[2].Register;  /* c */
>> +
>> +       /* sel.{b32,b16} dst, src2, src0, src1 */
>> +       instr = instr_create(ctx, 3, OPC_SEL_B32);
>> +       vectorize(ctx, instr, dst, 3, a2, 0, a0, 0, a1, 0);
>> +       put_dst(ctx, inst, dst);
>> +}
>> +
>> +
>> +/*
>>   * Conditional / Flow control
>>   */
>>
>> @@ -2111,7 +2110,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
>>         INSTR(USGE,         trans_icmp, .opc = OPC_CMPS_U),
>>         INSTR(ISLT,         trans_icmp, .opc = OPC_CMPS_S),
>>         INSTR(USLT,         trans_icmp, .opc = OPC_CMPS_U),
>> -       INSTR(UCMP,         trans_icmp, .opc = OPC_CMPS_U),
>> +       INSTR(UCMP,         trans_ucmp),
>>         INSTR(IF,           trans_if,   .opc = OPC_CMPS_F),
>>         INSTR(UIF,          trans_if,   .opc = OPC_CMPS_U),
>>         INSTR(ELSE,         trans_else),
>> --
>> 1.8.5.5
>>


More information about the Freedreno mailing list