[Freedreno] [PATCH] freedreno/ir3: fix UCMP handling
Ilia Mirkin
imirkin at alum.mit.edu
Thu Sep 11 06:35:11 PDT 2014
On Thu, Sep 11, 2014 at 7:24 AM, Rob Clark <robdclark at gmail.com> wrote:
> On Thu, Sep 11, 2014 at 1:58 AM, Ilia Mirkin <imirkin at alum.mit.edu> wrote:
>> UCMP does not require a compare, only a select.
>>
>> Signed-off-by: Ilia Mirkin <imirkin at alum.mit.edu>
>> ---
>>
>> Not sure if I need to do some sort of get_unconst here... looked at
>> instr_cat3_t, but can't quite tell.
>
> yeah, you do.. cat3 cannot seem to handle a const in 2nd src (src1)
If you say so...
union PACKED {
struct PACKED {
uint32_t src1 : 11;
uint32_t must_be_zero1: 2;
uint32_t src2_c : 1;
uint32_t src1_neg : 1;
uint32_t src2_r : 1;
};
Sure looks like it can handle src2_c [those structs appear to be
1-indexed for src counts]
Although I guess src1 *and* src2 can't be const.
>
> (cat2 cannot handle 2 const's, but the 1 const can be in either position, fwiw)
>
>> Ideally this stage of the compiler wouldn't have to worry about that, and just
>> stick everything into registers + mov's, and then have an SSA pass be able to
>> propagate things into place when allowed...
>
> currently we just figure out where extra mov's are due to
> -ETOOMANYCONST in the frontend. I guess we could have an extra pass
> to do that, but not really sure that this would add any benefit (other
> than moving a small bit of complexity from the frontend to somewhere
> else
You do copy propagation anyways... might as well make your life easier
and not worry about it at all in the frontend by always using a mov.
At least that's what we do in nouveau. Anyways, I'll resend this with
the same thing that instr_cat3 does (I noticed its existence after I
sent the patch, but I can't use it directly due to argument order).
>
> BR,
> -R
>
>> src/gallium/drivers/freedreno/ir3/ir3_compiler.c | 73 ++++++++++++------------
>> 1 file changed, 36 insertions(+), 37 deletions(-)
>>
>> diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
>> index cd6d230..35a97ce 100644
>> --- a/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
>> +++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler.c
>> @@ -1475,9 +1475,6 @@ trans_cmp(const struct instr_translater *t,
>> * USLT(a,b) = (a < b) ? ~0 : 0
>> * cmps.u32.lt dst, a, b
>> *
>> - * UCMP(a,b,c) = (a < 0) ? b : c
>> - * cmps.u32.lt tmp0, a, {0}
>> - * sel.b16 dst, b, tmp0, c
>> */
>> static void
>> trans_icmp(const struct instr_translater *t,
>> @@ -1486,8 +1483,9 @@ trans_icmp(const struct instr_translater *t,
>> {
>> struct ir3_instruction *instr;
>> struct tgsi_dst_register *dst = get_dst(ctx, inst);
>> - struct tgsi_src_register constval0;
>> - struct tgsi_src_register *a0, *a1, *a2;
>> + struct tgsi_dst_register tmp_dst;
>> + struct tgsi_src_register *tmp_src;
>> + struct tgsi_src_register *a0, *a1;
>> unsigned condition;
>>
>> a0 = &inst->Src[0].Register; /* a */
>> @@ -1508,12 +1506,6 @@ trans_icmp(const struct instr_translater *t,
>> case TGSI_OPCODE_USLT:
>> condition = IR3_COND_LT;
>> break;
>> - case TGSI_OPCODE_UCMP:
>> - get_immediate(ctx, &constval0, 0);
>> - a0 = &inst->Src[0].Register; /* a */
>> - a1 = &constval0; /* {0} */
>> - condition = IR3_COND_LT;
>> - break;
>>
>> default:
>> compile_assert(ctx, 0);
>> @@ -1523,37 +1515,44 @@ trans_icmp(const struct instr_translater *t,
>> if (is_const(a0) && is_const(a1))
>> a0 = get_unconst(ctx, a0);
>>
>> - if (t->tgsi_opc == TGSI_OPCODE_UCMP) {
>> - struct tgsi_dst_register tmp_dst;
>> - struct tgsi_src_register *tmp_src;
>> - tmp_src = get_internal_temp(ctx, &tmp_dst);
>> - /* cmps.u32.lt tmp, a0, a1 */
>> - instr = instr_create(ctx, 2, t->opc);
>> - instr->cat2.condition = condition;
>> - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>> + tmp_src = get_internal_temp(ctx, &tmp_dst);
>> + /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
>> + instr = instr_create(ctx, 2, t->opc);
>> + instr->cat2.condition = condition;
>> + vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>>
>> - a1 = &inst->Src[1].Register;
>> - a2 = &inst->Src[2].Register;
>> - /* sel.{b32,b16} dst, src2, tmp, src1 */
>> - instr = instr_create(ctx, 3, OPC_SEL_B32);
>> - vectorize(ctx, instr, dst, 3, a1, 0, tmp_src, 0, a2, 0);
>> - } else {
>> - struct tgsi_dst_register tmp_dst;
>> - struct tgsi_src_register *tmp_src;
>> - tmp_src = get_internal_temp(ctx, &tmp_dst);
>> - /* cmps.{u32,s32}.<cond> tmp, a0, a1 */
>> - instr = instr_create(ctx, 2, t->opc);
>> - instr->cat2.condition = condition;
>> - vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
>> + /* absneg.s dst, (neg)tmp */
>> + instr = instr_create(ctx, 2, OPC_ABSNEG_S);
>> + vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
>>
>> - /* absneg.s dst, (neg)tmp */
>> - instr = instr_create(ctx, 2, OPC_ABSNEG_S);
>> - vectorize(ctx, instr, dst, 1, tmp_src, IR3_REG_NEGATE);
>> - }
>> put_dst(ctx, inst, dst);
>> }
>>
>> /*
>> + * UCMP(a,b,c) = a ? b : c
>> + * sel.b16 dst, c, a, b
>> + */
>> +static void
>> +trans_ucmp(const struct instr_translater *t,
>> + struct ir3_compile_context *ctx,
>> + struct tgsi_full_instruction *inst)
>> +{
>> + struct ir3_instruction *instr;
>> + struct tgsi_dst_register *dst = get_dst(ctx, inst);
>> + struct tgsi_src_register *a0, *a1, *a2;
>> +
>> + a0 = &inst->Src[0].Register; /* a */
>> + a1 = &inst->Src[1].Register; /* b */
>> + a2 = &inst->Src[2].Register; /* c */
>> +
>> + /* sel.{b32,b16} dst, src2, src0, src1 */
>> + instr = instr_create(ctx, 3, OPC_SEL_B32);
>> + vectorize(ctx, instr, dst, 3, a2, 0, a0, 0, a1, 0);
>> + put_dst(ctx, inst, dst);
>> +}
>> +
>> +
>> +/*
>> * Conditional / Flow control
>> */
>>
>> @@ -2111,7 +2110,7 @@ static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
>> INSTR(USGE, trans_icmp, .opc = OPC_CMPS_U),
>> INSTR(ISLT, trans_icmp, .opc = OPC_CMPS_S),
>> INSTR(USLT, trans_icmp, .opc = OPC_CMPS_U),
>> - INSTR(UCMP, trans_icmp, .opc = OPC_CMPS_U),
>> + INSTR(UCMP, trans_ucmp),
>> INSTR(IF, trans_if, .opc = OPC_CMPS_F),
>> INSTR(UIF, trans_if, .opc = OPC_CMPS_U),
>> INSTR(ELSE, trans_else),
>> --
>> 1.8.5.5
>>
More information about the Freedreno
mailing list