[Mesa-dev] [PATCH] mesa/st: don't use CMP / I2F for conditional assignments with native integers

Fri Dec 5 10:58:35 PST 2014

Ahh the comment referred to that part of the code...
Yes indeed looks better.

Roland

Am 05.12.2014 um 19:35 schrieb Jose Fonseca:
> What I mean is, instead of
> 
>   if (switch_order) {
>      FOO(l_src, r)
>   } else {
>      FOO(r, l_src,)
>   }
> 
>    ...
> 
>   if (switch_order) {
>      BOO(l_src, r)
>   } else {
>      BOO(r, l_src,)
>   }
> 
> simply do
> 
>    op1 = l_src,
>    op2 = r;
>    if (switch_order) {
>       op1 = r;
>       op2 = l_src
>    }
> 
> 
>    FOO(op1, op2)
> 
>    ...
> 
>    BOO(op1, op2)
> 
> 
> 
> Jose
> 
> On 05/12/14 18:13, Roland Scheidegger wrote:
>> I am not quite sure what formulation do you suggest? This one seemed
>> about as simple as any other one I could quickly come up with. (though
>> the switch_order = false statements are redundant as they are the
>> default, so if you prefer that they can easily be killed, and the "if
>> (expr->operation == ir_binop_nequal)" should have been a ("else if
>> (expr->operation == ir_binop_nequal)" indeed.)
>>
>> Roland
>>
>>
>>
>> Am 05.12.2014 um 13:01 schrieb Jose Fonseca:
>>> Looks good AFAICT.
>>>
>>> But I think we should probably swap the operands only once, in one
>>> place, instead of having two branches for switch_order.
>>>
>>> Jose
>>>
>>> On 04/12/14 23:25, sroland at vmware.com wrote:
>>>> From: Roland Scheidegger <sroland at vmware.com>
>>>>
>>>> The original idea was to optimize away the condition by integrating it
>>>> directly
>>>> into the CMP instruction. However, with native integers this requires
>>>> an extra
>>>> I2F instruction. It is also fishy because the negation used didn't
>>>> really honor
>>>> ieee754 float comparison rules, not to mention the CMP instruction
>>>> itself
>>>> (being pretty much a legacy instruction) doesn't really have defined
>>>> special
>>>> float value behavior in any case.
>>>> So, use UCMP and adjust the code trying to optimize the condition away
>>>> accordingly (I have absolutely no idea if such conditions are actually
>>>> hit
>>>> or would be translated away somewhere else already).
>>>>
>>>> No piglit regressions on llvmpipe.
>>>> ---
>>>>    src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 65
>>>> ++++++++++++++++++++++--------
>>>>    1 file changed, 48 insertions(+), 17 deletions(-)
>>>>
>>>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> index 8e91c4b..ad0b05d 100644
>>>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>>>> @@ -2288,6 +2288,39 @@
>>>> glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
>>>>       bool switch_order = false;
>>>>
>>>>       ir_expression *const expr = ir->as_expression();
>>>> +
>>>> +   if (native_integers) {
>>>> +      if ((expr != NULL) && (expr->get_num_operands() == 2)) {
>>>> +         enum glsl_base_type type =
>>>> expr->operands[0]->type->base_type;
>>>> +         if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
>>>> +             type == GLSL_TYPE_BOOL) {
>>>> +            if (expr->operation == ir_binop_equal) {
>>>> +               if (expr->operands[0]->is_zero()) {
>>>> +                  src_ir = expr->operands[1];
>>>> +                  switch_order = true;
>>>> +               }
>>>> +               else if (expr->operands[1]->is_zero()) {
>>>> +                  src_ir = expr->operands[0];
>>>> +                  switch_order = true;
>>>> +               }
>>>> +            }
>>>> +            if (expr->operation == ir_binop_nequal) {
>>>> +               if (expr->operands[0]->is_zero()) {
>>>> +                  src_ir = expr->operands[1];
>>>> +                  switch_order = false;
>>>> +               }
>>>> +               else if (expr->operands[1]->is_zero()) {
>>>> +                  src_ir = expr->operands[0];
>>>> +                  switch_order = false;
>>>> +               }
>>>> +            }
>>>> +         }
>>>> +      }
>>>> +
>>>> +      src_ir->accept(this);
>>>> +      return switch_order;
>>>> +   }
>>>> +
>>>>       if ((expr != NULL) && (expr->get_num_operands() == 2)) {
>>>>          bool zero_on_left = false;
>>>>
>>>> @@ -2379,7 +2412,7 @@
>>>> glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct
>>>> glsl_type *
>>>>          const struct glsl_type *vec_type;
>>>>
>>>>          vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
>>>> -                     type->vector_elements, 1);
>>>> +                                         type->vector_elements, 1);
>>>>
>>>>          for (int i = 0; i < type->matrix_columns; i++) {
>>>>             emit_block_mov(ir, vec_type, l, r);
>>>> @@ -2447,7 +2480,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>>>>                swizzles[i] = first_enabled_chan;
>>>>          }
>>>>          r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
>>>> -                    swizzles[2], swizzles[3]);
>>>> +                                swizzles[2], swizzles[3]);
>>>>       }
>>>>
>>>>       assert(l.file != PROGRAM_UNDEFINED);
>>>> @@ -2461,23 +2494,21 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>>>>             st_src_reg l_src = st_src_reg(l);
>>>>             st_src_reg condition_temp = condition;
>>>>             l_src.swizzle =
>>>> swizzle_for_size(ir->lhs->type->vector_elements);
>>>> -
>>>> +
>>>>             if (native_integers) {
>>>> -            /* This is necessary because TGSI's CMP instruction
>>>> expects the
>>>> -             * condition to be a float, and we store booleans as
>>>> integers.
>>>> -             * TODO: really want to avoid i2f path and use UCMP.
>>>> Requires
>>>> -             * changes to process_move_condition though too.
>>>> -             */
>>>> -            condition_temp = get_temp(glsl_type::vec4_type);
>>>> -            condition.negate = 0;
>>>> -            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp),
>>>> condition);
>>>> -            condition_temp.swizzle = condition.swizzle;
>>>> +            if (switch_order) {
>>>> +               emit(ir, TGSI_OPCODE_UCMP, l, condition_temp, l_src,
>>>> r);
>>>> +            } else {
>>>> +               emit(ir, TGSI_OPCODE_UCMP, l, condition_temp, r,
>>>> l_src);
>>>> +            }
>>>>             }
>>>> -
>>>> -         if (switch_order) {
>>>> -            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
>>>> -         } else {
>>>> -            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
>>>> +         else {
>>>> +            if (switch_order) {
>>>> +               emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
>>>> +            } else {
>>>> +               emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
>>>> +            }
>>>> +
>>>>             }
>>>>
>>>>             l.index++;
>>>>
>>>
>>
>