[Mesa-dev] [PATCH] mesa/st: don't use CMP / I2F for conditional assignments with native integers

Fri Dec 5 10:13:01 PST 2014

I am not quite sure what formulation do you suggest? This one seemed
about as simple as any other one I could quickly come up with. (though
the switch_order = false statements are redundant as they are the
default, so if you prefer that they can easily be killed, and the "if
(expr->operation == ir_binop_nequal)" should have been a ("else if
(expr->operation == ir_binop_nequal)" indeed.)

Roland

Am 05.12.2014 um 13:01 schrieb Jose Fonseca:
> Looks good AFAICT.
> 
> But I think we should probably swap the operands only once, in one
> place, instead of having two branches for switch_order.
> 
> Jose
> 
> On 04/12/14 23:25, sroland at vmware.com wrote:
>> From: Roland Scheidegger <sroland at vmware.com>
>>
>> The original idea was to optimize away the condition by integrating it
>> directly
>> into the CMP instruction. However, with native integers this requires
>> an extra
>> I2F instruction. It is also fishy because the negation used didn't
>> really honor
>> ieee754 float comparison rules, not to mention the CMP instruction itself
>> (being pretty much a legacy instruction) doesn't really have defined
>> special
>> float value behavior in any case.
>> So, use UCMP and adjust the code trying to optimize the condition away
>> accordingly (I have absolutely no idea if such conditions are actually
>> hit
>> or would be translated away somewhere else already).
>>
>> No piglit regressions on llvmpipe.
>> ---
>>   src/mesa/state_tracker/st_glsl_to_tgsi.cpp | 65
>> ++++++++++++++++++++++--------
>>   1 file changed, 48 insertions(+), 17 deletions(-)
>>
>> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> index 8e91c4b..ad0b05d 100644
>> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
>> @@ -2288,6 +2288,39 @@
>> glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
>>      bool switch_order = false;
>>
>>      ir_expression *const expr = ir->as_expression();
>> +
>> +   if (native_integers) {
>> +      if ((expr != NULL) && (expr->get_num_operands() == 2)) {
>> +         enum glsl_base_type type = expr->operands[0]->type->base_type;
>> +         if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
>> +             type == GLSL_TYPE_BOOL) {
>> +            if (expr->operation == ir_binop_equal) {
>> +               if (expr->operands[0]->is_zero()) {
>> +                  src_ir = expr->operands[1];
>> +                  switch_order = true;
>> +               }
>> +               else if (expr->operands[1]->is_zero()) {
>> +                  src_ir = expr->operands[0];
>> +                  switch_order = true;
>> +               }
>> +            }
>> +            if (expr->operation == ir_binop_nequal) {
>> +               if (expr->operands[0]->is_zero()) {
>> +                  src_ir = expr->operands[1];
>> +                  switch_order = false;
>> +               }
>> +               else if (expr->operands[1]->is_zero()) {
>> +                  src_ir = expr->operands[0];
>> +                  switch_order = false;
>> +               }
>> +            }
>> +         }
>> +      }
>> +
>> +      src_ir->accept(this);
>> +      return switch_order;
>> +   }
>> +
>>      if ((expr != NULL) && (expr->get_num_operands() == 2)) {
>>         bool zero_on_left = false;
>>
>> @@ -2379,7 +2412,7 @@
>> glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct
>> glsl_type *
>>         const struct glsl_type *vec_type;
>>
>>         vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
>> -                     type->vector_elements, 1);
>> +                                         type->vector_elements, 1);
>>
>>         for (int i = 0; i < type->matrix_columns; i++) {
>>            emit_block_mov(ir, vec_type, l, r);
>> @@ -2447,7 +2480,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>>               swizzles[i] = first_enabled_chan;
>>         }
>>         r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
>> -                    swizzles[2], swizzles[3]);
>> +                                swizzles[2], swizzles[3]);
>>      }
>>
>>      assert(l.file != PROGRAM_UNDEFINED);
>> @@ -2461,23 +2494,21 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>>            st_src_reg l_src = st_src_reg(l);
>>            st_src_reg condition_temp = condition;
>>            l_src.swizzle =
>> swizzle_for_size(ir->lhs->type->vector_elements);
>> -
>> +
>>            if (native_integers) {
>> -            /* This is necessary because TGSI's CMP instruction
>> expects the
>> -             * condition to be a float, and we store booleans as
>> integers.
>> -             * TODO: really want to avoid i2f path and use UCMP.
>> Requires
>> -             * changes to process_move_condition though too.
>> -             */
>> -            condition_temp = get_temp(glsl_type::vec4_type);
>> -            condition.negate = 0;
>> -            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp),
>> condition);
>> -            condition_temp.swizzle = condition.swizzle;
>> +            if (switch_order) {
>> +               emit(ir, TGSI_OPCODE_UCMP, l, condition_temp, l_src, r);
>> +            } else {
>> +               emit(ir, TGSI_OPCODE_UCMP, l, condition_temp, r, l_src);
>> +            }
>>            }
>> -
>> -         if (switch_order) {
>> -            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
>> -         } else {
>> -            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
>> +         else {
>> +            if (switch_order) {
>> +               emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
>> +            } else {
>> +               emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
>> +            }
>> +
>>            }
>>
>>            l.index++;
>>
>