[Mesa-dev] [PATCH v2] glsl_to_tgsi, mesa: fixes for native integers and integer booleans

Fri Sep 2 15:27:44 PDT 2011

Are there any objections to pushing this?

Bryan

On 08/31/2011 01:33 AM, Bryan Cain wrote:
> With this patch, there are no piglit regressions on softpipe with native
> integers enabled.  Unlike my previous patch, this uses integer values of
> ~0 and 0 for true and false, respectively, instead of the float values 1.0
> and 0.0.
> ---
>  src/mesa/main/uniforms.c                   |    6 +-
>  src/mesa/state_tracker/st_glsl_to_tgsi.cpp |  160 ++++++++++++++++++++--------
>  2 files changed, 116 insertions(+), 50 deletions(-)
>
> diff --git a/src/mesa/main/uniforms.c b/src/mesa/main/uniforms.c
> index cda840f..fa96fd3 100644
> --- a/src/mesa/main/uniforms.c
> +++ b/src/mesa/main/uniforms.c
> @@ -777,12 +777,12 @@ set_program_uniform(struct gl_context *ctx, struct gl_program *program,
>           if (isUniformBool) {
>              for (i = 0; i < elems; i++) {
>                 if (basicType == GL_FLOAT)
> -                  uniformVal[i].b = uniformVal[i].f != 0.0f ? 1 : 0;
> +                  uniformVal[i].u = uniformVal[i].f != 0.0f ? ~0 : 0;
>                 else
> -                  uniformVal[i].b = uniformVal[i].u ? 1 : 0;
> +                  uniformVal[i].u = uniformVal[i].u ? ~0 : 0;
>                 
>                 if (!ctx->Const.NativeIntegers)
> -                  uniformVal[i].f = uniformVal[i].b ? 1.0f : 0.0f;
> +                  uniformVal[i].f = uniformVal[i].u ? 1.0f : 0.0f;
>              }
>           }
>        }
> diff --git a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> index 2266083..c8f790a 100644
> --- a/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> +++ b/src/mesa/state_tracker/st_glsl_to_tgsi.cpp
> @@ -385,6 +385,8 @@ public:
>     void emit_scalar(ir_instruction *ir, unsigned op,
>          	    st_dst_reg dst, st_src_reg src0, st_src_reg src1);
>  
> +   void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
> +
>     void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
>  
>     void emit_scs(ir_instruction *ir, unsigned op,
> @@ -562,7 +564,10 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
>     }
>  
>     this->instructions.push_tail(inst);
> -   
> +
> +   if (native_integers)
> +      try_emit_float_set(ir, op, dst);
> +
>     return inst;
>  }
>  
> @@ -588,6 +593,25 @@ glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
>     return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
>  }
>  
> +/**
> + * Converts the 1.0/0.0 result of a float SET instruction to integer ~0/0.
> + */
> +void
> +glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
> +        		 st_dst_reg dst)
> +{
> +   if ((op == TGSI_OPCODE_SEQ ||
> +        op == TGSI_OPCODE_SNE ||
> +        op == TGSI_OPCODE_SGE ||
> +        op == TGSI_OPCODE_SLT))
> +   {
> +      st_src_reg src = st_src_reg(dst);
> +      src.negate = ~src.negate;
> +      dst.type = GLSL_TYPE_FLOAT;
> +      emit(ir, TGSI_OPCODE_F2I, dst, src);
> +   }
> +}
> +
>  /**
>   * Determines whether to use an integer, unsigned integer, or float opcode 
>   * based on the operands and input opcode, then emits the result.
> @@ -604,7 +628,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
>     if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
>        type = GLSL_TYPE_FLOAT;
>     else if (native_integers)
> -      type = src0.type;
> +      type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
>  
>  #define case4(c, f, i, u) \
>     case TGSI_OPCODE_##c: \
> @@ -630,12 +654,7 @@ glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
>        case3(SGE, ISGE, USGE);
>        case3(SLT, ISLT, USLT);
>        
> -      case2iu(SHL, SHL);
>        case2iu(ISHR, USHR);
> -      case2iu(NOT, NOT);
> -      case2iu(AND, AND);
> -      case2iu(OR, OR);
> -      case2iu(XOR, XOR);
>        
>        default: break;
>     }
> @@ -1389,7 +1408,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>     switch (ir->operation) {
>     case ir_unop_logic_not:
>        if (result_dst.type != GLSL_TYPE_FLOAT)
> -         emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], st_src_reg_for_type(result_dst.type, 0));
> +         emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
>        else {
>           /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
>            * older GPUs implement SEQ using multiple instructions (i915 uses two
> @@ -1489,10 +1508,10 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
>        break;
>     case ir_binop_greater:
> -      emit(ir, TGSI_OPCODE_SGT, result_dst, op[0], op[1]);
> +      emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
>        break;
>     case ir_binop_lequal:
> -      emit(ir, TGSI_OPCODE_SLE, result_dst, op[0], op[1]);
> +      emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
>        break;
>     case ir_binop_gequal:
>        emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
> @@ -1605,41 +1624,52 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>     }
>  
>     case ir_binop_logic_xor:
> -      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
> +      if (native_integers)
> +         emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
> +      else
> +         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
>        break;
>  
>     case ir_binop_logic_or: {
> -      /* After the addition, the value will be an integer on the
> -       * range [0,2].  Zero stays zero, and positive values become 1.0.
> -       */
> -      glsl_to_tgsi_instruction *add =
> -         emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
> -      if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
> -          result_dst.type == GLSL_TYPE_FLOAT) {
> -         /* The clamping to [0,1] can be done for free in the fragment
> -          * shader with a saturate if floats are being used as boolean values.
> -          */
> -         add->saturate = true;
> -      } else if (result_dst.type == GLSL_TYPE_FLOAT) {
> -         /* Negating the result of the addition gives values on the range
> -          * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
> -          * is achieved using SLT.
> +      if (native_integers) {
> +         /* If integers are used as booleans, we can use an actual "or" 
> +          * instruction.
>            */
> -         st_src_reg slt_src = result_src;
> -         slt_src.negate = ~slt_src.negate;
> -         emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
> +         assert(native_integers);
> +         emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
>        } else {
> -         /* Use an SNE on the result of the addition.  Zero stays zero,
> -          * 1 stays 1, and 2 becomes 1.
> +         /* After the addition, the value will be an integer on the
> +          * range [0,2].  Zero stays zero, and positive values become 1.0.
>            */
> -         emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
> +         glsl_to_tgsi_instruction *add =
> +            emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
> +         if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
> +            /* The clamping to [0,1] can be done for free in the fragment
> +             * shader with a saturate if floats are being used as boolean values.
> +             */
> +            add->saturate = true;
> +         } else {
> +            /* Negating the result of the addition gives values on the range
> +             * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
> +             * is achieved using SLT.
> +             */
> +            st_src_reg slt_src = result_src;
> +            slt_src.negate = ~slt_src.negate;
> +            emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
> +         }
>        }
>        break;
>     }
>  
>     case ir_binop_logic_and:
> -      /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
> -      emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
> +      /* If native integers are disabled, the bool args are stored as float 0.0
> +       * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
> +       * actual AND opcode.
> +       */
> +      if (native_integers)
> +         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
> +      else
> +         emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
>        break;
>  
>     case ir_binop_dot:
> @@ -1662,18 +1692,36 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
>        break;
>     case ir_unop_i2f:
> -   case ir_unop_b2f:
>        if (native_integers) {
>           emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
>           break;
>        }
> +      /* fallthrough to next case otherwise */
> +   case ir_unop_b2f:
> +      if (native_integers) {
> +         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
> +         break;
> +      }
> +      /* fallthrough to next case otherwise */
>     case ir_unop_i2u:
>     case ir_unop_u2i:
>        /* Converting between signed and unsigned integers is a no-op. */
> -   case ir_unop_b2i:
> -      /* Booleans are stored as integers (or floats in GLSL 1.20 and lower). */
>        result_src = op[0];
>        break;
> +   case ir_unop_b2i:
> +      if (native_integers) {
> +         /* Booleans are stored as integers using ~0 for true and 0 for false.
> +          * GLSL requires that int(bool) return 1 for true and 0 for false.
> +          * This conversion is done with AND, but it could be done with NEG.
> +          */
> +         emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
> +      } else {
> +         /* Booleans and integers are both stored as floats when native 
> +          * integers are disabled.
> +          */
> +         result_src = op[0];
> +      }
> +      break;
>     case ir_unop_f2i:
>        if (native_integers)
>           emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
> @@ -1681,9 +1729,13 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>           emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
>        break;
>     case ir_unop_f2b:
> +      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
> +      break;
>     case ir_unop_i2b:
> -      emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], 
> -            st_src_reg_for_type(result_dst.type, 0));
> +      if (native_integers)
> +         emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
> +      else
> +         emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
>        break;
>     case ir_unop_trunc:
>        emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
> @@ -1711,7 +1763,7 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>        break;
>  
>     case ir_unop_bit_not:
> -      if (glsl_version >= 130) {
> +      if (native_integers) {
>           emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
>           break;
>        }
> @@ -1721,27 +1773,27 @@ glsl_to_tgsi_visitor::visit(ir_expression *ir)
>           break;
>        }
>     case ir_binop_lshift:
> -      if (glsl_version >= 130) {
> +      if (native_integers) {
>           emit(ir, TGSI_OPCODE_SHL, result_dst, op[0]);
>           break;
>        }
>     case ir_binop_rshift:
> -      if (glsl_version >= 130) {
> +      if (native_integers) {
>           emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0]);
>           break;
>        }
>     case ir_binop_bit_and:
> -      if (glsl_version >= 130) {
> +      if (native_integers) {
>           emit(ir, TGSI_OPCODE_AND, result_dst, op[0]);
>           break;
>        }
>     case ir_binop_bit_xor:
> -      if (glsl_version >= 130) {
> +      if (native_integers) {
>           emit(ir, TGSI_OPCODE_XOR, result_dst, op[0]);
>           break;
>        }
>     case ir_binop_bit_or:
> -      if (glsl_version >= 130) {
> +      if (native_integers) {
>           emit(ir, TGSI_OPCODE_OR, result_dst, op[0]);
>           break;
>        }
> @@ -2129,12 +2181,25 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>  
>        for (i = 0; i < type_size(ir->lhs->type); i++) {
>           st_src_reg l_src = st_src_reg(l);
> +         st_src_reg condition_temp = condition;
>           l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
>           
> +         if (native_integers) {
> +            /* This is necessary because TGSI's CMP instruction expects the
> +             * condition to be a float, and we store booleans as integers.
> +             * If TGSI had a UCMP instruction or similar, this extra
> +             * instruction would not be necessary.
> +             */
> +            condition_temp = get_temp(glsl_type::vec4_type);
> +            condition.negate = 0;
> +            emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
> +            condition_temp.swizzle = condition.swizzle;
> +         }
> +         
>           if (switch_order) {
> -            emit(ir, TGSI_OPCODE_CMP, l, condition, l_src, r);
> +            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
>           } else {
> -            emit(ir, TGSI_OPCODE_CMP, l, condition, r, l_src);
> +            emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
>           }
>  
>           l.index++;
> @@ -2154,6 +2219,7 @@ glsl_to_tgsi_visitor::visit(ir_assignment *ir)
>        inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
>        new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
>        new_inst->saturate = inst->saturate;
> +      inst->dead_mask = inst->dst.writemask;
>     } else {
>        for (i = 0; i < type_size(ir->lhs->type); i++) {
>           emit(ir, TGSI_OPCODE_MOV, l, r);