[Mesa-dev] [PATCH 5/8] glsl: Add support for lowering 4x8 pack/unpack operations

Fri Jan 25 11:59:59 PST 2013

On 01/25/2013 11:59 AM, Chad Versace wrote:
> On 01/24/2013 07:47 PM, Matt Turner wrote:
>> Lower them to arithmetic and bit manipulation expressions.
>> ---
>>  src/glsl/ir_optimization.h          |    6 +
>>  src/glsl/lower_packing_builtins.cpp |  279 +++++++++++++++++++++++++++++++++++
>>  2 files changed, 285 insertions(+), 0 deletions(-)
>>
>> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
>> index ac90b87..8f33018 100644
>> --- a/src/glsl/ir_optimization.h
>> +++ b/src/glsl/ir_optimization.h
>> @@ -54,6 +54,12 @@ enum lower_packing_builtins_op {
>>  
>>     LOWER_PACK_HALF_2x16_TO_SPLIT        = 0x0040,
>>     LOWER_UNPACK_HALF_2x16_TO_SPLIT      = 0x0080,
>> +
>> +   LOWER_PACK_SNORM_4x8                 = 0x0100,
>> +   LOWER_UNPACK_SNORM_4x8               = 0x0200,
>> +
>> +   LOWER_PACK_UNORM_4x8                 = 0x0400,
>> +   LOWER_UNPACK_UNORM_4x8               = 0x0800,
>>  };
>>  
>>  bool do_common_optimization(exec_list *ir, bool linked,
>> diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp
>> index 49176cc..aa6765f 100644
>> --- a/src/glsl/lower_packing_builtins.cpp
>> +++ b/src/glsl/lower_packing_builtins.cpp
>> @@ -85,9 +85,15 @@ public:
>>        case LOWER_PACK_SNORM_2x16:
>>           *rvalue = lower_pack_snorm_2x16(op0);
>>           break;
>> +      case LOWER_PACK_SNORM_4x8:
>> +         *rvalue = lower_pack_snorm_4x8(op0);
>> +         break;
>>        case LOWER_PACK_UNORM_2x16:
>>           *rvalue = lower_pack_unorm_2x16(op0);
>>           break;
>> +      case LOWER_PACK_UNORM_4x8:
>> +         *rvalue = lower_pack_unorm_4x8(op0);
>> +         break;
>>        case LOWER_PACK_HALF_2x16:
>>           *rvalue = lower_pack_half_2x16(op0);
>>           break;
>> @@ -97,9 +103,15 @@ public:
>>        case LOWER_UNPACK_SNORM_2x16:
>>           *rvalue = lower_unpack_snorm_2x16(op0);
>>           break;
>> +      case LOWER_UNPACK_SNORM_4x8:
>> +         *rvalue = lower_unpack_snorm_4x8(op0);
>> +         break;
>>        case LOWER_UNPACK_UNORM_2x16:
>>           *rvalue = lower_unpack_unorm_2x16(op0);
>>           break;
>> +      case LOWER_UNPACK_UNORM_4x8:
>> +         *rvalue = lower_unpack_unorm_4x8(op0);
>> +         break;
>>        case LOWER_UNPACK_HALF_2x16:
>>           *rvalue = lower_unpack_half_2x16(op0);
>>           break;
>> @@ -137,18 +149,30 @@ private:
>>        case ir_unop_pack_snorm_2x16:
>>           result = op_mask & LOWER_PACK_SNORM_2x16;
>>           break;
>> +      case ir_unop_pack_snorm_4x8:
>> +         result = op_mask & LOWER_PACK_SNORM_4x8;
>> +         break;
>>        case ir_unop_pack_unorm_2x16:
>>           result = op_mask & LOWER_PACK_UNORM_2x16;
>>           break;
>> +      case ir_unop_pack_unorm_4x8:
>> +         result = op_mask & LOWER_PACK_UNORM_4x8;
>> +         break;
>>        case ir_unop_pack_half_2x16:
>>           result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
>>           break;
>>        case ir_unop_unpack_snorm_2x16:
>>           result = op_mask & LOWER_UNPACK_SNORM_2x16;
>>           break;
>> +      case ir_unop_unpack_snorm_4x8:
>> +         result = op_mask & LOWER_UNPACK_SNORM_4x8;
>> +         break;
>>        case ir_unop_unpack_unorm_2x16:
>>           result = op_mask & LOWER_UNPACK_UNORM_2x16;
>>           break;
>> +      case ir_unop_unpack_unorm_4x8:
>> +         result = op_mask & LOWER_UNPACK_UNORM_4x8;
>> +         break;
>>        case ir_unop_unpack_half_2x16:
>>           result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
>>           break;
>> @@ -214,6 +238,30 @@ private:
>>     }
>>  
>>     /**
>> +    * \brief Pack four uint8's into a single uint32.
>> +    *
>> +    * Interpret the given uvec4 as a uint32 quad. Pack the quad into a uint32
>> +    * where the least significant bits specify the first element of the quad.
>> +    * Return the uint32.
>> +    */
> 
> I find the term "uint32 quad" confusing. It is too reminiscient of "quadword".
> This not-so-bright reviewer thought: "A uint32 quadword? Huh? Oh! That means
> a uint32 4-tuple". So, I'd like to see the phrase changed to "uint32 4-tuple"
> or something similar, but this suggestion doesn't block the patch.
> 
>> +   ir_rvalue*
>> +   pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
>> +   {
>> +      assert(uvec4_rval->type == glsl_type::uvec4_type);
>> +
>> +      /* uvec4 u = UVEC4_RVAL; */
>> +      ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
>> +                                          "tmp_pack_uvec4_to_uint");
>> +      factory.emit(assign(u, uvec4_rval));
>> +
>> +      /* return ((u.w 0xff) << 24) | ((u.z & 0xff) << 16) | ((u.y & 0xff) << 8) | (u.x & 0xff); */
>                         ^^^ missing &
>> +      return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)), constant(24u)),
>> +                           lshift(bit_and(swizzle_z(u), constant(0xffu)), constant(16u))),
>> +                    bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)), constant(8u)),
>> +                           bit_and(swizzle_x(u), constant(0xffu))));
> 
> The four bit-and instructions should be factored out to a single instruction.
> Paul already covered the best way to do that.
> 
> 
> 
>>     /**
>> +    * \brief Unpack a uint32 into four uint8's.
>> +    *
>> +    * Interpret the given uint32 as a uint8 quad where the uint32's least
>> +    * significant bits specify the quad's first element. Return the uint8
>> +    * quad as a uvec4.
>> +    */
> 
> Same complaint about "uint8 quad".
> 
> 
>> +   ir_rvalue*
>> +   unpack_uint_to_uvec4(ir_rvalue *uint_rval)
>> +   {
>> +      assert(uint_rval->type == glsl_type::uint_type);
>> +
>> +      /* uint u = UINT_RVAL; */
>> +      ir_variable *u = factory.make_temp(glsl_type::uint_type,
>> +                                          "tmp_unpack_uint_to_uvec4_u");
>> +      factory.emit(assign(u, uint_rval));
>> +
>> +      /* uvec4 u4; */
>> +      ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
>> +                                           "tmp_unpack_uint_to_uvec4_u4");
>> +
>> +      /* u4.x = u & 0xffu; */
>> +      factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
>> +
>> +      /* u4.y = (u >> 8u) & 0xffu; */
>> +      factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
>> +                                      constant(0xffu)), WRITEMASK_Y));
>> +
>> +      /* u4.z = (u >> 16u) & 0xffu; */
>> +      factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
>> +                                      constant(0xffu)), WRITEMASK_Z));
>> +
>> +      /* u4.w = (u >> 24u) */
>> +      factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
>> +
>> +      return deref(u4).val;
>> +   }
> 
> As for unpack_uvec4_to_uint(), the three bit-and instructions should be
> factored into one. Like this:
> 
> u4.x = u;
> u4.y = u >> 8u;
> u4.z = u >> 16u;
> u4.w = u >> 24u;
> u4 = u4 & 0xff;
> return u4;
> 
> 
> 
>>     /**
>> +    * \brief Lower a packUnorm4x8 expression.
>> +    *
>> +    * \param vec4_rval is packUnorm4x8's input
>> +    * \return packUnorm4x8's output as a uint rvalue
>> +    */
>> +   ir_rvalue*
>> +   lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
>> +   {
>> +      /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
>> +       *
>> +       *    highp uint packUnorm4x8 (vec4 v)
>> +       *    --------------------------------
>> +       *    First, converts each component of the normalized floating-point value
>> +       *    v into 16-bit integer values. Then, the results are packed into the
>> +       *    returned 32-bit unsigned integer.
>> +       *
>> +       *    The conversion for component c of v to fixed point is done as
>> +       *    follows:
>> +       *
>> +       *       packUnorm4x8: round(clamp(c, 0, +1) * 65535.0)
>                                                         ^^^^^^^
> Copy-paste error. Should be 255.0.
> 
> With the copy-paste errors fixed and the instruction reductions applied,
> this is
> 
> Reviewed-by: Chad Versace <chad.versace at linux.intel.com>
>