<div dir="ltr">On 24 January 2013 19:47, Matt Turner <<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>> wrote: <div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> Lower them to arithmetic and bit manipulation expressions. --- src/glsl/ir_optimization.h | 6 + src/glsl/lower_packing_builtins.cpp | 279 +++++++++++++++++++++++++++++++++++ 2 files changed, 285 insertions(+), 0 deletions(-) diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h index ac90b87..8f33018 100644 --- a/src/glsl/ir_optimization.h +++ b/src/glsl/ir_optimization.h @@ -54,6 +54,12 @@ enum lower_packing_builtins_op { LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040, LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080, + + LOWER_PACK_SNORM_4x8 = 0x0100, + LOWER_UNPACK_SNORM_4x8 = 0x0200, + + LOWER_PACK_UNORM_4x8 = 0x0400, + LOWER_UNPACK_UNORM_4x8 = 0x0800, }; bool do_common_optimization(exec_list *ir, bool linked, diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp index 49176cc..aa6765f 100644 --- a/src/glsl/lower_packing_builtins.cpp +++ b/src/glsl/lower_packing_builtins.cpp @@ -85,9 +85,15 @@ public: case LOWER_PACK_SNORM_2x16: *rvalue = lower_pack_snorm_2x16(op0); break; + case LOWER_PACK_SNORM_4x8: + *rvalue = lower_pack_snorm_4x8(op0); + break; case LOWER_PACK_UNORM_2x16: *rvalue = lower_pack_unorm_2x16(op0); break; + case LOWER_PACK_UNORM_4x8: + *rvalue = lower_pack_unorm_4x8(op0); + break; case LOWER_PACK_HALF_2x16: *rvalue = lower_pack_half_2x16(op0); break; @@ -97,9 +103,15 @@ public: case LOWER_UNPACK_SNORM_2x16: *rvalue = lower_unpack_snorm_2x16(op0); break; + case LOWER_UNPACK_SNORM_4x8: + *rvalue = lower_unpack_snorm_4x8(op0); + break; case LOWER_UNPACK_UNORM_2x16: *rvalue = lower_unpack_unorm_2x16(op0); break; + case LOWER_UNPACK_UNORM_4x8: + *rvalue = lower_unpack_unorm_4x8(op0); + break; case LOWER_UNPACK_HALF_2x16: *rvalue = lower_unpack_half_2x16(op0); break; @@ -137,18 +149,30 @@ private: case ir_unop_pack_snorm_2x16: result = op_mask & LOWER_PACK_SNORM_2x16; break; + case ir_unop_pack_snorm_4x8: + result = op_mask & LOWER_PACK_SNORM_4x8; + break; case ir_unop_pack_unorm_2x16: result = op_mask & LOWER_PACK_UNORM_2x16; break; + case ir_unop_pack_unorm_4x8: + result = op_mask & LOWER_PACK_UNORM_4x8; + break; case ir_unop_pack_half_2x16: result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT); break; case ir_unop_unpack_snorm_2x16: result = op_mask & LOWER_UNPACK_SNORM_2x16; break; + case ir_unop_unpack_snorm_4x8: + result = op_mask & LOWER_UNPACK_SNORM_4x8; + break; case ir_unop_unpack_unorm_2x16: result = op_mask & LOWER_UNPACK_UNORM_2x16; break; + case ir_unop_unpack_unorm_4x8: + result = op_mask & LOWER_UNPACK_UNORM_4x8; + break; case ir_unop_unpack_half_2x16: result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT); break; @@ -214,6 +238,30 @@ private: } /** + * \brief Pack four uint8's into a single uint32. + * + * Interpret the given uvec4 as a uint32 quad. Pack the quad into a uint32 + * where the least significant bits specify the first element of the quad. + * Return the uint32. + */ + ir_rvalue* + pack_uvec4_to_uint(ir_rvalue *uvec4_rval) + { + assert(uvec4_rval->type == glsl_type::uvec4_type); + + /* uvec4 u = UVEC4_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uvec4_type, + "tmp_pack_uvec4_to_uint"); + factory.emit(assign(u, uvec4_rval)); </blockquote><div> </div><div>Rather than do four scalar bit_and(..., constant(0xffu)) instructions below, how about changing the above line to: factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu)))); </div><div>That way we take advantage of vector processing in the GPU to do all four bit_ands at once. </div><div>With that fixed (as well as the copy/paste errors Ian spotted), this patch is: Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>> </div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex"> + + /* return ((u.w 0xff) << 24) | ((u.z & 0xff) << 16) | ((u.y & 0xff) << 8) | (u.x & 0xff); */ + return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)), constant(24u)), + lshift(bit_and(swizzle_z(u), constant(0xffu)), constant(16u))), + bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)), constant(8u)), + bit_and(swizzle_x(u), constant(0xffu)))); + } + + /** * \brief Unpack a uint32 into two uint16's. * * Interpret the given uint32 as a uint16 pair where the uint32's least @@ -244,6 +292,44 @@ private: } /** + * \brief Unpack a uint32 into four uint8's. + * + * Interpret the given uint32 as a uint8 quad where the uint32's least + * significant bits specify the quad's first element. Return the uint8 + * quad as a uvec4. + */ + ir_rvalue* + unpack_uint_to_uvec4(ir_rvalue *uint_rval) + { + assert(uint_rval->type == glsl_type::uint_type); + + /* uint u = UINT_RVAL; */ + ir_variable *u = factory.make_temp(glsl_type::uint_type, + "tmp_unpack_uint_to_uvec4_u"); + factory.emit(assign(u, uint_rval)); + + /* uvec4 u4; */ + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type, + "tmp_unpack_uint_to_uvec4_u4"); + + /* u4.x = u & 0xffu; */ + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X)); + + /* u4.y = (u >> 8u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)), + constant(0xffu)), WRITEMASK_Y)); + + /* u4.z = (u >> 16u) & 0xffu; */ + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)), + constant(0xffu)), WRITEMASK_Z)); + + /* u4.w = (u >> 24u) */ + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W)); + + return deref(u4).val; + } + + /** * \brief Lower a packSnorm2x16 expression. * * \param vec2_rval is packSnorm2x16's input @@ -293,6 +379,55 @@ private: } /** + * \brief Lower a packSnorm4x8 expression. + * + * \param vec4_rval is packSnorm4x8's input + * \return packSnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_snorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packSnorm4x8(vec4 v) + * ------------------------------- + * First, converts each component of the normalized floating-point value + * v into 8-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint( + * uvec4(ivec4( + * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f)))); + * + * It is necessary to first convert the vec4 to ivec4 rather than directly + * converting vec4 to uvec4 because the latter conversion is undefined. + * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to + * convert a negative floating point value to an uint". + */ + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + i2u(f2i(round_even(mul(clamp(vec4_rval, + constant(-1.0f), + constant(1.0f)), + constant(127.0f)))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** * \brief Lower an unpackSnorm2x16 expression. * * \param uint_rval is unpackSnorm2x16's input @@ -352,6 +487,65 @@ private: } /** + * \brief Lower an unpackSnorm4x8 expression. + * + * \param uint_rval is unpackSnorm4x8's input + * \return unpackSnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_snorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackSnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * four-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackSnorm4x8: clamp(f / 127.0, -1, +1) + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return clamp( + * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f, + * -1.0f, 1.0f); + * + * The above IR may appear unnecessarily complex, but the intermediate + * conversion to ivec4 and the bit shifts are necessary to correctly unpack + * negative floats. + * + * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0, + * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we + * place that int8 into an int32, which results in the *positive* integer + * 0x000000ff. The int8's sign bit becomes, in the int32, the rather + * unimportant bit 8. We must now extend the int8's sign bit into bits + * 9-32, which is accomplished by left-shifting then right-shifting. + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = + clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)), + constant(24u)), + constant(24u))), + constant(127.0f)), + constant(-1.0f), + constant(1.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** * \brief Lower a packUnorm2x16 expression. * * \param vec2_rval is packUnorm2x16's input @@ -396,6 +590,50 @@ private: } /** + * \brief Lower a packUnorm4x8 expression. + * + * \param vec4_rval is packUnorm4x8's input + * \return packUnorm4x8's output as a uint rvalue + */ + ir_rvalue* + lower_pack_unorm_4x8(ir_rvalue *vec4_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp uint packUnorm4x8 (vec4 v) + * -------------------------------- + * First, converts each component of the normalized floating-point value + * v into 16-bit integer values. Then, the results are packed into the + * returned 32-bit unsigned integer. + * + * The conversion for component c of v to fixed point is done as + * follows: + * + * packUnorm4x8: round(clamp(c, 0, +1) * 65535.0) + * + * The first component of the vector will be written to the least + * significant bits of the output; the last component will be written to + * the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return pack_uvec4_to_uint(uvec4( + * round(clamp(VEC2_RVALUE, 0.0f, 1.0f) * 255.0f))); + * + * Here it is safe to directly convert the vec4 to uvec4 because the the + * vec4 has been clamped to a non-negative range. + */ + + assert(vec4_rval->type == glsl_type::vec4_type); + + ir_rvalue *result = pack_uvec4_to_uint( + f2u(round_even(mul(saturate(vec4_rval), constant(255.0f))))); + + assert(result->type == glsl_type::uint_type); + return result; + } + + /** * \brief Lower an unpackUnorm2x16 expression. * * \param uint_rval is unpackUnorm2x16's input @@ -437,6 +675,47 @@ private: } /** + * \brief Lower an unpackUnorm4x8 expression. + * + * \param uint_rval is unpackUnorm4x8's input + * \return unpackUnorm4x8's output as a vec4 rvalue + */ + ir_rvalue* + lower_unpack_unorm_4x8(ir_rvalue *uint_rval) + { + /* From page 137 (143 of pdf) of the GLSL 4.30 spec: + * + * highp vec4 unpackUnorm4x8 (highp uint p) + * ---------------------------------------- + * First, unpacks a single 32-bit unsigned integer p into four + * 8-bit unsigned integers. Then, each component is converted to + * a normalized floating-point value to generate the returned + * two-component vector. + * + * The conversion for unpacked fixed-point value f to floating point is + * done as follows: + * + * unpackUnorm4x8: f / 255.0 + * + * The first component of the returned vector will be extracted from the + * least significant bits of the input; the last component will be + * extracted from the most significant bits. + * + * This function generates IR that approximates the following pseudo-GLSL: + * + * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0; + */ + + assert(uint_rval->type == glsl_type::uint_type); + + ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)), + constant(255.0f)); + + assert(result->type == glsl_type::vec4_type); + return result; + } + + /** * \brief Lower the component-wise calculation of packHalf2x16. * * \param f_rval is one component of packHafl2x16's input -- 1.7.8.6 _______________________________________________ mesa-dev mailing list <a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a> <a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a> </blockquote></div> </div></div>