<div dir="ltr">On 24 January 2013 19:47, Matt Turner <span dir="ltr"><<a href="mailto:mattst88@gmail.com" target="_blank">mattst88@gmail.com</a>></span> wrote:<br><div class="gmail_extra"><div class="gmail_quote"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
Lower them to arithmetic and bit manipulation expressions.<br>
---<br>
src/glsl/ir_optimization.h | 6 +<br>
src/glsl/lower_packing_builtins.cpp | 279 +++++++++++++++++++++++++++++++++++<br>
2 files changed, 285 insertions(+), 0 deletions(-)<br>
<br>
diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h<br>
index ac90b87..8f33018 100644<br>
--- a/src/glsl/ir_optimization.h<br>
+++ b/src/glsl/ir_optimization.h<br>
@@ -54,6 +54,12 @@ enum lower_packing_builtins_op {<br>
<br>
LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040,<br>
LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080,<br>
+<br>
+ LOWER_PACK_SNORM_4x8 = 0x0100,<br>
+ LOWER_UNPACK_SNORM_4x8 = 0x0200,<br>
+<br>
+ LOWER_PACK_UNORM_4x8 = 0x0400,<br>
+ LOWER_UNPACK_UNORM_4x8 = 0x0800,<br>
};<br>
<br>
bool do_common_optimization(exec_list *ir, bool linked,<br>
diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp<br>
index 49176cc..aa6765f 100644<br>
--- a/src/glsl/lower_packing_builtins.cpp<br>
+++ b/src/glsl/lower_packing_builtins.cpp<br>
@@ -85,9 +85,15 @@ public:<br>
case LOWER_PACK_SNORM_2x16:<br>
*rvalue = lower_pack_snorm_2x16(op0);<br>
break;<br>
+ case LOWER_PACK_SNORM_4x8:<br>
+ *rvalue = lower_pack_snorm_4x8(op0);<br>
+ break;<br>
case LOWER_PACK_UNORM_2x16:<br>
*rvalue = lower_pack_unorm_2x16(op0);<br>
break;<br>
+ case LOWER_PACK_UNORM_4x8:<br>
+ *rvalue = lower_pack_unorm_4x8(op0);<br>
+ break;<br>
case LOWER_PACK_HALF_2x16:<br>
*rvalue = lower_pack_half_2x16(op0);<br>
break;<br>
@@ -97,9 +103,15 @@ public:<br>
case LOWER_UNPACK_SNORM_2x16:<br>
*rvalue = lower_unpack_snorm_2x16(op0);<br>
break;<br>
+ case LOWER_UNPACK_SNORM_4x8:<br>
+ *rvalue = lower_unpack_snorm_4x8(op0);<br>
+ break;<br>
case LOWER_UNPACK_UNORM_2x16:<br>
*rvalue = lower_unpack_unorm_2x16(op0);<br>
break;<br>
+ case LOWER_UNPACK_UNORM_4x8:<br>
+ *rvalue = lower_unpack_unorm_4x8(op0);<br>
+ break;<br>
case LOWER_UNPACK_HALF_2x16:<br>
*rvalue = lower_unpack_half_2x16(op0);<br>
break;<br>
@@ -137,18 +149,30 @@ private:<br>
case ir_unop_pack_snorm_2x16:<br>
result = op_mask & LOWER_PACK_SNORM_2x16;<br>
break;<br>
+ case ir_unop_pack_snorm_4x8:<br>
+ result = op_mask & LOWER_PACK_SNORM_4x8;<br>
+ break;<br>
case ir_unop_pack_unorm_2x16:<br>
result = op_mask & LOWER_PACK_UNORM_2x16;<br>
break;<br>
+ case ir_unop_pack_unorm_4x8:<br>
+ result = op_mask & LOWER_PACK_UNORM_4x8;<br>
+ break;<br>
case ir_unop_pack_half_2x16:<br>
result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);<br>
break;<br>
case ir_unop_unpack_snorm_2x16:<br>
result = op_mask & LOWER_UNPACK_SNORM_2x16;<br>
break;<br>
+ case ir_unop_unpack_snorm_4x8:<br>
+ result = op_mask & LOWER_UNPACK_SNORM_4x8;<br>
+ break;<br>
case ir_unop_unpack_unorm_2x16:<br>
result = op_mask & LOWER_UNPACK_UNORM_2x16;<br>
break;<br>
+ case ir_unop_unpack_unorm_4x8:<br>
+ result = op_mask & LOWER_UNPACK_UNORM_4x8;<br>
+ break;<br>
case ir_unop_unpack_half_2x16:<br>
result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);<br>
break;<br>
@@ -214,6 +238,30 @@ private:<br>
}<br>
<br>
/**<br>
+ * \brief Pack four uint8's into a single uint32.<br>
+ *<br>
+ * Interpret the given uvec4 as a uint32 quad. Pack the quad into a uint32<br>
+ * where the least significant bits specify the first element of the quad.<br>
+ * Return the uint32.<br>
+ */<br>
+ ir_rvalue*<br>
+ pack_uvec4_to_uint(ir_rvalue *uvec4_rval)<br>
+ {<br>
+ assert(uvec4_rval->type == glsl_type::uvec4_type);<br>
+<br>
+ /* uvec4 u = UVEC4_RVAL; */<br>
+ ir_variable *u = factory.make_temp(glsl_type::uvec4_type,<br>
+ "tmp_pack_uvec4_to_uint");<br>
+ factory.emit(assign(u, uvec4_rval));<br></blockquote><div><br></div><div>Rather than do four scalar bit_and(..., constant(0xffu)) instructions below, how about changing the above line to:<br><br>factory.emit(assign(u, bit_and(uvec4_rval, constant(0xffu))));<br>
<br></div><div>That way we take advantage of vector processing in the GPU to do all four bit_ands at once.<br><br></div><div>With that fixed (as well as the copy/paste errors Ian spotted), this patch is:<br><br>Reviewed-by: Paul Berry <<a href="mailto:stereotype441@gmail.com">stereotype441@gmail.com</a>><br>
</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+<br>
+ /* return ((u.w & 0xff) << 24) | ((u.z & 0xff) << 16) | ((u.y & 0xff) << 8) | (u.x & 0xff); */<br>
+ return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)), constant(24u)),<br>
+ lshift(bit_and(swizzle_z(u), constant(0xffu)), constant(16u))),<br>
+ bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)), constant(8u)),<br>
+ bit_and(swizzle_x(u), constant(0xffu))));<br>
+ }<br>
+<br>
+ /**<br>
* \brief Unpack a uint32 into two uint16's.<br>
*<br>
* Interpret the given uint32 as a uint16 pair where the uint32's least<br>
@@ -244,6 +292,44 @@ private:<br>
}<br>
<br>
/**<br>
+ * \brief Unpack a uint32 into four uint8's.<br>
+ *<br>
+ * Interpret the given uint32 as a uint8 quad where the uint32's least<br>
+ * significant bits specify the quad's first element. Return the uint8<br>
+ * quad as a uvec4.<br>
+ */<br>
+ ir_rvalue*<br>
+ unpack_uint_to_uvec4(ir_rvalue *uint_rval)<br>
+ {<br>
+ assert(uint_rval->type == glsl_type::uint_type);<br>
+<br>
+ /* uint u = UINT_RVAL; */<br>
+ ir_variable *u = factory.make_temp(glsl_type::uint_type,<br>
+ "tmp_unpack_uint_to_uvec4_u");<br>
+ factory.emit(assign(u, uint_rval));<br>
+<br>
+ /* uvec4 u4; */<br>
+ ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,<br>
+ "tmp_unpack_uint_to_uvec4_u4");<br>
+<br>
+ /* u4.x = u & 0xffu; */<br>
+ factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));<br>
+<br>
+ /* u4.y = (u >> 8u) & 0xffu; */<br>
+ factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),<br>
+ constant(0xffu)), WRITEMASK_Y));<br>
+<br>
+ /* u4.z = (u >> 16u) & 0xffu; */<br>
+ factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),<br>
+ constant(0xffu)), WRITEMASK_Z));<br>
+<br>
+ /* u4.w = (u >> 24u) */<br>
+ factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));<br>
+<br>
+ return deref(u4).val;<br>
+ }<br>
+<br>
+ /**<br>
* \brief Lower a packSnorm2x16 expression.<br>
*<br>
* \param vec2_rval is packSnorm2x16's input<br>
@@ -293,6 +379,55 @@ private:<br>
}<br>
<br>
/**<br>
+ * \brief Lower a packSnorm4x8 expression.<br>
+ *<br>
+ * \param vec4_rval is packSnorm4x8's input<br>
+ * \return packSnorm4x8's output as a uint rvalue<br>
+ */<br>
+ ir_rvalue*<br>
+ lower_pack_snorm_4x8(ir_rvalue *vec4_rval)<br>
+ {<br>
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:<br>
+ *<br>
+ * highp uint packSnorm4x8(vec4 v)<br>
+ * -------------------------------<br>
+ * First, converts each component of the normalized floating-point value<br>
+ * v into 8-bit integer values. Then, the results are packed into the<br>
+ * returned 32-bit unsigned integer.<br>
+ *<br>
+ * The conversion for component c of v to fixed point is done as<br>
+ * follows:<br>
+ *<br>
+ * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)<br>
+ *<br>
+ * The first component of the vector will be written to the least<br>
+ * significant bits of the output; the last component will be written to<br>
+ * the most significant bits.<br>
+ *<br>
+ * This function generates IR that approximates the following pseudo-GLSL:<br>
+ *<br>
+ * return pack_uvec4_to_uint(<br>
+ * uvec4(ivec4(<br>
+ * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));<br>
+ *<br>
+ * It is necessary to first convert the vec4 to ivec4 rather than directly<br>
+ * converting vec4 to uvec4 because the latter conversion is undefined.<br>
+ * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to<br>
+ * convert a negative floating point value to an uint".<br>
+ */<br>
+ assert(vec4_rval->type == glsl_type::vec4_type);<br>
+<br>
+ ir_rvalue *result = pack_uvec4_to_uint(<br>
+ i2u(f2i(round_even(mul(clamp(vec4_rval,<br>
+ constant(-1.0f),<br>
+ constant(1.0f)),<br>
+ constant(127.0f))))));<br>
+<br>
+ assert(result->type == glsl_type::uint_type);<br>
+ return result;<br>
+ }<br>
+<br>
+ /**<br>
* \brief Lower an unpackSnorm2x16 expression.<br>
*<br>
* \param uint_rval is unpackSnorm2x16's input<br>
@@ -352,6 +487,65 @@ private:<br>
}<br>
<br>
/**<br>
+ * \brief Lower an unpackSnorm4x8 expression.<br>
+ *<br>
+ * \param uint_rval is unpackSnorm4x8's input<br>
+ * \return unpackSnorm4x8's output as a vec4 rvalue<br>
+ */<br>
+ ir_rvalue*<br>
+ lower_unpack_snorm_4x8(ir_rvalue *uint_rval)<br>
+ {<br>
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:<br>
+ *<br>
+ * highp vec4 unpackSnorm4x8 (highp uint p)<br>
+ * ----------------------------------------<br>
+ * First, unpacks a single 32-bit unsigned integer p into four<br>
+ * 8-bit unsigned integers. Then, each component is converted to<br>
+ * a normalized floating-point value to generate the returned<br>
+ * four-component vector.<br>
+ *<br>
+ * The conversion for unpacked fixed-point value f to floating point is<br>
+ * done as follows:<br>
+ *<br>
+ * unpackSnorm4x8: clamp(f / 127.0, -1, +1)<br>
+ *<br>
+ * The first component of the returned vector will be extracted from the<br>
+ * least significant bits of the input; the last component will be<br>
+ * extracted from the most significant bits.<br>
+ *<br>
+ * This function generates IR that approximates the following pseudo-GLSL:<br>
+ *<br>
+ * return clamp(<br>
+ * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,<br>
+ * -1.0f, 1.0f);<br>
+ *<br>
+ * The above IR may appear unnecessarily complex, but the intermediate<br>
+ * conversion to ivec4 and the bit shifts are necessary to correctly unpack<br>
+ * negative floats.<br>
+ *<br>
+ * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,<br>
+ * 0.0). packSnorm4x8 encodes -1.0 as the int8 0xff. During unpacking, we<br>
+ * place that int8 into an int32, which results in the *positive* integer<br>
+ * 0x000000ff. The int8's sign bit becomes, in the int32, the rather<br>
+ * unimportant bit 8. We must now extend the int8's sign bit into bits<br>
+ * 9-32, which is accomplished by left-shifting then right-shifting.<br>
+ */<br>
+<br>
+ assert(uint_rval->type == glsl_type::uint_type);<br>
+<br>
+ ir_rvalue *result =<br>
+ clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),<br>
+ constant(24u)),<br>
+ constant(24u))),<br>
+ constant(127.0f)),<br>
+ constant(-1.0f),<br>
+ constant(1.0f));<br>
+<br>
+ assert(result->type == glsl_type::vec4_type);<br>
+ return result;<br>
+ }<br>
+<br>
+ /**<br>
* \brief Lower a packUnorm2x16 expression.<br>
*<br>
* \param vec2_rval is packUnorm2x16's input<br>
@@ -396,6 +590,50 @@ private:<br>
}<br>
<br>
/**<br>
+ * \brief Lower a packUnorm4x8 expression.<br>
+ *<br>
+ * \param vec4_rval is packUnorm4x8's input<br>
+ * \return packUnorm4x8's output as a uint rvalue<br>
+ */<br>
+ ir_rvalue*<br>
+ lower_pack_unorm_4x8(ir_rvalue *vec4_rval)<br>
+ {<br>
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:<br>
+ *<br>
+ * highp uint packUnorm4x8 (vec4 v)<br>
+ * --------------------------------<br>
+ * First, converts each component of the normalized floating-point value<br>
+ * v into 8-bit integer values. Then, the results are packed into the<br>
+ * returned 32-bit unsigned integer.<br>
+ *<br>
+ * The conversion for component c of v to fixed point is done as<br>
+ * follows:<br>
+ *<br>
+ * packUnorm4x8: round(clamp(c, 0, +1) * 255.0)<br>
+ *<br>
+ * The first component of the vector will be written to the least<br>
+ * significant bits of the output; the last component will be written to<br>
+ * the most significant bits.<br>
+ *<br>
+ * This function generates IR that approximates the following pseudo-GLSL:<br>
+ *<br>
+ * return pack_uvec4_to_uint(uvec4(<br>
+ * round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f)));<br>
+ *<br>
+ * Here it is safe to directly convert the vec4 to uvec4 because the<br>
+ * vec4 has been clamped to a non-negative range.<br>
+ */<br>
+<br>
+ assert(vec4_rval->type == glsl_type::vec4_type);<br>
+<br>
+ ir_rvalue *result = pack_uvec4_to_uint(<br>
+ f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));<br>
+<br>
+ assert(result->type == glsl_type::uint_type);<br>
+ return result;<br>
+ }<br>
+<br>
+ /**<br>
* \brief Lower an unpackUnorm2x16 expression.<br>
*<br>
* \param uint_rval is unpackUnorm2x16's input<br>
@@ -437,6 +675,47 @@ private:<br>
}<br>
<br>
/**<br>
+ * \brief Lower an unpackUnorm4x8 expression.<br>
+ *<br>
+ * \param uint_rval is unpackUnorm4x8's input<br>
+ * \return unpackUnorm4x8's output as a vec4 rvalue<br>
+ */<br>
+ ir_rvalue*<br>
+ lower_unpack_unorm_4x8(ir_rvalue *uint_rval)<br>
+ {<br>
+ /* From page 137 (143 of pdf) of the GLSL 4.30 spec:<br>
+ *<br>
+ * highp vec4 unpackUnorm4x8 (highp uint p)<br>
+ * ----------------------------------------<br>
+ * First, unpacks a single 32-bit unsigned integer p into four<br>
+ * 8-bit unsigned integers. Then, each component is converted to<br>
+ * a normalized floating-point value to generate the returned<br>
+ * four-component vector.<br>
+ *<br>
+ * The conversion for unpacked fixed-point value f to floating point is<br>
+ * done as follows:<br>
+ *<br>
+ * unpackUnorm4x8: f / 255.0<br>
+ *<br>
+ * The first component of the returned vector will be extracted from the<br>
+ * least significant bits of the input; the last component will be<br>
+ * extracted from the most significant bits.<br>
+ *<br>
+ * This function generates IR that approximates the following pseudo-GLSL:<br>
+ *<br>
+ * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;<br>
+ */<br>
+<br>
+ assert(uint_rval->type == glsl_type::uint_type);<br>
+<br>
+ ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),<br>
+ constant(255.0f));<br>
+<br>
+ assert(result->type == glsl_type::vec4_type);<br>
+ return result;<br>
+ }<br>
+<br>
+ /**<br>
* \brief Lower the component-wise calculation of packHalf2x16.<br>
*<br>
* \param f_rval is one component of packHalf2x16's input<br>
<span class="HOEnZb"><font color="#888888">--<br>
1.7.8.6<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="http://lists.freedesktop.org/mailman/listinfo/mesa-dev" target="_blank">http://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>