[Mesa-dev] [PATCH 5/8] glsl: Add support for lowering 4x8 pack/unpack operations
Ian Romanick
idr at freedesktop.org
Fri Jan 25 06:44:07 PST 2013
On 01/24/2013 10:47 PM, Matt Turner wrote:
> Lower them to arithmetic and bit manipulation expressions.
> ---
> src/glsl/ir_optimization.h | 6 +
> src/glsl/lower_packing_builtins.cpp | 279 +++++++++++++++++++++++++++++++++++
> 2 files changed, 285 insertions(+), 0 deletions(-)
>
> diff --git a/src/glsl/ir_optimization.h b/src/glsl/ir_optimization.h
> index ac90b87..8f33018 100644
> --- a/src/glsl/ir_optimization.h
> +++ b/src/glsl/ir_optimization.h
> @@ -54,6 +54,12 @@ enum lower_packing_builtins_op {
>
> LOWER_PACK_HALF_2x16_TO_SPLIT = 0x0040,
> LOWER_UNPACK_HALF_2x16_TO_SPLIT = 0x0080,
> +
> + LOWER_PACK_SNORM_4x8 = 0x0100,
> + LOWER_UNPACK_SNORM_4x8 = 0x0200,
> +
> + LOWER_PACK_UNORM_4x8 = 0x0400,
> + LOWER_UNPACK_UNORM_4x8 = 0x0800,
> };
>
> bool do_common_optimization(exec_list *ir, bool linked,
> diff --git a/src/glsl/lower_packing_builtins.cpp b/src/glsl/lower_packing_builtins.cpp
> index 49176cc..aa6765f 100644
> --- a/src/glsl/lower_packing_builtins.cpp
> +++ b/src/glsl/lower_packing_builtins.cpp
> @@ -85,9 +85,15 @@ public:
> case LOWER_PACK_SNORM_2x16:
> *rvalue = lower_pack_snorm_2x16(op0);
> break;
> + case LOWER_PACK_SNORM_4x8:
> + *rvalue = lower_pack_snorm_4x8(op0);
> + break;
> case LOWER_PACK_UNORM_2x16:
> *rvalue = lower_pack_unorm_2x16(op0);
> break;
> + case LOWER_PACK_UNORM_4x8:
> + *rvalue = lower_pack_unorm_4x8(op0);
> + break;
> case LOWER_PACK_HALF_2x16:
> *rvalue = lower_pack_half_2x16(op0);
> break;
> @@ -97,9 +103,15 @@ public:
> case LOWER_UNPACK_SNORM_2x16:
> *rvalue = lower_unpack_snorm_2x16(op0);
> break;
> + case LOWER_UNPACK_SNORM_4x8:
> + *rvalue = lower_unpack_snorm_4x8(op0);
> + break;
> case LOWER_UNPACK_UNORM_2x16:
> *rvalue = lower_unpack_unorm_2x16(op0);
> break;
> + case LOWER_UNPACK_UNORM_4x8:
> + *rvalue = lower_unpack_unorm_4x8(op0);
> + break;
> case LOWER_UNPACK_HALF_2x16:
> *rvalue = lower_unpack_half_2x16(op0);
> break;
> @@ -137,18 +149,30 @@ private:
> case ir_unop_pack_snorm_2x16:
> result = op_mask & LOWER_PACK_SNORM_2x16;
> break;
> + case ir_unop_pack_snorm_4x8:
> + result = op_mask & LOWER_PACK_SNORM_4x8;
> + break;
> case ir_unop_pack_unorm_2x16:
> result = op_mask & LOWER_PACK_UNORM_2x16;
> break;
> + case ir_unop_pack_unorm_4x8:
> + result = op_mask & LOWER_PACK_UNORM_4x8;
> + break;
> case ir_unop_pack_half_2x16:
> result = op_mask & (LOWER_PACK_HALF_2x16 | LOWER_PACK_HALF_2x16_TO_SPLIT);
> break;
> case ir_unop_unpack_snorm_2x16:
> result = op_mask & LOWER_UNPACK_SNORM_2x16;
> break;
> + case ir_unop_unpack_snorm_4x8:
> + result = op_mask & LOWER_UNPACK_SNORM_4x8;
> + break;
> case ir_unop_unpack_unorm_2x16:
> result = op_mask & LOWER_UNPACK_UNORM_2x16;
> break;
> + case ir_unop_unpack_unorm_4x8:
> + result = op_mask & LOWER_UNPACK_UNORM_4x8;
> + break;
> case ir_unop_unpack_half_2x16:
> result = op_mask & (LOWER_UNPACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16_TO_SPLIT);
> break;
> @@ -214,6 +238,30 @@ private:
> }
>
> /**
> + * \brief Pack four uint8's into a single uint32.
> + *
> + * Interpret the given uvec4 as a uint32 quad. Pack the quad into a uint32
> + * where the least significant bits specify the first element of the quad.
> + * Return the uint32.
> + */
> + ir_rvalue*
> + pack_uvec4_to_uint(ir_rvalue *uvec4_rval)
> + {
> + assert(uvec4_rval->type == glsl_type::uvec4_type);
> +
> + /* uvec4 u = UVEC4_RVAL; */
> + ir_variable *u = factory.make_temp(glsl_type::uvec4_type,
> + "tmp_pack_uvec4_to_uint");
> + factory.emit(assign(u, uvec4_rval));
> +
> + /* return ((u.w & 0xff) << 24) | ((u.z & 0xff) << 16) | ((u.y & 0xff) << 8) | (u.x & 0xff); */
> + return bit_or(bit_or(lshift(bit_and(swizzle_w(u), constant(0xffu)), constant(24u)),
> + lshift(bit_and(swizzle_z(u), constant(0xffu)), constant(16u))),
> + bit_or(lshift(bit_and(swizzle_y(u), constant(0xffu)), constant(8u)),
> + bit_and(swizzle_x(u), constant(0xffu))));
> + }
> +
> + /**
> * \brief Unpack a uint32 into two uint16's.
> *
> * Interpret the given uint32 as a uint16 pair where the uint32's least
> @@ -244,6 +292,44 @@ private:
> }
>
> /**
> + * \brief Unpack a uint32 into four uint8's.
> + *
> + * Interpret the given uint32 as a uint8 quad where the uint32's least
> + * significant bits specify the quad's first element. Return the uint8
> + * quad as a uvec4.
> + */
> + ir_rvalue*
> + unpack_uint_to_uvec4(ir_rvalue *uint_rval)
> + {
> + assert(uint_rval->type == glsl_type::uint_type);
> +
> + /* uint u = UINT_RVAL; */
> + ir_variable *u = factory.make_temp(glsl_type::uint_type,
> + "tmp_unpack_uint_to_uvec4_u");
> + factory.emit(assign(u, uint_rval));
> +
> + /* uvec4 u4; */
> + ir_variable *u4 = factory.make_temp(glsl_type::uvec4_type,
> + "tmp_unpack_uint_to_uvec4_u4");
> +
> + /* u4.x = u & 0xffu; */
> + factory.emit(assign(u4, bit_and(u, constant(0xffu)), WRITEMASK_X));
> +
> + /* u4.y = (u >> 8u) & 0xffu; */
> + factory.emit(assign(u4, bit_and(rshift(u, constant(8u)),
> + constant(0xffu)), WRITEMASK_Y));
> +
> + /* u4.z = (u >> 16u) & 0xffu; */
> + factory.emit(assign(u4, bit_and(rshift(u, constant(16u)),
> + constant(0xffu)), WRITEMASK_Z));
> +
> + /* u4.w = (u >> 24u) */
> + factory.emit(assign(u4, rshift(u, constant(24u)), WRITEMASK_W));
> +
> + return deref(u4).val;
> + }
> +
> + /**
> * \brief Lower a packSnorm2x16 expression.
> *
> * \param vec2_rval is packSnorm2x16's input
> @@ -293,6 +379,55 @@ private:
> }
>
> /**
> + * \brief Lower a packSnorm4x8 expression.
> + *
> + * \param vec4_rval is packSnorm4x8's input
> + * \return packSnorm4x8's output as a uint rvalue
> + */
> + ir_rvalue*
> + lower_pack_snorm_4x8(ir_rvalue *vec4_rval)
> + {
> + /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> + *
> + * highp uint packSnorm4x8(vec4 v)
> + * -------------------------------
> + * First, converts each component of the normalized floating-point value
> + * v into 8-bit integer values. Then, the results are packed into the
> + * returned 32-bit unsigned integer.
> + *
> + * The conversion for component c of v to fixed point is done as
> + * follows:
> + *
> + * packSnorm4x8: round(clamp(c, -1, +1) * 127.0)
> + *
> + * The first component of the vector will be written to the least
> + * significant bits of the output; the last component will be written to
> + * the most significant bits.
> + *
> + * This function generates IR that approximates the following pseudo-GLSL:
> + *
> + * return pack_uvec4_to_uint(
> + * uvec4(ivec4(
> + * round(clamp(VEC4_RVALUE, -1.0f, 1.0f) * 127.0f))));
> + *
> + * It is necessary to first convert the vec4 to ivec4 rather than directly
> + * converting vec4 to uvec4 because the latter conversion is undefined.
> + * From page 87 (93 of pdf) of the GLSL 4.30 spec: "It is undefined to
> + * convert a negative floating point value to an uint".
> + */
> + assert(vec4_rval->type == glsl_type::vec4_type);
> +
> + ir_rvalue *result = pack_uvec4_to_uint(
> + i2u(f2i(round_even(mul(clamp(vec4_rval,
> + constant(-1.0f),
> + constant(1.0f)),
> + constant(127.0f))))));
> +
> + assert(result->type == glsl_type::uint_type);
> + return result;
> + }
> +
> + /**
> * \brief Lower an unpackSnorm2x16 expression.
> *
> * \param uint_rval is unpackSnorm2x16's input
> @@ -352,6 +487,65 @@ private:
> }
>
> /**
> + * \brief Lower an unpackSnorm4x8 expression.
> + *
> + * \param uint_rval is unpackSnorm4x8's input
> + * \return unpackSnorm4x8's output as a vec4 rvalue
> + */
> + ir_rvalue*
> + lower_unpack_snorm_4x8(ir_rvalue *uint_rval)
> + {
> + /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> + *
> + * highp vec4 unpackSnorm4x8 (highp uint p)
> + * ----------------------------------------
> + * First, unpacks a single 32-bit unsigned integer p into four
> + * 8-bit unsigned integers. Then, each component is converted to
> + * a normalized floating-point value to generate the returned
> + * four-component vector.
> + *
> + * The conversion for unpacked fixed-point value f to floating point is
> + * done as follows:
> + *
> + * unpackSnorm4x8: clamp(f / 127.0, -1, +1)
> + *
> + * The first component of the returned vector will be extracted from the
> + * least significant bits of the input; the last component will be
> + * extracted from the most significant bits.
> + *
> + * This function generates IR that approximates the following pseudo-GLSL:
> + *
> + * return clamp(
> + * ((ivec4(unpack_uint_to_uvec4(UINT_RVALUE)) << 24) >> 24) / 127.0f,
> + * -1.0f, 1.0f);
> + *
> + * The above IR may appear unnecessarily complex, but the intermediate
> + * conversion to ivec4 and the bit shifts are necessary to correctly unpack
> + * negative floats.
> + *
> + * To see why, consider packing and then unpacking vec4(-1.0, 0.0, 0.0,
> + * 0.0). packSnorm4x8 encodes -1.0 as the int8 0x81. During unpacking, we
> + * place that int8 into an int32, which results in the *positive* integer
> + * 0x00000081. The int8's sign bit becomes, in the int32, the rather
> + * unimportant bit 8. We must now extend the int8's sign bit into bits
> + * 9-32, which is accomplished by left-shifting then right-shifting.
> + */
> +
> + assert(uint_rval->type == glsl_type::uint_type);
> +
> + ir_rvalue *result =
> + clamp(div(i2f(rshift(lshift(u2i(unpack_uint_to_uvec4(uint_rval)),
> + constant(24u)),
> + constant(24u))),
> + constant(127.0f)),
> + constant(-1.0f),
> + constant(1.0f));
> +
> + assert(result->type == glsl_type::vec4_type);
> + return result;
> + }
> +
> + /**
> * \brief Lower a packUnorm2x16 expression.
> *
> * \param vec2_rval is packUnorm2x16's input
> @@ -396,6 +590,50 @@ private:
> }
>
> /**
> + * \brief Lower a packUnorm4x8 expression.
> + *
> + * \param vec4_rval is packUnorm4x8's input
> + * \return packUnorm4x8's output as a uint rvalue
> + */
> + ir_rvalue*
> + lower_pack_unorm_4x8(ir_rvalue *vec4_rval)
> + {
> + /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> + *
> + * highp uint packUnorm4x8 (vec4 v)
> + * --------------------------------
> + * First, converts each component of the normalized floating-point value
> + * v into 16-bit integer values. Then, the results are packed into the
^^^^^^
Cut and paste error? For packUnorm4x8 this should read "8-bit".
> + * returned 32-bit unsigned integer.
> + *
> + * The conversion for component c of v to fixed point is done as
> + * follows:
> + *
> + * packUnorm4x8: round(clamp(c, 0, +1) * 65535.0)
^^^^^^^^
Ditto. For packUnorm4x8 the scale factor should be 255.0.
> + *
> + * The first component of the vector will be written to the least
> + * significant bits of the output; the last component will be written to
> + * the most significant bits.
> + *
> + * This function generates IR that approximates the following pseudo-GLSL:
> + *
> + * return pack_uvec4_to_uint(uvec4(
> + * round(clamp(VEC4_RVALUE, 0.0f, 1.0f) * 255.0f)));
> + *
> + * Here it is safe to directly convert the vec4 to uvec4 because the
> + * vec4 has been clamped to a non-negative range.
> + */
> +
> + assert(vec4_rval->type == glsl_type::vec4_type);
> +
> + ir_rvalue *result = pack_uvec4_to_uint(
> + f2u(round_even(mul(saturate(vec4_rval), constant(255.0f)))));
> +
> + assert(result->type == glsl_type::uint_type);
> + return result;
> + }
> +
> + /**
> * \brief Lower an unpackUnorm2x16 expression.
> *
> * \param uint_rval is unpackUnorm2x16's input
> @@ -437,6 +675,47 @@ private:
> }
>
> /**
> + * \brief Lower an unpackUnorm4x8 expression.
> + *
> + * \param uint_rval is unpackUnorm4x8's input
> + * \return unpackUnorm4x8's output as a vec4 rvalue
> + */
> + ir_rvalue*
> + lower_unpack_unorm_4x8(ir_rvalue *uint_rval)
> + {
> + /* From page 137 (143 of pdf) of the GLSL 4.30 spec:
> + *
> + * highp vec4 unpackUnorm4x8 (highp uint p)
> + * ----------------------------------------
> + * First, unpacks a single 32-bit unsigned integer p into four
> + * 8-bit unsigned integers. Then, each component is converted to
> + * a normalized floating-point value to generate the returned
> + * four-component vector.
> + *
> + * The conversion for unpacked fixed-point value f to floating point is
> + * done as follows:
> + *
> + * unpackUnorm4x8: f / 255.0
> + *
> + * The first component of the returned vector will be extracted from the
> + * least significant bits of the input; the last component will be
> + * extracted from the most significant bits.
> + *
> + * This function generates IR that approximates the following pseudo-GLSL:
> + *
> + * return vec4(unpack_uint_to_uvec4(UINT_RVALUE)) / 255.0;
> + */
> +
> + assert(uint_rval->type == glsl_type::uint_type);
> +
> + ir_rvalue *result = div(u2f(unpack_uint_to_uvec4(uint_rval)),
> + constant(255.0f));
> +
> + assert(result->type == glsl_type::vec4_type);
> + return result;
> + }
> +
> + /**
> * \brief Lower the component-wise calculation of packHalf2x16.
> *
> * \param f_rval is one component of packHafl2x16's input
>
More information about the mesa-dev
mailing list