[Mesa-dev] [PATCH 04/10] glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations

Thu Jan 10 10:30:49 PST 2013

On 01/10/2013 12:10 AM, Chad Versace wrote:
> That is, evaluate constant expressions of the following functions:
>    packSnorm2x16  unpackSnorm2x16
>    packUnorm2x16  unpackUnorm2x16
>    packHalf2x16   unpackHalf2x16

I believe that there is already code somewhere in Mesa for doing 
float/half conversion.  Could we just re-use that?

> Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
> ---
>   src/glsl/ir_constant_expression.cpp | 362 ++++++++++++++++++++++++++++++++++++
>   1 file changed, 362 insertions(+)
>
> diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
> index 17b54b9..2038498 100644
> --- a/src/glsl/ir_constant_expression.cpp
> +++ b/src/glsl/ir_constant_expression.cpp
> @@ -94,6 +94,332 @@ bitcast_f2u(float f)
>      return u;
>   }
>
> +/**
> + * Evaluate one component of a floating-point 2x16 packing function.
> + */
> +typedef uint16_t
> +(*pack_1x16_func_t)(float);
> +
> +/**
> + * Evaluate one component of a floating-point 2x16 unpacking function.
> + */
> +typedef void
> +(*unpack_1x16_func_t)(uint16_t, float*);
> +
> +/**
> + * Evaluate a 2x16 floating-point packing function.
> + */
> +static uint32_t
> +pack_2x16(pack_1x16_func_t pack_1x16,
> +          float x, float y)
> +{
> +   /* From section 8.4 of the GLSL ES 3.00 spec:
> +    *
> +    *    packSnorm2x16
> +    *    -------------
> +    *    The first component of the vector will be written to the least
> +    *    significant bits of the output; the last component will be written to
> +    *    the most significant bits.
> +    *
> +    * The specifications for the other packing functions contain similar
> +    * language.
> +    */
> +   uint32_t u = 0;
> +   u |= ((uint32_t) pack_1x16(x) << 0);
> +   u |= ((uint32_t) pack_1x16(y) << 16);
> +   return u;
> +}
> +
> +/**
> + * Evaluate a 2x16 floating-point unpacking function.
> + */
> +static void
> +unpack_2x16(unpack_1x16_func_t unpack_1x16,
> +            uint32_t u,
> +            float *x, float *y)
> +{
> +    /* From section 8.4 of the GLSL ES 3.00 spec:
> +     *
> +     *    unpackSnorm2x16
> +     *    ---------------
> +     *    The first component of the returned vector will be extracted from
> +     *    the least significant bits of the input; the last component will be
> +     *    extracted from the most significant bits.
> +     *
> +     * The specifications for the other unpacking functions contain similar
> +     * language.
> +     */
> +   unpack_1x16((uint16_t) (u & 0xffff), x);
> +   unpack_1x16((uint16_t) (u >> 16), y);
> +}
> +
> +/**
> + * Evaluate one component of packSnorm2x16.
> + */
> +static uint16_t
> +pack_snorm_1x16(float x)
> +{
> +    /* From section 8.4 of the GLSL ES 3.00 spec:
> +     *
> +     *    packSnorm2x16
> +     *    ---------------
> +     *    The conversion for component c of v to fixed point is done as
> +     *    follows:
> +     *
> +     *      packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
> +     */
> +   return (uint16_t) round_to_even(CLAMP(x, -1.0f, +1.0f) * 32767.0f);
> +}
> +
> +/**
> + * Evaluate one component of unpackSnorm2x16.
> + */
> +static void
> +unpack_snorm_1x16(uint16_t u, float *f)
> +{
> +    /* From section 8.4 of the GLSL ES 3.00 spec:
> +     *
> +     *    unpackSnorm2x16
> +     *    ---------------
> +     *    The conversion for unpacked fixed-point value f to floating point is
> +     *    done as follows:
> +     *
> +     *       unpackSnorm2x16: clamp(f / 32767.0, -1,+1)
> +     */
> +   *f = CLAMP((int16_t) u / 32767.0f, -1.0f, +1.0f);
> +}
> +
> +/**
> + * Evaluate one component of packUnorm2x16.
> + */
> +static uint16_t
> +pack_unorm_1x16(float x)
> +{
> +    /* From section 8.4 of the GLSL ES 3.00 spec:
> +     *
> +     *    packUnorm2x16
> +     *    ---------------
> +     *    The conversion for component c of v to fixed point is done as
> +     *    follows:
> +     *
> +     *       packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
> +     */
> +   return (uint16_t) round_to_even(CLAMP(x, 0.0f, 1.0f) * 65535.0f);
> +}
> +
> +
> +/**
> + * Evaluate one component of unpackUnorm2x16.
> + */
> +static void
> +unpack_unorm_1x16(uint16_t u, float *f)
> +{
> +    /* From section 8.4 of the GLSL ES 3.00 spec:
> +     *
> +     *    unpackUnorm2x16
> +     *    ---------------
> +     *    The conversion for unpacked fixed-point value f to floating point is
> +     *    done as follows:
> +     *
> +     *       unpackUnorm2x16: f / 65535.0
> +     */
> +   *f = (float) u / 65535.0f;
> +}
> +
> +
> +/**
> + * Evaluate one component of packHalf2x16.
> + */
> +static uint16_t
> +pack_half_1x16(float x)
> +{
> +   /* The bit layout of a float16 is:
> +    *   sign:     15
> +    *   exponent: 10:14
> +    *   mantissa: 0:9
> +    *
> +    * The sign, exponent, and mantissa of a float16 determine its value thus:
> +    *
> +    *  if e = 0 and m = 0, then zero:       (-1)^s * 0
> +    *  if e = 0 and m != 0, then subnormal: (-1)^s * 2^(e - 14) * (m / 2^10)
> +    *  if 0 < e < 31, then normal:          (-1)^s * 2^(e - 15) * (1 + m / 2^10)
> +    *  if e = 31 and m = 0, then inf:       (-1)^s * inf
> +    *  if e = 31 and m != 0, then NaN
> +    *
> +    *  where 0 <= m < 2^10 .
> +    */
> +
> +   /* Calculate the resultant float16's sign, exponent, and mantissa
> +    * bits.
> +    */
> +   const int s = (copysign(1.0f, x) < 0) ? 1 : 0;
> +   int e;
> +   int m;
> +
> +   switch (fpclassify(x)) {
> +   case FP_NAN:
> +       /* Any representation with e = 31 and m != 0 suffices. */
> +      return 0xffffu;
> +   case FP_INFINITE:
> +      e = 31;
> +      m = 0;
> +      break;
> +   case FP_SUBNORMAL:
> +   case FP_ZERO:
> +      /* Subnormal float32 values are too small to be represented as
> +       * a float16.
> +       */
> +      e = 0;
> +      m = 0;
> +      break;
> +   case FP_NORMAL: {
> +      /* Represent the absolute value of the float32 input in the form
> +       *
> +       *   2^E * F, where 0.5 <= F < 1 .
> +       */
> +      int E;
> +      float F;
> +      F = frexpf(fabs(x), &E);
> +
> +      /* Some key boundary values of float16 are:
> +       *
> +       *   min_subnormal = 2^(-14) * (1 / 2^10)
> +       *   max_subnormal = 2^(-14) * (1023 / 2^10)
> +       *   min_normal    = 2^(1 - 15) * (1 + 0 / 2^10)
> +       *   max_normal    = 2^(30 - 15) * (1 + 1023 / 2^10)
> +       *
> +       * Representing the same boundary values in the form returned
> +       * by frexpf(),
> +       *
> +       *   2^e * f where 0.5 <= f < 1,
> +       *
> +       * gives
> +       *
> +       *   min_subnormal = 2^(-14) * (1 / 2^10)
> +       *                 = 2^(-23) * (1 / 2)
> +       *                 = 2^(-23) * 0.5
> +       *
> +       *   max_subnormal = 2^(-14) * (1023 / 2^10)
> +       *                 = 2^(-14) * 0.9990234375
> +       *
> +       *   min_normal    = 2^(1 - 15) * (1 + 0 / 2^10)
> +       *                 = 2^(-14)
> +       *                 = 2^(-13) * 0.5
> +       *
> +       *   max_normal    = 2^(30 - 15) * (1 + 1023 / 2^10)
> +       *                 = 2^15 * (2^10 + 1023) / 2^10
> +       *                 = 2^16 * (2^10 + 1023) / 2^11
> +       *                 = 2^16 * 0.99951171875
> +       *
> +       * Now calculate the result's exponent and mantissa by comparing the
> +       * float32 input against the boundary values above.
> +       */
> +      if (E == -23 && F < 0.5f) {
> +         /* The float32 input is too small to be represented as a float16. The
> +          * result is zero.
> +          */
> +         e = 0;
> +         m = 0;
> +      } else if (E < -13 || (E == -13 && F < 0.5f)) {
> +         /* The resultant float16 value is subnormal. Calculate m:
> +          *
> +          *   2^E * F = 2^(-14) * (m / 2^10)
> +          *           = 2^(-24) * m
> +          *         m = 2^(E + 24) * F
> +          */
> +         e = 0;
> +         m = powf(2, E + 24) * F;
> +      } else if (E < 16 || (E == 16 && F <= 0.99951171875f)) {
> +         /* The resultant float16 is normal. Calculate e and m:
> +          *
> +          *   2^E * F = 2^(e - 15) * (1 + m / 2^10)          (1)
> +          *           = 2^(e - 15) * (2^10 + m) / 2^10       (2)
> +          *           = 2^(e - 14) * (2^10 + m) / 2^11       (3)
> +          *
> +          * Substituting
> +          *
> +          *   e1 := E                                        (4)
> +          *   f1 := F                                        (5)
> +          *   e2 := e - 14                                   (6)
> +          *   f2 := (2^10 + m) / 2^11                        (7)
> +          *
> +          * transforms the equation to
> +          *
> +          *   2^e1 * f1 = 2^e2 * f2                          (8)
> +          *
> +          * By definition, f1 lies in the range [0.5, 1). By equation 7, f2
> +          * lies there also. This observation combined with equation 8 implies
> +          * f1 = f2, which in turn implies e1 = e2. Therefore
> +          *
> +          *   e = E + 14
> +          *   m = 2^11 * F - 2^10
> +          */
> +         e = E + 14;
> +         m = powf(2, 11) * F - powf(2, 10);
> +      } else {
> +         /* The float32 input is too large to represent as a float16. The
> +          * result is infinite.
> +          */
> +         e = 31;
> +         m = 0;
> +      }
> +      break;
> +   }
> +   default:
> +      assert(0);
> +      break;
> +   }
> +
> +   assert(s == 0 || s == 1);
> +   assert(0 <= e && e <= 31);
> +   assert(0 <= m && m <= 1023);
> +
> +   return (s << 15) | (e << 10) | m;
> +}
> +
> +/**
> + * Evaluate one component of unpackHalf2x16.
> + */
> +static void
> +unpack_half_1x16(uint16_t u, float *f)
> +{
> +   /* The bit layout of a float16 is:
> +    *   sign:     15
> +    *   exponent: 10:14
> +    *   mantissa: 0:9
> +    *
> +    * The sign, exponent, and mantissa of a float16 determine its value thus:
> +    *
> +    *  if e = 0 and m = 0, then zero:       (-1)^s * 0
> +    *  if e = 0 and m != 0, then subnormal: (-1)^s * 2^(e - 14) * (m / 2^10)
> +    *  if 0 < e < 31, then normal:          (-1)^s * 2^(e - 15) * (1 + m / 2^10)
> +    *  if e = 31 and m = 0, then inf:       (-1)^s * inf
> +    *  if e = 31 and m != 0, then NaN
> +    *
> +    *  where 0 <= m < 2^10 .
> +    */
> +
> +   int s = (u >> 15) & 0x1;
> +   int e = (u >> 10) & 0x1f;
> +   int m = u & 0x3ff;
> +
> +   float sign = s ? -1 : 1;
> +
> +   if (e == 0) {
> +      /* The float16 is zero or subnormal. */
> +      *f = sign * pow(2, -24) * m;
> +   } else if (1 <= e && e <= 30) {
> +       /* The float16 is normal. */
> +      *f = sign * pow(2, e - 15) * (1.0 + m / 1024.0);
> +   } else if (e == 31 && m == 0) {
> +      *f = sign * INFINITY;
> +   } else if (e == 31 && m != 0) {
> +      *f = NAN;
> +   } else {
> +      assert(0);
> +   }
> +}
> +
>   ir_constant *
>   ir_rvalue::constant_expression_value(struct hash_table *variable_context)
>   {
> @@ -459,6 +785,42 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
>         }
>         break;
>
> +   case ir_unop_pack_snorm_2x16:
> +      assert(op[0]->type == glsl_type::vec2_type);
> +      data.u[0] = pack_2x16(pack_snorm_1x16,
> +                            op[0]->value.f[0],
> +                            op[0]->value.f[1]);
> +      break;
> +   case ir_unop_unpack_snorm_2x16:
> +      assert(op[0]->type == glsl_type::uint_type);
> +      unpack_2x16(unpack_snorm_1x16,
> +                  op[0]->value.u[0],
> +                  &data.f[0], &data.f[1]);
> +      break;
> +   case ir_unop_pack_unorm_2x16:
> +      assert(op[0]->type == glsl_type::vec2_type);
> +      data.u[0] = pack_2x16(pack_unorm_1x16,
> +                            op[0]->value.f[0],
> +                            op[0]->value.f[1]);
> +      break;
> +   case ir_unop_unpack_unorm_2x16:
> +      assert(op[0]->type == glsl_type::uint_type);
> +      unpack_2x16(unpack_unorm_1x16,
> +                  op[0]->value.u[0],
> +                  &data.f[0], &data.f[1]);
> +      break;
> +   case ir_unop_pack_half_2x16:
> +      assert(op[0]->type == glsl_type::vec2_type);
> +      data.u[0] = pack_2x16(pack_half_1x16,
> +                            op[0]->value.f[0],
> +                            op[0]->value.f[1]);
> +      break;
> +   case ir_unop_unpack_half_2x16:
> +      assert(op[0]->type == glsl_type::uint_type);
> +      unpack_2x16(unpack_half_1x16,
> +                  op[0]->value.u[0],
> +                  &data.f[0], &data.f[1]);
> +      break;
>      case ir_binop_pow:
>         assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
>         for (unsigned c = 0; c < op[0]->type->components(); c++) {
>