[Mesa-dev] [PATCH 04/10] glsl: Evaluate constant GLSL ES 3.00 pack/unpack operations

Thu Jan 10 00:10:22 PST 2013

That is, evaluate constant expressions of the following functions:
  packSnorm2x16  unpackSnorm2x16
  packUnorm2x16  unpackUnorm2x16
  packHalf2x16   unpackHalf2x16

Signed-off-by: Chad Versace <chad.versace at linux.intel.com>
---
 src/glsl/ir_constant_expression.cpp | 362 ++++++++++++++++++++++++++++++++++++
 1 file changed, 362 insertions(+)

diff --git a/src/glsl/ir_constant_expression.cpp b/src/glsl/ir_constant_expression.cpp
index 17b54b9..2038498 100644
--- a/src/glsl/ir_constant_expression.cpp
+++ b/src/glsl/ir_constant_expression.cpp
@@ -94,6 +94,332 @@ bitcast_f2u(float f)
    return u;
 }
 
+/**
+ * Evaluate one component of a floating-point 2x16 packing function.
+ */
+typedef uint16_t
+(*pack_1x16_func_t)(float);
+
+/**
+ * Evaluate one component of a floating-point 2x16 unpacking function.
+ */
+typedef void
+(*unpack_1x16_func_t)(uint16_t, float*);
+
+/**
+ * Pack two floats into one 32-bit word using the given per-component
+ * 16-bit packing callback.
+ *
+ * \param pack_1x16  converts a single float into its 16-bit encoding
+ * \param x          first vector component; lands in bits 0..15
+ * \param y          last vector component; lands in bits 16..31
+ * \return the two 16-bit encodings combined into a single uint32_t
+ */
+static uint32_t
+pack_2x16(pack_1x16_func_t pack_1x16,
+          float x, float y)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec (packSnorm2x16, with similar
+    * wording for the other packing functions) puts the first component in
+    * the least significant bits of the output and the last component in the
+    * most significant bits.
+    */
+   const uint32_t low_half = pack_1x16(x);
+   const uint32_t high_half = pack_1x16(y);
+
+   return low_half | (high_half << 16);
+}
+
+/**
+ * Decode both 16-bit halves of a packed word with the given callback.
+ *
+ * \param unpack_1x16  decodes one 16-bit value into a float
+ * \param u            packed 32-bit input
+ * \param x            receives the value decoded from bits 0..15
+ * \param y            receives the value decoded from bits 16..31
+ */
+static void
+unpack_2x16(unpack_1x16_func_t unpack_1x16,
+            uint32_t u,
+            float *x, float *y)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec (unpackSnorm2x16, with similar
+    * wording for the other unpacking functions) takes the first component
+    * from the least significant bits and the last from the most significant.
+    */
+   const uint16_t low_half = (uint16_t) (u & 0xffff);
+   const uint16_t high_half = (uint16_t) (u >> 16);
+   unpack_1x16(low_half, x);
+   unpack_1x16(high_half, y);
+}
+
+/**
+ * Evaluate one component of packSnorm2x16.
+ */
+static uint16_t
+pack_snorm_1x16(float x)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion as
+    *    packSnorm2x16: round(clamp(c, -1, +1) * 32767.0)
+    *
+    * The scaled value is negative whenever x is. Converting a negative
+    * floating-point value straight to an unsigned type is undefined, so go
+    * through int16_t first (this assumes round_to_even(), defined outside
+    * this hunk, yields a floating-point value).
+    */
+   const float scaled = CLAMP(x, -1.0f, +1.0f) * 32767.0f;
+   return (uint16_t) (int16_t) round_to_even(scaled);
+}
+
+/**
+ * Evaluate one component of unpackSnorm2x16.
+ *
+ * \param u  16-bit field, reinterpreted as a signed integer
+ * \param f  receives the normalized result, clamped to [-1, +1]
+ */
+static void
+unpack_snorm_1x16(uint16_t u, float *f)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion as
+    *    unpackSnorm2x16: clamp(f / 32767.0, -1, +1)
+    *
+    * The clamp matters for -32768, which would otherwise map below -1.
+    */
+   const int16_t fixed = (int16_t) u;
+   *f = CLAMP(fixed / 32767.0f, -1.0f, +1.0f);
+}
+
+/**
+ * Evaluate one component of packUnorm2x16.
+ *
+ * \param x  component to pack; it is clamped to [0, 1] before scaling
+ * \return the scaled value, rounded with round_to_even()
+ */
+static uint16_t
+pack_unorm_1x16(float x)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion as
+    *    packUnorm2x16: round(clamp(c, 0, +1) * 65535.0)
+    */
+   const float clamped = CLAMP(x, 0.0f, 1.0f);
+   const float scaled = clamped * 65535.0f;
+
+   return (uint16_t) round_to_even(scaled);
+}
+
+
+/**
+ * Evaluate one component of unpackUnorm2x16.
+ *
+ * \param u  16-bit unsigned normalized field
+ * \param f  receives u scaled into the range [0, 1]
+ */
+static void
+unpack_unorm_1x16(uint16_t u, float *f)
+{
+   /* Section 8.4 of the GLSL ES 3.00 spec defines the conversion as
+    *    unpackUnorm2x16: f / 65535.0
+    */
+   const float numerator = (float) u;
+   const float denominator = 65535.0f;
+
+   *f = numerator / denominator;
+}
+
+
+/**
+ * Evaluate one component of packHalf2x16.
+ *
+ * The mantissa is truncated, not rounded, when the float32 input has more
+ * precision than a float16 can hold.
+ *
+ * \param x  float32 value to convert
+ * \return the bit pattern of the resulting float16
+ */
+static uint16_t
+pack_half_1x16(float x)
+{
+   /* The bit layout of a float16 is:
+    *   sign:     15
+    *   exponent: 10:14
+    *   mantissa: 0:9
+    *
+    * The sign, exponent, and mantissa of a float16 determine its value thus:
+    *
+    *  if e = 0 and m = 0, then zero:       (-1)^s * 0
+    *  if e = 0 and m != 0, then subnormal: (-1)^s * 2^(-14) * (m / 2^10)
+    *  if 0 < e < 31, then normal:          (-1)^s * 2^(e - 15) * (1 + m / 2^10)
+    *  if e = 31 and m = 0, then inf:       (-1)^s * inf
+    *  if e = 31 and m != 0, then NaN
+    *
+    *  where 0 <= m < 2^10 .
+    */
+
+   /* Calculate the resultant float16's sign, exponent, and mantissa bits.
+    * e and m start at zero so that they are never read uninitialized, even
+    * if the default case below is reached with assertions compiled out.
+    */
+   const int s = (copysign(1.0f, x) < 0) ? 1 : 0;
+   int e = 0;
+   int m = 0;
+
+   switch (fpclassify(x)) {
+   case FP_NAN:
+      /* Any representation with e = 31 and m != 0 suffices. */
+      return 0xffffu;
+   case FP_INFINITE:
+      e = 31;
+      m = 0;
+      break;
+   case FP_SUBNORMAL:
+   case FP_ZERO:
+      /* Subnormal float32 values are too small to be represented as
+       * a float16.
+       */
+      e = 0;
+      m = 0;
+      break;
+   case FP_NORMAL: {
+      /* Represent the absolute value of the float32 input in the form
+       *
+       *   2^E * F, where 0.5 <= F < 1 .
+       */
+      int E;
+      const float F = frexpf(fabsf(x), &E);
+
+      /* Some key boundary values of float16 are:
+       *
+       *   min_subnormal = 2^(-14) * (1 / 2^10)
+       *   max_subnormal = 2^(-14) * (1023 / 2^10)
+       *   min_normal    = 2^(1 - 15) * (1 + 0 / 2^10)
+       *   max_normal    = 2^(30 - 15) * (1 + 1023 / 2^10)
+       *
+       * Representing the same boundary values in the form returned
+       * by frexpf(),
+       *
+       *   2^e * f where 0.5 <= f < 1,
+       *
+       * gives
+       *
+       *   min_subnormal = 2^(-23) * 0.5
+       *   max_subnormal = 2^(-14) * 0.9990234375
+       *   min_normal    = 2^(-13) * 0.5
+       *   max_normal    = 2^15 * (2^10 + 1023) / 2^10
+       *                 = 2^16 * (2^10 + 1023) / 2^11
+       *                 = 2^16 * 0.99951171875
+       *
+       * Because F is never below 0.5, a comparison against a boundary whose
+       * fraction is 0.5 reduces to a comparison of exponents alone.
+       *
+       * Now calculate the result's exponent and mantissa by comparing the
+       * float32 input against the boundary values above.
+       */
+      if (E < -23) {
+         /* The float32 input is smaller than min_subnormal, so it cannot be
+          * represented as a float16. The result is zero.
+          */
+         e = 0;
+         m = 0;
+      } else if (E < -13) {
+         /* The resultant float16 value is subnormal. Calculate m:
+          *
+          *   2^E * F = 2^(-14) * (m / 2^10)
+          *           = 2^(-24) * m
+          *         m = 2^(E + 24) * F
+          */
+         e = 0;
+         m = (int) ldexpf(F, E + 24);
+      } else if (E < 16 || (E == 16 && F <= 0.99951171875f)) {
+         /* The resultant float16 is normal. Calculate e and m:
+          *
+          *   2^E * F = 2^(e - 15) * (1 + m / 2^10)          (1)
+          *           = 2^(e - 15) * (2^10 + m) / 2^10       (2)
+          *           = 2^(e - 14) * (2^10 + m) / 2^11       (3)
+          *
+          * Substituting
+          *
+          *   e1 := E                                        (4)
+          *   f1 := F                                        (5)
+          *   e2 := e - 14                                   (6)
+          *   f2 := (2^10 + m) / 2^11                        (7)
+          *
+          * transforms the equation to
+          *
+          *   2^e1 * f1 = 2^e2 * f2                          (8)
+          *
+          * By definition, f1 lies in the range [0.5, 1). By equation 7, f2
+          * lies there also. This observation combined with equation 8 implies
+          * f1 = f2, which in turn implies e1 = e2. Therefore
+          *
+          *   e = E + 14
+          *   m = 2^11 * F - 2^10
+          */
+         e = E + 14;
+         m = (int) (ldexpf(F, 11) - 1024.0f);
+      } else {
+         /* The float32 input is too large to represent as a float16. The
+          * result is infinite.
+          */
+         e = 31;
+         m = 0;
+      }
+      break;
+   }
+   default:
+      assert(0);
+      break;
+   }
+
+   assert(s == 0 || s == 1);
+   assert(0 <= e && e <= 31);
+   assert(0 <= m && m <= 1023);
+
+   return (uint16_t) ((s << 15) | (e << 10) | m);
+}
+
+/**
+ * Evaluate one component of unpackHalf2x16.
+ *
+ * \param u  bit pattern of a float16
+ * \param f  receives the float32 value encoded by u
+ */
+static void
+unpack_half_1x16(uint16_t u, float *f)
+{
+   /* A float16 stores its sign in bit 15, its exponent e in bits 10:14 and
+    * its mantissa m in bits 0:9 (so 0 <= m < 2^10). The encoded value is:
+    *
+    *  e = 0 and m = 0:   zero       (-1)^s * 0
+    *  e = 0 and m != 0:  subnormal  (-1)^s * 2^(-14) * (m / 2^10)
+    *  0 < e < 31:        normal     (-1)^s * 2^(e - 15) * (1 + m / 2^10)
+    *  e = 31 and m = 0:  infinity   (-1)^s * inf
+    *  e = 31 and m != 0: NaN
+    */
+   const int sign_bit = (u >> 15) & 0x1;
+   const int exponent = (u >> 10) & 0x1f;
+   const int mantissa = u & 0x3ff;
+
+   const float sign = sign_bit ? -1 : 1;
+
+   switch (exponent) {
+   case 0:
+      /* Zero or subnormal: 2^(-14) * (m / 2^10) = 2^(-24) * m. */
+      *f = sign * pow(2, -24) * mantissa;
+      break;
+   case 31:
+      /* All exponent bits set: infinity if the mantissa is zero, else NaN. */
+      if (mantissa == 0)
+         *f = sign * INFINITY;
+      else
+         *f = NAN;
+      break;
+   default:
+      /* 1 <= exponent <= 30: the float16 is normal. */
+      *f = sign * pow(2, exponent - 15) * (1.0 + mantissa / 1024.0);
+      break;
+   }
+}
+
 ir_constant *
 ir_rvalue::constant_expression_value(struct hash_table *variable_context)
 {
@@ -459,6 +785,42 @@ ir_expression::constant_expression_value(struct hash_table *variable_context)
       }
       break;
 
+   case ir_unop_pack_snorm_2x16:
+      assert(op[0]->type == glsl_type::vec2_type);
+      data.u[0] = pack_2x16(pack_snorm_1x16,
+                            op[0]->value.f[0],
+                            op[0]->value.f[1]);
+      break;
+   case ir_unop_unpack_snorm_2x16:
+      assert(op[0]->type == glsl_type::uint_type);
+      unpack_2x16(unpack_snorm_1x16,
+                  op[0]->value.u[0],
+                  &data.f[0], &data.f[1]);
+      break;
+   case ir_unop_pack_unorm_2x16:
+      assert(op[0]->type == glsl_type::vec2_type);
+      data.u[0] = pack_2x16(pack_unorm_1x16,
+                            op[0]->value.f[0],
+                            op[0]->value.f[1]);
+      break;
+   case ir_unop_unpack_unorm_2x16:
+      assert(op[0]->type == glsl_type::uint_type);
+      unpack_2x16(unpack_unorm_1x16,
+                  op[0]->value.u[0],
+                  &data.f[0], &data.f[1]);
+      break;
+   case ir_unop_pack_half_2x16:
+      assert(op[0]->type == glsl_type::vec2_type);
+      data.u[0] = pack_2x16(pack_half_1x16,
+                            op[0]->value.f[0],
+                            op[0]->value.f[1]);
+      break;
+   case ir_unop_unpack_half_2x16:
+      assert(op[0]->type == glsl_type::uint_type);
+      unpack_2x16(unpack_half_1x16,
+                  op[0]->value.u[0],
+                  &data.f[0], &data.f[1]);
+      break;
    case ir_binop_pow:
       assert(op[0]->type->base_type == GLSL_TYPE_FLOAT);
       for (unsigned c = 0; c < op[0]->type->components(); c++) {
-- 
1.8.1