Mesa (master): gallivm: Add code for rgb9e5 shared exponent format to float conversion

Sun Mar 24 01:09:18 UTC 2013

Module: Mesa
Branch: master
Commit: b50e362dbbced42c93fb28a420977dfecd38c823
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=b50e362dbbced42c93fb28a420977dfecd38c823

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Sat Mar 23 02:05:54 2013 +0100

gallivm: Add code for rgb9e5 shared exponent format to float conversion

And use this (and the code for r11g11b10 packed float to float conversion)
in the soa texturing code (the generated code looks quite good).
Should be an order of magnitude faster probably than using the fallback
(not measured).
Tested with piglit texwrap GL_EXT_packed_float and
GL_EXT_texture_shared_exponent respectively (didn't find much else using
it).

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/auxiliary/gallivm/lp_bld_conv.c       |   86 +++++++++++++++++++++
 src/gallium/auxiliary/gallivm/lp_bld_conv.h       |    5 +
 src/gallium/auxiliary/gallivm/lp_bld_format_soa.c |   30 +++++++-
 3 files changed, 118 insertions(+), 3 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 053f413..2f39abc 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -397,6 +397,92 @@ lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
 }
 
 
+static LLVMValueRef
+lp_build_rgb9_to_float_helper(struct gallivm_state *gallivm,
+                              struct lp_type f32_type,
+                              LLVMValueRef src,
+                              LLVMValueRef scale,
+                              unsigned mantissa_start)
+{
+   LLVMValueRef shift, mask;
+
+   struct lp_type i32_type = lp_type_int_vec(32, 32 * f32_type.length);
+   struct lp_build_context i32_bld, f32_bld;
+
+   lp_build_context_init(&i32_bld, gallivm, i32_type);
+   lp_build_context_init(&f32_bld, gallivm, f32_type);
+
+   /*
+    * This is much easier as other weirdo float formats, since
+    * there's no sign, no Inf/NaN, and there's nothing special
+    * required for normals/denormals neither (as without the implied one
+    * for the mantissa for other formats, everything looks like a denormal).
+    * So just do (float)comp_bits * scale
+    */
+   shift = lp_build_const_int_vec(gallivm, i32_type, mantissa_start);
+   mask = lp_build_const_int_vec(gallivm, i32_type, 0x1ff);
+   src = lp_build_shr(&i32_bld, src, shift);
+   src = lp_build_and(&i32_bld, src, mask);
+   src = lp_build_int_to_float(&f32_bld, src);
+   return lp_build_mul(&f32_bld, src, scale);
+}
+
+
+/**
+ * Convert shared exponent format (rgb9e5) value(s) to rgba float SoA values.
+ *
+ * @param src   packed AoS rgb9e5 values (as (vector) int32)
+ * @param dst   pointer to the SoA result values
+ */
+void
+lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
+                         LLVMValueRef src,
+                         LLVMValueRef *dst)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMTypeRef src_type = LLVMTypeOf(src);
+   LLVMValueRef shift, scale, bias, exp;
+   unsigned src_length = LLVMGetTypeKind(src_type) == LLVMVectorTypeKind ?
+                            LLVMGetVectorSize(src_type) : 1;
+   struct lp_type i32_type = lp_type_int_vec(32, 32 * src_length);
+   struct lp_type u32_type = lp_type_uint_vec(32, 32 * src_length);
+   struct lp_type f32_type = lp_type_float_vec(32, 32 * src_length);
+   struct lp_build_context i32_bld, u32_bld, f32_bld;
+
+   lp_build_context_init(&i32_bld, gallivm, i32_type);
+   lp_build_context_init(&u32_bld, gallivm, u32_type);
+   lp_build_context_init(&f32_bld, gallivm, f32_type);
+
+   /* extract exponent */
+   shift = lp_build_const_int_vec(gallivm, i32_type, 27);
+   /* this shift needs to be unsigned otherwise need mask */
+   exp = lp_build_shr(&u32_bld, src, shift);
+
+   /*
+    * scale factor is 2 ^ (exp - bias)
+    * (and additionally corrected here for the mantissa bits)
+    * not using shift because
+    * a) don't have vector shift in a lot of cases
+    * b) shift direction changes hence need 2 shifts + conditional
+    *    (or rotate instruction which is even more rare (for instance XOP))
+    * so use whacky float 2 ^ function instead manipulating exponent
+    * (saves us the float conversion at the end too)
+    */
+   bias = lp_build_const_int_vec(gallivm, i32_type, 127 - (15 + 9));
+   scale = lp_build_add(&i32_bld, exp, bias);
+   shift = lp_build_const_int_vec(gallivm, i32_type, 23);
+   scale = lp_build_shl(&i32_bld, scale, shift);
+   scale = LLVMBuildBitCast(builder, scale, f32_bld.vec_type, "");
+
+   dst[0] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 0);
+   dst[1] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 9);
+   dst[2] = lp_build_rgb9_to_float_helper(gallivm, f32_type, src, scale, 18);
+
+   /* Just set alpha to one */
+   dst[3] = f32_bld.one;
+}
+
+
 /**
  * Converts int16 half-float to float32
  * Note this can be performed in 1 instruction if vcvtph2ps exists (sse5 i think?)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
index 5bd6f4f..d8bc294 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h
@@ -70,6 +70,11 @@ lp_build_r11g11b10_to_float(struct gallivm_state *gallivm,
                             LLVMValueRef src,
                             LLVMValueRef *dst);
 
+void
+lp_build_rgb9e5_to_float(struct gallivm_state *gallivm,
+                         LLVMValueRef src,
+                         LLVMValueRef *dst);
+
 LLVMValueRef
 lp_build_clamped_float_to_unsigned_norm(struct gallivm_state *gallivm,
                                         struct lp_type src_type,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
index 9eb6ef5..54ca61a 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_format_soa.c
@@ -310,9 +310,10 @@ lp_build_rgba8_to_fi32_soa(struct gallivm_state *gallivm,
  * \param type  the desired return type for 'rgba'.  The vector length
  *              is the number of texels to fetch
  *
- * \param base_ptr  points to start of the texture image block.  For non-
- *                  compressed formats, this simply points to the texel.
- *                  For compressed formats, it points to the start of the
+ * \param base_ptr  points to the base of the texture mip tree.
+ * \param offset    offset to start of the texture image block.  For non-
+ *                  compressed formats, this simply is an offset to the texel.
+ *                  For compressed formats, it is an offset to the start of the
  *                  compressed data block.
  *
  * \param i, j  the sub-block pixel coordinates.  For non-compressed formats
@@ -368,6 +369,29 @@ lp_build_fetch_rgba_soa(struct gallivm_state *gallivm,
       return;
    }
 
+   if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT ||
+       format_desc->format == PIPE_FORMAT_R9G9B9E5_FLOAT) {
+      /*
+       * similar conceptually to above but requiring special
+       * AoS packed -> SoA float conversion code.
+       */
+      LLVMValueRef packed;
+
+      assert(type.floating);
+      assert(type.width == 32);
+
+      packed = lp_build_gather(gallivm, type.length,
+                               format_desc->block.bits,
+                               type.width, base_ptr, offset);
+      if (format_desc->format == PIPE_FORMAT_R11G11B10_FLOAT) {
+         lp_build_r11g11b10_to_float(gallivm, packed, rgba_out);
+      }
+      else {
+         lp_build_rgb9e5_to_float(gallivm, packed, rgba_out);
+      }
+      return;
+   }
+
    /*
     * Try calling lp_build_fetch_rgba_aos for all pixels.
     */