Mesa (master): gallivm: special case conversion 4x4f to 1x16ub

Keith Whitwell keithw at kemper.freedesktop.org
Fri Oct 8 16:32:05 UTC 2010


Module: Mesa
Branch: master
Commit: 607e3c542cedd645da91c96abfe6698623acf503
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=607e3c542cedd645da91c96abfe6698623acf503

Author: Keith Whitwell <keithw at vmware.com>
Date:   Mon Oct  4 15:00:34 2010 +0100

gallivm: special case conversion 4x4f to 1x16ub

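Four vectors of 4 floats are scaled by 255, converted to 32-bit
integers, and narrowed with the SSE2 pack intrinsics (packssdw, then
packuswb) into a single vector of 16 unsigned bytes.

Nice reduction in the number of operations required for final color
output in many shaders.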

---

 src/gallium/auxiliary/gallivm/lp_bld_conv.c |   84 +++++++++++++++++++++++++++
 1 files changed, 84 insertions(+), 0 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
index 8b47731..605eb04 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c
@@ -69,6 +69,7 @@
 #include "lp_bld_arit.h"
 #include "lp_bld_pack.h"
 #include "lp_bld_conv.h"
+#include "lp_bld_intr.h"
 
 
 /**
@@ -241,6 +242,89 @@ lp_build_conv(LLVMBuilderRef builder,
    }
    num_tmps = num_srcs;
 
+
+   /* Special case 4x4f --> 1x16ub 
+    */
+   if (src_type.floating == 1 &&
+       src_type.fixed    == 0 &&
+       src_type.sign     == 1 &&
+       src_type.norm     == 0 &&
+       src_type.width    == 32 &&
+       src_type.length   == 4 &&
+
+       dst_type.floating == 0 &&
+       dst_type.fixed    == 0 &&
+       dst_type.sign     == 0 &&
+       dst_type.norm     == 1 &&
+       dst_type.width    == 8 &&
+       dst_type.length   == 16)
+   {
+      int i;
+
+      for (i = 0; i < num_dsts; i++, src += 4) {
+         struct lp_type int16_type = dst_type;
+         struct lp_type int32_type = dst_type;
+         LLVMValueRef lo, hi;
+         LLVMValueRef src_int0;
+         LLVMValueRef src_int1;
+         LLVMValueRef src_int2;
+         LLVMValueRef src_int3;
+         LLVMTypeRef int16_vec_type;
+         LLVMTypeRef int32_vec_type;
+         LLVMTypeRef src_vec_type;
+         LLVMTypeRef dst_vec_type;
+         LLVMValueRef const_255f;
+
+         int16_type.width *= 2;
+         int16_type.length /= 2;
+         int16_type.sign = 1;
+
+         int32_type.width *= 4;
+         int32_type.length /= 4;
+         int32_type.sign = 1;
+
+         src_vec_type   = lp_build_vec_type(src_type);
+         dst_vec_type   = lp_build_vec_type(dst_type);
+         int16_vec_type = lp_build_vec_type(int16_type);
+         int32_vec_type = lp_build_vec_type(int32_type);
+
+         const_255f = lp_build_const_vec(src_type, 255.0);
+
+         src_int0 = LLVMBuildFPToSI(builder,
+                                    LLVMBuildFMul(builder, src[0], const_255f, ""),
+                                    int32_vec_type, "");
+
+         src_int1 = LLVMBuildFPToSI(builder,
+                                    LLVMBuildFMul(builder, src[1], const_255f, ""),
+                                    int32_vec_type, "");
+
+         src_int2 = LLVMBuildFPToSI(builder,
+                                    LLVMBuildFMul(builder, src[2], const_255f, ""),
+                                    int32_vec_type, "");
+
+         src_int3 = LLVMBuildFPToSI(builder,
+                                    LLVMBuildFMul(builder, src[3], const_255f, ""),
+                                    int32_vec_type, "");
+
+#if HAVE_LLVM >= 0x0207
+         lo = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128",
+                                        int16_vec_type, src_int0, src_int1);
+         hi = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128",
+                                        int16_vec_type, src_int2, src_int3);
+         dst[i] = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128",
+                                            dst_vec_type, lo, hi);
+#else
+         lo = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128",
+                                        int32_vec_type, src_int0, src_int1);
+         hi = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packssdw.128",
+                                        int32_vec_type, src_int2, src_int3);
+         dst[i] = lp_build_intrinsic_binary(builder, "llvm.x86.sse2.packuswb.128",
+                                            int16_vec_type, lo, hi);
+#endif
+      }
+      return; 
+   }
+
    /*
     * Clamp if necessary
     */




More information about the mesa-commit mailing list