[Mesa-dev] [PATCH] llvmpipe: fix blending with half-float formats
Zack Rusin
zackr at vmware.com
Mon Dec 9 19:57:19 PST 2013
The fact that we flush denorms to zero breaks our half-float
conversion and blending. This patch enables denorms for
blending. It's a little tricky due to an LLVM bug that causes
the mxcsr intrinsics to be reordered incorrectly:
http://llvm.org/bugs/show_bug.cgi?id=6393
Signed-off-by: Zack Rusin <zackr at vmware.com>
---
src/gallium/auxiliary/gallivm/lp_bld_arit.c | 67 +++++++++++++++++++++++++++++
src/gallium/auxiliary/gallivm/lp_bld_arit.h | 11 +++++
src/gallium/drivers/llvmpipe/lp_state_fs.c | 31 ++++++++++---
3 files changed, 104 insertions(+), 5 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index 70929e7..47e778c 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -64,6 +64,13 @@
#include "lp_bld_arit.h"
#include "lp_bld_flow.h"
+#if defined(PIPE_ARCH_SSE)
+#include <xmmintrin.h>
+#endif
+
+/*
+ * MXCSR control bits used when toggling denormal handling.
+ *
+ * _MM_DENORMALS_ZERO_MASK is defined in pmmintrin.h, but that header can
+ * only be included when -msse3 is used, so just define it here to avoid
+ * further dependencies. Both masks get guarded fallback definitions so
+ * that code referencing them still compiles when <xmmintrin.h> is not
+ * included (PIPE_ARCH_SSE undefined) and so that an existing definition
+ * is never redefined.
+ */
+#ifndef _MM_DENORMALS_ZERO_MASK
+#define _MM_DENORMALS_ZERO_MASK 0x0040
+#endif
+#ifndef _MM_FLUSH_ZERO_MASK
+#define _MM_FLUSH_ZERO_MASK 0x8000
+#endif
+
#define EXP_POLY_DEGREE 5
@@ -3489,3 +3496,63 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
return ret;
}
+
+/**
+ * Emit IR that saves the current SSE control/status register (MXCSR).
+ *
+ * A 32-bit slot is obtained from lp_build_alloca() and filled through the
+ * llvm.x86.sse.stmxcsr intrinsic.
+ *
+ * \param gallivm  state providing the LLVM builder and context to emit into
+ * \return pointer (to i32) to the slot holding the saved MXCSR value, or
+ *         NULL when util_cpu_caps.has_sse is not set, in which case no IR
+ *         is emitted.
+ */
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm)
+{
+   if (util_cpu_caps.has_sse) {
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_alloca(
+         gallivm,
+         LLVMInt32TypeInContext(gallivm->context),
+         "mxcsr_ptr");
+      /*
+       * The intrinsic is declared with an i8 pointer operand, so pass it a
+       * casted view of the slot; callers still get the i32 pointer back so
+       * they can load/store the value directly.
+       */
+      LLVMValueRef mxcsr_ptr8 = LLVMBuildPointerCast(
+         builder, mxcsr_ptr,
+         LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
+         "");
+      lp_build_intrinsic(builder,
+                         "llvm.x86.sse.stmxcsr",
+                         LLVMVoidTypeInContext(gallivm->context),
+                         &mxcsr_ptr8, 1);
+      return mxcsr_ptr;
+   }
+   return NULL;
+}
+
+/**
+ * Emit IR that switches denormal flushing in MXCSR on or off.
+ *
+ * The current MXCSR value is read via lp_build_fpstate_get(), the
+ * flush-to-zero bit (plus the denormals-are-zero bit when
+ * util_cpu_caps.has_daz is set) is set or cleared, and the result is
+ * written back with lp_build_fpstate_set(). Nothing is emitted when
+ * util_cpu_caps.has_sse is not set.
+ *
+ * \param gallivm  state providing the LLVM builder to emit into
+ * \param zero     TRUE to set the flushing bits, FALSE to clear them
+ */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero)
+{
+   if (util_cpu_caps.has_sse) {
+      /*
+       * Bits to toggle: FTZ (32768), plus DAZ (64) if available, i.e.
+       * 32832 with both. Kept unsigned so the complement used below stays
+       * a plain 32-bit pattern instead of being sign-extended when it is
+       * widened for LLVMConstInt().
+       */
+      unsigned daz_ftz = _MM_FLUSH_ZERO_MASK;
+
+      LLVMBuilderRef builder = gallivm->builder;
+      LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm);
+      LLVMValueRef mxcsr =
+         LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
+
+      if (util_cpu_caps.has_daz) {
+         /* Also toggle the denormals-are-zero mode */
+         daz_ftz |= _MM_DENORMALS_ZERO_MASK;
+      }
+      if (zero) {
+         /* set the bits: denormals are flushed */
+         mxcsr = LLVMBuildOr(builder, mxcsr,
+                             LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), "");
+      } else {
+         /* clear the bits: denormals are preserved */
+         mxcsr = LLVMBuildAnd(builder, mxcsr,
+                              LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), "");
+      }
+
+      LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
+      lp_build_fpstate_set(gallivm, mxcsr_ptr);
+   }
+}
+
+/**
+ * Emit IR that loads MXCSR from memory via llvm.x86.sse.ldmxcsr.
+ *
+ * Nothing is emitted when util_cpu_caps.has_sse is not set, so a null
+ * mxcsr_ptr is harmless in that case.
+ *
+ * \param gallivm    state providing the LLVM builder and context
+ * \param mxcsr_ptr  pointer to the 32-bit value to load, e.g. the one
+ *                   returned by lp_build_fpstate_get()
+ */
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr)
+{
+   if (util_cpu_caps.has_sse) {
+      LLVMBuilderRef builder = gallivm->builder;
+      /* The intrinsic is declared with an i8 pointer operand. */
+      LLVMValueRef mxcsr_ptr8 = LLVMBuildPointerCast(
+         builder, mxcsr_ptr,
+         LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0),
+         "");
+      lp_build_intrinsic(builder,
+                         "llvm.x86.sse.ldmxcsr",
+                         LLVMVoidTypeInContext(gallivm->context),
+                         &mxcsr_ptr8, 1);
+   }
+}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 75bf89e..9d29093 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
const struct lp_type type,
LLVMValueRef x);
+
+/* Save MXCSR; returns a pointer to the saved 32-bit value (null without SSE). */
+LLVMValueRef
+lp_build_fpstate_get(struct gallivm_state *gallivm);
+
+/* Set (zero == TRUE) or clear (zero == FALSE) the denormal-flushing bits. */
+void
+lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
+                                  boolean zero);
+
+/* Load MXCSR from the 32-bit value that mxcsr_ptr points to. */
+void
+lp_build_fpstate_set(struct gallivm_state *gallivm,
+                     LLVMValueRef mxcsr_ptr);
+
#endif /* !LP_BLD_ARIT_H */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b5816e0..d0fdc80 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1490,6 +1490,28 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
const boolean is_1d = variant->key.resource_1d;
unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
+ LLVMValueRef fpstate = 0;
+
+ /* Get type from output format */
+ lp_blend_type_from_format_desc(out_format_desc, &row_type);
+ lp_mem_type_from_format_desc(out_format_desc, &dst_type);
+
+ /*
+ * Technically this code should go into lp_build_smallfloat_to_float
+ * and lp_build_float_to_smallfloat but due to the
+ * http://llvm.org/bugs/show_bug.cgi?id=6393
+ * llvm reorders the mxcsr intrinsics in a way that breaks the code.
+ * So the ordering is important here and there shouldn't be any
+ * llvm ir instructions in this function before
+ * this, otherwise half-float format conversions won't work
+ * (again due to llvm bug #6393).
+ */
+ if (dst_type.floating && dst_type.width == 16) {
+ /* We need to make sure that denorms are ok for half float
+ conversions */
+ fpstate = lp_build_fpstate_get(gallivm);
+ lp_build_fpstate_set_denorms_zero(gallivm, FALSE);
+ }
mask_type = lp_int32_vec4_type();
mask_type.length = fs_type.length;
@@ -1523,11 +1545,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
undef_src_val = lp_build_undef(gallivm, fs_type);
#endif
-
- /* Get type from output format */
- lp_blend_type_from_format_desc(out_format_desc, &row_type);
- lp_mem_type_from_format_desc(out_format_desc, &dst_type);
-
row_type.length = fs_type.length;
vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width;
@@ -1987,6 +2004,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
dst, dst_type, dst_count, dst_alignment);
}
+ if (dst_type.floating && dst_type.width == 16) {
+ lp_build_fpstate_set(gallivm, fpstate);
+ }
+
if (do_branch) {
lp_build_mask_end(&mask_ctx);
}
--
1.8.3.2
More information about the mesa-dev
mailing list