[Mesa-dev] [PATCH] llvmpipe: fix blending with half-float formats
Roland Scheidegger
sroland at vmware.com
Tue Dec 10 07:23:28 PST 2013
Am 10.12.2013 04:57, schrieb Zack Rusin:
> The fact that we flush denorms to zero breaks our half-float
> conversion and blending. This patch enables denorms for
> blending. It's a little tricky due to the llvm bug that makes
> it incorrectly reorder the mxcsr intrinsics:
> http://llvm.org/bugs/show_bug.cgi?id=6393
>
> Signed-off-by: Zack Rusin <zackr at vmware.com>
> ---
> src/gallium/auxiliary/gallivm/lp_bld_arit.c | 67 +++++++++++++++++++++++++++++
> src/gallium/auxiliary/gallivm/lp_bld_arit.h | 11 +++++
> src/gallium/drivers/llvmpipe/lp_state_fs.c | 31 ++++++++++---
> 3 files changed, 104 insertions(+), 5 deletions(-)
>
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> index 70929e7..47e778c 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
> @@ -64,6 +64,13 @@
> #include "lp_bld_arit.h"
> #include "lp_bld_flow.h"
>
> +#if defined(PIPE_ARCH_SSE)
> +#include <xmmintrin.h>
> +/* This is defined in pmmintrin.h, but it can only be included when -msse3 is
> + * used, so just define it here to avoid requiring that flag. */
> +#define _MM_DENORMALS_ZERO_MASK 0x0040
> +#endif
> +
>
> #define EXP_POLY_DEGREE 5
>
> @@ -3489,3 +3496,63 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
> return ret;
> }
>
> +
> +LLVMValueRef
> +lp_build_fpstate_get(struct gallivm_state *gallivm)
> +{
> + if (util_cpu_caps.has_sse) {
> + LLVMBuilderRef builder = gallivm->builder;
> + LLVMValueRef mxcsr_ptr = lp_build_alloca(
> + gallivm,
> + LLVMInt32TypeInContext(gallivm->context),
> + "mxcsr_ptr");
> + lp_build_intrinsic(builder,
> + "llvm.x86.sse.stmxcsr",
> + LLVMVoidTypeInContext(gallivm->context),
> + &mxcsr_ptr, 1);
> + return mxcsr_ptr;
> + }
> + return 0;
> +}
> +
> +void
> +lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
> + boolean zero)
> +{
> + if (util_cpu_caps.has_sse) {
> + /* turn on DAZ (64) | FTZ (32768) = 32832 if available */
> + int daz_ftz = _MM_FLUSH_ZERO_MASK;
> +
> + LLVMBuilderRef builder = gallivm->builder;
> + LLVMValueRef mxcsr_ptr = lp_build_fpstate_get(gallivm);
> + LLVMValueRef mxcsr =
> + LLVMBuildLoad(builder, mxcsr_ptr, "mxcsr");
> +
> + if (util_cpu_caps.has_daz) {
> + /* Enable denormals are zero mode */
> + daz_ftz |= _MM_DENORMALS_ZERO_MASK;
> + }
> + if (zero) {
> + mxcsr = LLVMBuildOr(builder, mxcsr,
> + LLVMConstInt(LLVMTypeOf(mxcsr), daz_ftz, 0), "");
> + } else {
> + mxcsr = LLVMBuildAnd(builder, mxcsr,
> + LLVMConstInt(LLVMTypeOf(mxcsr), ~daz_ftz, 0), "");
> + }
> +
> + LLVMBuildStore(builder, mxcsr, mxcsr_ptr);
> + lp_build_fpstate_set(gallivm, mxcsr_ptr);
> + }
> +}
> +
> +void
> +lp_build_fpstate_set(struct gallivm_state *gallivm,
> + LLVMValueRef mxcsr_ptr)
> +{
> + if (util_cpu_caps.has_sse) {
> + lp_build_intrinsic(gallivm->builder,
> + "llvm.x86.sse.ldmxcsr",
> + LLVMVoidTypeInContext(gallivm->context),
> + &mxcsr_ptr, 1);
> + }
> +}
> diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> index 75bf89e..9d29093 100644
> --- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> +++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
> @@ -358,4 +358,15 @@ lp_build_is_inf_or_nan(struct gallivm_state *gallivm,
> const struct lp_type type,
> LLVMValueRef x);
>
> +
> +LLVMValueRef
> +lp_build_fpstate_get(struct gallivm_state *gallivm);
> +
> +void
> +lp_build_fpstate_set_denorms_zero(struct gallivm_state *gallivm,
> + boolean zero);
> +void
> +lp_build_fpstate_set(struct gallivm_state *gallivm,
> + LLVMValueRef mxcsr);
> +
> #endif /* !LP_BLD_ARIT_H */
> diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> index b5816e0..d0fdc80 100644
> --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
> +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
> @@ -1490,6 +1490,28 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
>
> const boolean is_1d = variant->key.resource_1d;
> unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
> + LLVMValueRef fpstate = 0;
> +
> + /* Get type from output format */
> + lp_blend_type_from_format_desc(out_format_desc, &row_type);
> + lp_mem_type_from_format_desc(out_format_desc, &dst_type);
> +
> + /*
> + * Technically this code should go into lp_build_smallfloat_to_float
> + * and lp_build_float_to_smallfloat but due to the
> + * http://llvm.org/bugs/show_bug.cgi?id=6393
> + * llvm reorders the mxcsr intrinsics in a way that breaks the code.
> + * So the ordering is important here and there shouldn't be any
> + * llvm ir instructions in this function before
> + * this, otherwise half-float format conversions won't work
> + * (again due to llvm bug #6393).
> + */
> + if (dst_type.floating && dst_type.width == 16) {
This still forgets the packed float format (r11g11b10_float) (same in
the disabling code below of course).
Otherwise looks good to me.
Roland
> + /* We need to make sure that denorms are ok for half float
> + conversions */
> + fpstate = lp_build_fpstate_get(gallivm);
> + lp_build_fpstate_set_denorms_zero(gallivm, FALSE);
> + }
>
> mask_type = lp_int32_vec4_type();
> mask_type.length = fs_type.length;
> @@ -1523,11 +1545,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
> undef_src_val = lp_build_undef(gallivm, fs_type);
> #endif
>
> -
> - /* Get type from output format */
> - lp_blend_type_from_format_desc(out_format_desc, &row_type);
> - lp_mem_type_from_format_desc(out_format_desc, &dst_type);
> -
> row_type.length = fs_type.length;
> vector_width = dst_type.floating ? lp_native_vector_width : lp_integer_vector_width;
>
> @@ -1987,6 +2004,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
> dst, dst_type, dst_count, dst_alignment);
> }
>
> + if (dst_type.floating && dst_type.width == 16) {
> + lp_build_fpstate_set(gallivm, fpstate);
> + }
> +
> if (do_branch) {
> lp_build_mask_end(&mask_ctx);
> }
>
More information about the mesa-dev
mailing list