[Mesa-dev] [PATCH] llvmpipe: fix snorm blending
sroland at vmware.com
sroland at vmware.com
Fri Nov 17 07:00:42 UTC 2017
From: Roland Scheidegger <sroland at vmware.com>
The blend math gets a bit funky due to inverse blend factors being
in range [0,2] rather than [-1,1], our normalized math can't really
cover this.
src_alpha_saturate blend factor has a similar problem too.
(Note that piglit fbo-blending-formats test is mostly useless for
anything but unorm formats, since not just all src/dst values are
between [0,1], but the tests are crafted in a way that the results
are between [0,1] too.)
---
src/gallium/auxiliary/gallivm/lp_bld_arit.c | 10 +-
src/gallium/auxiliary/gallivm/lp_bld_arit.h | 7 ++
src/gallium/drivers/llvmpipe/lp_bld_blend.c | 120 +++++++++++++++++++++++-
src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 28 ++++--
4 files changed, 149 insertions(+), 16 deletions(-)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.c b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
index a1edd34..628dedd 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.c
@@ -548,10 +548,10 @@ lp_build_add(struct lp_build_context *bld,
if(a == bld->undef || b == bld->undef)
return bld->undef;
- if(bld->type.norm) {
+ if(type.norm) {
const char *intrinsic = NULL;
- if(a == bld->one || b == bld->one)
+ if(!type.sign && (a == bld->one || b == bld->one))
return bld->one;
if (!type.floating && !type.fixed) {
@@ -849,10 +849,10 @@ lp_build_sub(struct lp_build_context *bld,
if(a == b)
return bld->zero;
- if(bld->type.norm) {
+ if(type.norm) {
const char *intrinsic = NULL;
- if(b == bld->one)
+ if(!type.sign && b == bld->one)
return bld->zero;
if (!type.floating && !type.fixed) {
@@ -963,7 +963,7 @@ lp_build_sub(struct lp_build_context *bld,
* @sa Michael Herf, The "double blend trick", May 2000,
* http://www.stereopsis.com/doubleblend.html
*/
-static LLVMValueRef
+LLVMValueRef
lp_build_mul_norm(struct gallivm_state *gallivm,
struct lp_type wide_type,
LLVMValueRef a, LLVMValueRef b)
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_arit.h b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
index 2a4137a..f5b2800 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_arit.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_arit.h
@@ -71,6 +71,13 @@ lp_build_sub(struct lp_build_context *bld,
LLVMValueRef a,
LLVMValueRef b);
+
+LLVMValueRef
+lp_build_mul_norm(struct gallivm_state *gallivm,
+ struct lp_type wide_type,
+ LLVMValueRef a,
+ LLVMValueRef b);
+
LLVMValueRef
lp_build_mul(struct lp_build_context *bld,
LLVMValueRef a,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.c b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
index 1feb415..bd886dc 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.c
@@ -35,6 +35,7 @@
#include "gallivm/lp_bld_swizzle.h"
#include "gallivm/lp_bld_flow.h"
#include "gallivm/lp_bld_debug.h"
+#include "gallivm/lp_bld_pack.h"
#include "lp_bld_blend.h"
@@ -86,6 +87,56 @@ lp_build_blend_factor_complementary(unsigned src_factor, unsigned dst_factor)
/**
+ * Whether this is a inverse blend factor
+ */
+static inline boolean
+is_inverse_factor(unsigned factor)
+{
+ return factor > 0x11;
+}
+
+
+/**
+ * Calculates the (expanded to wider type) multiplication
+ * of 2 normalized numbers.
+ */
+static void
+lp_build_mul_norm_expand(struct lp_build_context *bld,
+ LLVMValueRef a, LLVMValueRef b,
+ LLVMValueRef *resl, LLVMValueRef *resh,
+ boolean signedness_differs)
+{
+ const struct lp_type type = bld->type;
+ struct lp_type wide_type = lp_wider_type(type);
+ struct lp_type wide_type2 = wide_type;
+ struct lp_type type2 = type;
+ LLVMValueRef al, ah, bl, bh;
+
+ assert(lp_check_value(type, a));
+ assert(lp_check_value(type, b));
+ assert(!type.floating && !type.fixed && type.norm);
+
+ if(a == bld->zero || b == bld->zero) {
+ LLVMValueRef zero = LLVMConstNull(lp_build_vec_type(bld->gallivm, wide_type));
+ *resl = zero;
+ *resh = zero;
+ return;
+ }
+
+ if (signedness_differs) {
+ type2.sign = !type.sign;
+ wide_type2.sign = !wide_type2.sign;
+ }
+
+ lp_build_unpack2_native(bld->gallivm, type, wide_type, a, &al, &ah);
+ lp_build_unpack2_native(bld->gallivm, type2, wide_type2, b, &bl, &bh);
+
+ *resl = lp_build_mul_norm(bld->gallivm, wide_type, al, bl);
+ *resh = lp_build_mul_norm(bld->gallivm, wide_type, ah, bh);
+}
+
+
+/**
* @sa http://www.opengl.org/sdk/docs/man/xhtml/glBlendEquationSeparate.xml
*/
LLVMValueRef
@@ -192,9 +243,72 @@ lp_build_blend(struct lp_build_context *bld,
if (optimise_only)
return NULL;
- src_term = lp_build_mul(bld, src, src_factor);
- dst_term = lp_build_mul(bld, dst, dst_factor);
- return lp_build_blend_func(bld, func, src_term, dst_term);
+ if ((bld->type.norm && bld->type.sign) &&
+ (is_inverse_factor(factor_src) || is_inverse_factor(factor_dst))) {
+ /*
+ * With snorm blending, the inverse blend factors range from [0,2]
+ * instead of [-1,1], so the ordinary signed normalized arithmetic
+ * doesn't quite work. Unpack must be unsigned, and the add/sub
+ * must be done with wider type.
+ * (Note that it's not quite obvious what the blend equation wrt to
+ * clamping should actually be based on GL spec in this case, but
+ * really the incoming src values are clamped to [-1,1] (the dst is
+ * always clamped already), and then NO further clamping occurs until
+ * the end.)
+ */
+ struct lp_build_context bldw;
+ struct lp_type wide_type = lp_wider_type(bld->type);
+ LLVMValueRef src_terml, src_termh, dst_terml, dst_termh;
+ LLVMValueRef resl, resh;
+
+ /*
+ * We don't need saturate math for the sub/add, since we have
+ * x+1 bit numbers in x*2 wide type (result is x+2 bits).
+ * (Doesn't really matter on x86 sse2 though as we use saturated
+ * intrinsics.)
+ */
+ wide_type.norm = 0;
+ lp_build_context_init(&bldw, bld->gallivm, wide_type);
+
+ /*
+ * XXX This is a bit hackish. Note that -128 really should
+ * be -1.0, the same as -127. However, we did not actually clamp
+ * things anywhere (relying on pack intrinsics instead) therefore
+ * we will get -128, and the inverted factor then 255. But the mul
+ * can overflow in this case (rather the rounding fixups for the mul,
+ * -128*255 will be positive).
+ * So we clamp the src and dst up here but only when necessary (we
+ * should do this before calculating blend factors but it's enough
+ * for avoiding overflow).
+ */
+ if (is_inverse_factor(factor_src)) {
+ src = lp_build_max(bld, src,
+ lp_build_const_vec(bld->gallivm, bld->type, -1.0));
+ }
+ if (is_inverse_factor(factor_dst)) {
+ dst = lp_build_max(bld, dst,
+ lp_build_const_vec(bld->gallivm, bld->type, -1.0));
+ }
+
+ lp_build_mul_norm_expand(bld, src, src_factor, &src_terml, &src_termh,
+ is_inverse_factor(factor_src) ? TRUE : FALSE);
+ lp_build_mul_norm_expand(bld, dst, dst_factor, &dst_terml, &dst_termh,
+ is_inverse_factor(factor_dst) ? TRUE : FALSE);
+ resl = lp_build_blend_func(&bldw, func, src_terml, dst_terml);
+ resh = lp_build_blend_func(&bldw, func, src_termh, dst_termh);
+
+ /*
+ * XXX pack2_native is not ok because the values have to be in dst
+ * range. We need native pack though for the correct order on avx2.
+ * Will break on everything not implementing clamping pack intrinsics
+ * (i.e. everything but sse2 and altivec).
+ */
+ return lp_build_pack2_native(bld->gallivm, wide_type, bld->type, resl, resh);
+ } else {
+ src_term = lp_build_mul(bld, src, src_factor);
+ dst_term = lp_build_mul(bld, dst, dst_factor);
+ return lp_build_blend_func(bld, func, src_term, dst_term);
+ }
}
void
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 45c5c2b..cc46458 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -117,17 +117,29 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
else {
/*
* If there's no dst alpha the complement is zero but for unclamped
- * float inputs min can be non-zero (negative).
+ * float inputs (or snorm inputs) min can be non-zero (negative).
*/
- if (!bld->has_dst_alpha) {
- if (!bld->saturate)
+ if (!bld->saturate) {
+ if (!bld->has_dst_alpha) {
bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
- }
- else {
- if(!bld->inv_dst)
- bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
- if(!bld->saturate)
+ }
+ else if (bld->base.type.norm && bld->base.type.sign) {
+ /*
+ * The complement/min totally doesn't work, since
+ * the complement is in range [0,2] but the other
+ * min input is [-1,1]. However, we can just clamp to 0
+ * before doing the complement...
+ */
+ LLVMValueRef inv_dst;
+ inv_dst = lp_build_max(&bld->base, bld->base.zero, bld->dst);
+ inv_dst = lp_build_comp(&bld->base, inv_dst);
+ bld->saturate = lp_build_min(&bld->base, src_alpha, inv_dst);
+ } else {
+ if(!bld->inv_dst) {
+ bld->inv_dst = lp_build_comp(&bld->base, bld->dst);
+ }
bld->saturate = lp_build_min(&bld->base, src_alpha, bld->inv_dst);
+ }
}
return bld->saturate;
}
--
2.7.4
More information about the mesa-dev
mailing list