Mesa (floating): llvmpipe: rework blending to work properly

Luca Barbieri lb at kemper.freedesktop.org
Fri Aug 27 17:25:22 UTC 2010


Module: Mesa
Branch: floating
Commit: 62c4db141ea06f095851bd41bbe60bc505930482
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=62c4db141ea06f095851bd41bbe60bc505930482

Author: Luca Barbieri <luca at luca-barbieri.com>
Date:   Fri Aug 27 00:58:13 2010 +0200

llvmpipe: rework blending to work properly

Blending has likely been broken since the move to float tiles, because
values are no longer automatically clamped.

Also, we need to rework it to properly support blending for both
fixed-point and floating-point formats.

Note that this approach is probably suboptimal: we should instead
always keep tiles in whatever data type the resource has, and blend
in that data type.

Blending in the resource data type gives the correct clamping semantics
for free, and should be faster.

---

 src/gallium/drivers/llvmpipe/lp_bld_blend.h     |   10 ++-
 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c |    2 +-
 src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c |  120 +++++++++++++++++-----
 src/gallium/drivers/llvmpipe/lp_setup.c         |   14 ++-
 src/gallium/drivers/llvmpipe/lp_state_fs.c      |   79 +++++++++++++--
 src/gallium/drivers/llvmpipe/lp_state_fs.h      |    8 ++
 src/gallium/drivers/llvmpipe/lp_test_blend.c    |    2 +-
 7 files changed, 190 insertions(+), 45 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend.h b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
index 5cecec3..fb8506a 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend.h
@@ -59,7 +59,7 @@ lp_build_blend_func(struct lp_build_context *bld,
                     LLVMValueRef term1,
                     LLVMValueRef term2);
 
-
+/* XXX: this needs fixing for correct blend clamping */
 LLVMValueRef
 lp_build_blend_aos(LLVMBuilderRef builder,
                    const struct pipe_blend_state *blend,
@@ -78,8 +78,12 @@ lp_build_blend_soa(LLVMBuilderRef builder,
                    unsigned rt,
                    LLVMValueRef src[4],
                    LLVMValueRef dst[4],
-                   LLVMValueRef const_[4],
-                   LLVMValueRef res[4]);
+                   LLVMValueRef ucon[4],
+                   LLVMValueRef ccon[4],
+                   LLVMValueRef res[4],
+                   struct lp_type physical_type,
+                   boolean clamp_blend_source_factors_and_results,
+                   boolean clamp_blend_dest);
 
 
 /**
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index 09e9833..e08bada 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -300,7 +300,7 @@ lp_build_blend_func(struct lp_build_context *bld,
    }
 }
 
-
+/* XXX: this needs fixing for correct blend clamping */
 LLVMValueRef
 lp_build_blend_aos(LLVMBuilderRef builder,
                    const struct pipe_blend_state *blend,
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
index 2f1bc77..8d0567e 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_soa.c
@@ -73,6 +73,7 @@
 
 #include "gallivm/lp_bld_type.h"
 #include "gallivm/lp_bld_arit.h"
+#include "gallivm/lp_bld_conv.h"
 #include "lp_bld_blend.h"
 
 
@@ -87,11 +88,13 @@ struct lp_build_blend_soa_context
 
    LLVMValueRef src[4];
    LLVMValueRef dst[4];
-   LLVMValueRef con[4];
+   LLVMValueRef ccon[4];
+   LLVMValueRef ucon[4];
 
    LLVMValueRef inv_src[4];
    LLVMValueRef inv_dst[4];
-   LLVMValueRef inv_con[4];
+   LLVMValueRef inv_ccon[4];
+   LLVMValueRef inv_ucon[4];
 
    LLVMValueRef src_alpha_saturate;
 
@@ -116,7 +119,7 @@ struct lp_build_blend_soa_context
  */
 static LLVMValueRef
 lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
-                          unsigned factor, unsigned i)
+                          unsigned factor, unsigned i, boolean clamp)
 {
    /*
     * Compute src/first term RGB
@@ -143,9 +146,9 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
          return bld->src_alpha_saturate;
       }
    case PIPE_BLENDFACTOR_CONST_COLOR:
-      return bld->con[i];
+      return clamp ? bld->ccon[i] : bld->ucon[i];
    case PIPE_BLENDFACTOR_CONST_ALPHA:
-      return bld->con[3];
+      return clamp ? bld->ccon[3] : bld->ucon[3];
    case PIPE_BLENDFACTOR_SRC1_COLOR:
       /* TODO */
       assert(0);
@@ -173,13 +176,31 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
          bld->inv_dst[3] = lp_build_comp(&bld->base, bld->dst[3]);
       return bld->inv_dst[3];
    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
-      if(!bld->inv_con[i])
-         bld->inv_con[i] = lp_build_comp(&bld->base, bld->con[i]);
-      return bld->inv_con[i];
+      if(clamp)
+      {
+         if(!bld->inv_ccon[i])
+            bld->inv_ccon[i] = lp_build_comp(&bld->base, bld->ccon[i]);
+         return bld->inv_ccon[i];
+      }
+      else
+      {
+         if(!bld->inv_ucon[i])
+            bld->inv_ucon[i] = lp_build_comp(&bld->base, bld->ucon[i]);
+         return bld->inv_ucon[i];
+      }
    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
-      if(!bld->inv_con[3])
-         bld->inv_con[3] = lp_build_comp(&bld->base, bld->con[3]);
-      return bld->inv_con[3];
+      if(clamp)
+      {
+         if(!bld->inv_ccon[3])
+            bld->inv_ccon[3] = lp_build_comp(&bld->base, bld->ccon[3]);
+         return bld->inv_ccon[3];
+      }
+      else
+      {
+         if(!bld->inv_ucon[3])
+            bld->inv_ucon[3] = lp_build_comp(&bld->base, bld->ucon[3]);
+         return bld->inv_ucon[3];
+      }
    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
       /* TODO */
       assert(0);
@@ -200,7 +221,8 @@ lp_build_blend_soa_factor(struct lp_build_blend_soa_context *bld,
  * \param rt  render target index (to index the blend / colormask state)
  * \param src  src/fragment color
  * \param dst  dst/framebuffer color
- * \param con  constant blend color
+ * \param ucon  unclamped constant blend color
+ * \param ccon  clamped constant blend color
  * \param res  the result/output
  */
 void
@@ -210,8 +232,12 @@ lp_build_blend_soa(LLVMBuilderRef builder,
                    unsigned rt,
                    LLVMValueRef src[4],
                    LLVMValueRef dst[4],
-                   LLVMValueRef con[4],
-                   LLVMValueRef res[4])
+                   LLVMValueRef ucon[4],
+                   LLVMValueRef ccon[4],
+                   LLVMValueRef res[4],
+                   struct lp_type physical_type,
+                   boolean clamp_blend_source_factors_and_results,
+                   boolean clamp_blend_dest)
 {
    struct lp_build_blend_soa_context bld;
    unsigned i, j, k;
@@ -224,20 +250,45 @@ lp_build_blend_soa(LLVMBuilderRef builder,
    for (i = 0; i < 4; ++i) {
       bld.src[i] = src[i];
       bld.dst[i] = dst[i];
-      bld.con[i] = con[i];
+      bld.ucon[i] = ucon[i];
+      bld.ccon[i] = ccon[i];
    }
 
-   for (i = 0; i < 4; ++i) {
-      /* only compute blending for the color channels enabled for writing */
-      if (blend->rt[rt].colormask & (1 << i)) {
-         if (blend->logicop_enable) {
-            if(!type.floating) {
-               res[i] = lp_build_logicop(builder, blend->logicop_func, src[i], dst[i]);
+   if (blend->logicop_enable) {
+      for (i = 0; i < 4; ++i) {
+         if (blend->rt[rt].colormask & (1 << i)) {
+            if(!physical_type.floating)
+            {
+               LLVMValueRef srcb, dstb, resb;
+               lp_build_conv(builder, type, physical_type, &src[i], 1, &srcb, 1);
+               lp_build_conv(builder, type, physical_type, &dst[i], 1, &dstb, 1);
+               resb = lp_build_logicop(builder, blend->logicop_func, srcb, dstb);
+               lp_build_conv(builder, physical_type, type, &resb, 1, &res[i], 1);
             }
             else
-               res[i] = dst[i];
+               res[i] = src[i];
          }
-         else if (blend->rt[rt].blend_enable) {
+         else
+            res[i] = dst[i];
+      }
+   }
+   else if (blend->rt[rt].blend_enable) {
+      /* LLVM will hopefully eliminate unneeded values computed here */
+      if(clamp_blend_source_factors_and_results)
+      {
+         for (i = 0; i < 4; ++i)
+            src[i] = lp_build_clamp(&bld.base, src[i], bld.base.zero, bld.base.one);
+      }
+
+      if(clamp_blend_dest)
+      {
+         for (i = 0; i < 4; ++i)
+            dst[i] = lp_build_clamp(&bld.base, dst[i], bld.base.zero, bld.base.one);
+      }
+
+      for (i = 0; i < 4; ++i) {
+         /* only compute blending for the color channels enabled for writing */
+         if (blend->rt[rt].colormask & (1 << i)) {
             unsigned src_factor = i < 3 ? blend->rt[rt].rgb_src_factor : blend->rt[rt].alpha_src_factor;
             unsigned dst_factor = i < 3 ? blend->rt[rt].rgb_dst_factor : blend->rt[rt].alpha_dst_factor;
             unsigned func = i < 3 ? blend->rt[rt].rgb_func : blend->rt[rt].alpha_func;
@@ -248,9 +299,9 @@ lp_build_blend_soa(LLVMBuilderRef builder,
              */
 
             bld.factor[0][0][i] = src[i];
-            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i);
+            bld.factor[0][1][i] = lp_build_blend_soa_factor(&bld, src_factor, i, clamp_blend_source_factors_and_results);
             bld.factor[1][0][i] = dst[i];
-            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i);
+            bld.factor[1][1][i] = lp_build_blend_soa_factor(&bld, dst_factor, i, clamp_blend_source_factors_and_results);
 
             /*
              * Compute src/dst terms
@@ -310,14 +361,27 @@ lp_build_blend_soa(LLVMBuilderRef builder,
             if(j < i)
                res[i] = res[j];
             else
+            {
+               boolean saved_norm = bld.base.type.norm;
+               /* this will cause add and sub blends to saturate properly */
+               if(clamp_blend_source_factors_and_results)
+                  bld.base.type.norm = TRUE;
                res[i] = lp_build_blend_func(&bld.base, func, bld.term[0][i], bld.term[1][i]);
+               bld.base.type.norm = saved_norm;
+            }
          }
          else {
-            res[i] = src[i];
+            res[i] = dst[i];
          }
       }
-      else {
-         res[i] = dst[i];
+   }
+   else
+   {
+      for (i = 0; i < 4; ++i) {
+         if (blend->rt[rt].colormask & (1 << i))
+            res[i] = src[i];
+         else
+            res[i] = dst[i];
       }
    }
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index ad764db..b8a84ef 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -736,14 +736,20 @@ lp_setup_update_state( struct lp_setup_context *setup )
       float *stored;
       unsigned i, j;
 
-      stored = lp_scene_alloc_aligned(scene, 4 * 16 * sizeof(float), 16);
+      stored = lp_scene_alloc_aligned(scene, 2 * 4 * 4 * sizeof(float), 16);
 
       if (stored) {
-         /* smear each blend color component across 16 ubyte elements */
+         /* smear each blend color component across 4 float elements */
          for (i = 0; i < 4; ++i) {
             float c = setup->blend_color.current.color[i];
-            for (j = 0; j < 16; ++j)
-               stored[i*16 + j] = c;
+            for (j = 0; j < 4; ++j)
+               stored[i * 4+ j] = c;
+         }
+
+         for (i = 0; i < 4; ++i) {
+            float c = CLAMP(setup->blend_color.current.color[i], 0.0f, 1.0f);
+            for (j = 0; j < 4; ++j)
+               stored[(i + 4) * 4+ j] = c;
          }
 
          setup->blend_color.stored = stored;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 4ae7485..a1cd030 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -389,14 +389,18 @@ generate_blend(const struct pipe_blend_state *blend,
                LLVMValueRef context_ptr,
                LLVMValueRef mask,
                LLVMValueRef *src,
-               LLVMValueRef dst_ptr)
+               LLVMValueRef dst_ptr,
+               struct lp_type physical_type,
+               boolean clamp_blend_source_factors_and_results,
+               boolean clamp_blend_dest)
 {
    struct lp_build_context bld;
    struct lp_build_flow_context *flow;
    struct lp_build_mask_context mask_ctx;
    LLVMTypeRef vec_type;
    LLVMValueRef const_ptr;
-   LLVMValueRef con[4];
+   LLVMValueRef ucon[4];
+   LLVMValueRef ccon[4];
    LLVMValueRef dst[4];
    LLVMValueRef res[4];
    unsigned chan;
@@ -416,17 +420,22 @@ generate_blend(const struct pipe_blend_state *blend,
 
    /* load constant blend color and colors from the dest color buffer */
    for(chan = 0; chan < 4; ++chan) {
-      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan * 4, 0);
-      con[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+      LLVMValueRef dindex = LLVMConstInt(LLVMInt32Type(), chan * 4, 0);
+      LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), chan, 0);
+      LLVMValueRef cindex = LLVMConstInt(LLVMInt32Type(), chan + 4, 0);
+      ucon[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &index, 1, ""), "");
+      ccon[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, const_ptr, &cindex, 1, ""), "");
 
-      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &index, 1, ""), "");
+      dst[chan] = LLVMBuildLoad(builder, LLVMBuildGEP(builder, dst_ptr, &dindex, 1, ""), "");
 
-      lp_build_name(con[chan], "con.%c", "rgba"[chan]);
+      lp_build_name(ucon[chan], "ucon.%c", "rgba"[chan]);
+      lp_build_name(ccon[chan], "ccon.%c", "rgba"[chan]);
       lp_build_name(dst[chan], "dst.%c", "rgba"[chan]);
    }
 
    /* do blend */
-   lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
+   lp_build_blend_soa(builder, blend, type, rt, src, dst, ucon, ccon, res,
+         physical_type, clamp_blend_source_factors_and_results, clamp_blend_dest);
 
    /* store results to color buffer */
    for(chan = 0; chan < 4; ++chan) {
@@ -671,6 +680,20 @@ generate_fragment(struct llvmpipe_context *lp,
          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);
          LLVMValueRef quad_color_ptr = LLVMBuildGEP(builder, color_ptr, &index, 1, "");
 
+         /* TODO: this probably doesn't really work except for 8_UNORM and 16_UNORM */
+         struct lp_type physical_type;
+         unsigned logicop_width = 4 << ((key->logicop_width >> (i * 2)) & 3);
+         if(logicop_width >= 8)
+         {
+            physical_type = lp_type_unorm(logicop_width);
+            physical_type.length = blend_type.length;
+         }
+         else
+         {
+            physical_type = blend_type;
+            physical_type.floating = 1; /* hackish way to disable logicop */
+         }
+
          generate_blend(&key->blend,
                      rt,
 		     builder,
@@ -678,7 +701,11 @@ generate_fragment(struct llvmpipe_context *lp,
 		     context_ptr,
 		     blend_mask[i],
 		     blend_in_color[i],
-		     quad_color_ptr);
+		     quad_color_ptr,
+		     physical_type,
+		     !!(key->clamp_blend_source_factors_and_results & (1 << cbuf)),
+		     !!(key->clamp_blend_dest & (1 << cbuf))
+		     );
       }
    }
 
@@ -1064,6 +1091,7 @@ make_variant_key(struct llvmpipe_context *lp,
       struct pipe_rt_blend_state *blend_rt = &key->blend.rt[i];
       const struct util_format_description *format_desc;
       unsigned chan;
+      unsigned logicop_width = 0;
 
       format_desc = util_format_description(lp->framebuffer.cbufs[i]->format);
       assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
@@ -1071,6 +1099,41 @@ make_variant_key(struct llvmpipe_context *lp,
 
       blend_rt->colormask = lp->blend->rt[i].colormask;
 
+      if(key->blend.logicop_enable)
+         blend_rt->blend_enable = 0;
+
+      for(chan = 0; chan < 4; ++chan) {
+         /* blend is still disabled, according to the OpenGL specification */
+         if(format_desc->channel[chan].type == UTIL_FORMAT_TYPE_FLOAT)
+            logicop_width = 0;
+         else
+         {
+            unsigned new_logicop_width = 0;
+            if(format_desc->channel[chan].size == 8)
+               new_logicop_width = 1;
+            else if(format_desc->channel[chan].size == 16)
+               new_logicop_width = 2;
+            else if(format_desc->channel[chan].size == 32)
+               new_logicop_width = 3;
+
+            if(chan == 0)
+               logicop_width = new_logicop_width;
+            else if(logicop_width != new_logicop_width)
+               logicop_width = 0;
+
+            key->clamp_blend_source_factors_and_results |= (1 << i);
+
+            /* we can skip this for unsigned normalized, since they are
+             * already in the [0, 1] range
+             */
+            if(format_desc->channel[chan].type != UTIL_FORMAT_TYPE_UNSIGNED
+                  || !format_desc->channel[chan].normalized)
+               key->clamp_blend_dest |= (1 << i);
+         }
+      }
+
+      key->logicop_width |= (logicop_width << (i * 2));
+
       /* mask out color channels not present in the color buffer.
        * Should be simple to incorporate per-cbuf writemasks:
        */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index 33c4800..036d0d0 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -57,6 +57,14 @@ struct lp_fragment_shader_variant_key
    unsigned flatshade:1;
    unsigned occlusion_count:1;
 
+   unsigned clamp_blend_source_factors_and_results : PIPE_MAX_COLOR_BUFS;
+   unsigned clamp_blend_dest : PIPE_MAX_COLOR_BUFS;
+   /* each 2 bits represent the native type to use for logic for each render target
+    * 0 = disabled, 1 = uint8_t, 2 = uint16_t, 3 = uint32_t
+    * this will need to be revisited when we really want to render to UINT render targets
+    */
+   unsigned logicop_width : (PIPE_MAX_COLOR_BUFS * 2);
+
    struct lp_sampler_static_state sampler[PIPE_MAX_SAMPLERS];
 };
 
diff --git a/src/gallium/drivers/llvmpipe/lp_test_blend.c b/src/gallium/drivers/llvmpipe/lp_test_blend.c
index d0389f0..8414bf8 100644
--- a/src/gallium/drivers/llvmpipe/lp_test_blend.c
+++ b/src/gallium/drivers/llvmpipe/lp_test_blend.c
@@ -227,7 +227,7 @@ add_blend_test(LLVMModuleRef module,
          lp_build_name(dst[i], "dst.%c", "rgba"[i]);
       }
 
-      lp_build_blend_soa(builder, blend, type, rt, src, dst, con, res);
+      lp_build_blend_soa(builder, blend, type, rt, src, dst, con, con, res, type, TRUE, TRUE);
 
       for(i = 0; i < 4; ++i) {
          LLVMValueRef index = LLVMConstInt(LLVMInt32Type(), i, 0);




More information about the mesa-commit mailing list