Mesa (master): llvmpipe: use alpha from already converted color if possible

Fri Jan 6 22:13:48 UTC 2017

Module: Mesa
Branch: master
Commit: 04480a04b10524e156cb01d5f7bd8ec02fcc4a67
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=04480a04b10524e156cb01d5f7bd8ec02fcc4a67

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Thu Dec 22 03:49:22 2016 +0100

llvmpipe: use alpha from already converted color if possible

For rgbx formats, there is no point in doing alpha conversion again (and
with different tranpose even, so llvm can't eliminate it).
Albeit it looks like there's some minimal changes needed in the blend code
(found by code inspection, no test seemed to complain) if we do this -
the blend factors are already sanitized if we have no destination alpha,
however for src_alpha_saturate it looks like it still might make a
difference (note that we forced has_alpha to true before for some formats
and nothing complained, but this seems safer).

Reviewed-by: Jose Fonseca <jfonseca at vmware.com>

---

 src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c | 29 +++++++++++++----
 src/gallium/drivers/llvmpipe/lp_state_fs.c      | 43 ++++++++++++++++++-------
 2 files changed, 54 insertions(+), 18 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
index a57670d..45c5c2b 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_blend_aos.c
@@ -74,6 +74,7 @@ struct lp_build_blend_aos_context
    LLVMValueRef dst;
    LLVMValueRef const_;
    LLVMValueRef const_alpha;
+   boolean has_dst_alpha;
 
    LLVMValueRef inv_src;
    LLVMValueRef inv_src_alpha;
@@ -115,10 +116,10 @@ lp_build_blend_factor_unswizzled(struct lp_build_blend_aos_context *bld,
          return bld->base.one;
       else {
          /*
-          * if there's separate src_alpha there's no dst alpha hence the complement
-          * is zero but for unclamped float inputs min can be non-zero (negative).
+          * If there's no dst alpha the complement is zero but for unclamped
+          * float inputs min can be non-zero (negative).
           */
-         if (bld->src_alpha) {
+         if (!bld->has_dst_alpha) {
             if (!bld->saturate)
                bld->saturate = lp_build_min(&bld->base, src_alpha, bld->base.zero);
          }
@@ -264,7 +265,8 @@ lp_build_blend_factor(struct lp_build_blend_aos_context *bld,
    if (alpha_swizzle != PIPE_SWIZZLE_NONE) {
       rgb_swizzle   = lp_build_blend_factor_swizzle(rgb_factor);
       alpha_factor_ = lp_build_blend_factor_unswizzled(bld, alpha_factor, TRUE);
-      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle, alpha_swizzle, num_channels);
+      return lp_build_blend_swizzle(bld, rgb_factor_, alpha_factor_, rgb_swizzle,
+                                    alpha_swizzle, num_channels);
    } else {
       return rgb_factor_;
    }
@@ -327,6 +329,7 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    bld.src_alpha = src_alpha;
    bld.src1_alpha = src1_alpha;
    bld.const_alpha = const_alpha;
+   bld.has_dst_alpha = FALSE;
 
    /* Find the alpha channel if not provided seperately */
    if (!src_alpha) {
@@ -335,6 +338,14 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
             alpha_swizzle = i;
          }
       }
+      /*
+       * Note that we may get src_alpha included from source (and 4 channels)
+       * even if the destination doesn't have an alpha channel (for rgbx
+       * formats). Generally this shouldn't make much of a difference (we're
+       * relying on blend factors being sanitized already if there's no
+       * dst alpha).
+       */
+      bld.has_dst_alpha = desc->swizzle[3] <= PIPE_SWIZZLE_W;
    }
 
    if (blend->logicop_enable) {
@@ -347,7 +358,9 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    } else if (!state->blend_enable) {
       result = src;
    } else {
-      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor && state->alpha_src_factor == state->alpha_dst_factor) || nr_channels == 1;
+      boolean rgb_alpha_same = (state->rgb_src_factor == state->rgb_dst_factor &&
+                                state->alpha_src_factor == state->alpha_dst_factor) ||
+                               nr_channels == 1;
 
       src_factor = lp_build_blend_factor(&bld, state->rgb_src_factor,
                                          state->alpha_src_factor,
@@ -370,7 +383,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
                               rgb_alpha_same,
                               false);
 
-      if(state->rgb_func != state->alpha_func && nr_channels > 1 && alpha_swizzle != PIPE_SWIZZLE_NONE) {
+      if(state->rgb_func != state->alpha_func && nr_channels > 1 &&
+                            alpha_swizzle != PIPE_SWIZZLE_NONE) {
          LLVMValueRef alpha;
 
          alpha = lp_build_blend(&bld.base,
@@ -397,7 +411,8 @@ lp_build_blend_aos(struct gallivm_state *gallivm,
    if (!util_format_colormask_full(desc, state->colormask)) {
       LLVMValueRef color_mask;
 
-      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type, state->colormask, nr_channels, swizzle);
+      color_mask = lp_build_const_mask_aos_swizzled(gallivm, bld.base.type,
+                                                    state->colormask, nr_channels, swizzle);
       lp_build_name(color_mask, "color_mask");
 
       /* Combine with input mask if necessary */
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index b6f4c2a..2c0339c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1460,7 +1460,8 @@ convert_from_blend_type(struct gallivm_state *gallivm,
          /* Extract bits */
          chans[j] = LLVMBuildLShr(builder,
                                   dst[i],
-                                  lp_build_const_int_vec(gallivm, src_type, from_lsb * blend_type.width),
+                                  lp_build_const_int_vec(gallivm, src_type,
+                                                         from_lsb * blend_type.width),
                                   "");
 
          chans[j] = LLVMBuildAnd(builder,
@@ -1548,7 +1549,8 @@ convert_alpha(struct gallivm_state *gallivm,
       /* If there is a src for each pixel broadcast the alpha across whole row */
       if (src_count == block_size) {
          for (i = 0; i < src_count; ++i) {
-            src_alpha[i] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, row_type), src_alpha[i]);
+            src_alpha[i] = lp_build_broadcast(gallivm,
+                              lp_build_vec_type(gallivm, row_type), src_alpha[i]);
          }
       } else {
          unsigned pixels = block_size / src_count;
@@ -1749,13 +1751,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
    }
 
    /* If 3 channels then pad to include alpha for 4 element transpose */
-   if (dst_channels == 3 && !has_alpha) {
+   if (dst_channels == 3) {
+      assert (!has_alpha);
       for (i = 0; i < TGSI_NUM_CHANNELS; i++) {
          if (swizzle[i] > TGSI_NUM_CHANNELS)
             swizzle[i] = 3;
       }
       if (out_format_desc->nr_channels == 4) {
          dst_channels = 4;
+         /*
+          * We use alpha from the color conversion, not separate one.
+          * We had to include it for transpose, hence it will get converted
+          * too (albeit when doing transpose after conversion, that would
+          * no longer be the case necessarily).
+          * (It works only with 4 channel dsts, e.g. rgbx formats, because
+          * otherwise we really have padding, not alpha, included.)
+          */
+         has_alpha = true;
       }
    }
 
@@ -1787,6 +1799,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
       /*
        * XXX If we include that here maybe could actually use it instead of
        * separate alpha for blending?
+       * (Difficult though we actually convert pad channels, not alpha.)
        */
       if (dst_channels == 3 && !has_alpha) {
          fs_src[i][3] = alpha;
@@ -1794,11 +1807,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
 
       /* We split the row_mask and row_alpha as we want 128bit interleave */
       if (fs_type.length == 8) {
-         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i], 0, src_channels);
-         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i], src_channels, src_channels);
+         src_mask[i*2 + 0]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     0, src_channels);
+         src_mask[i*2 + 1]  = lp_build_extract_range(gallivm, fs_mask[i],
+                                                     src_channels, src_channels);
 
          src_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+         src_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                     src_channels, src_channels);
       } else {
          src_mask[i] = fs_mask[i];
          src_alpha[i] = alpha;
@@ -1829,7 +1845,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
          }
          if (fs_type.length == 8) {
             src1_alpha[i*2 + 0] = lp_build_extract_range(gallivm, alpha, 0, src_channels);
-            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha, src_channels, src_channels);
+            src1_alpha[i*2 + 1] = lp_build_extract_range(gallivm, alpha,
+                                                         src_channels, src_channels);
          } else {
             src1_alpha[i] = alpha;
          }
@@ -1911,8 +1928,10 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     * Blend Colour conversion
     */
    blend_color = lp_jit_context_f_blend_color(gallivm, context_ptr);
-   blend_color = LLVMBuildPointerCast(builder, blend_color, LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
-   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color, &i32_zero, 1, ""), "");
+   blend_color = LLVMBuildPointerCast(builder, blend_color,
+                    LLVMPointerType(lp_build_vec_type(gallivm, fs_type), 0), "");
+   blend_color = LLVMBuildLoad(builder, LLVMBuildGEP(builder, blend_color,
+                               &i32_zero, 1, ""), "");
 
    /* Convert */
    lp_build_conv(gallivm, fs_type, blend_type, &blend_color, 1, &blend_color, 1);
@@ -2141,7 +2160,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
     * It seems some cleanup could be done here (like skipping conversion/blend
     * when not needed).
     */
-   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type, row_type, dst, src_count);
+   convert_to_blend_type(gallivm, block_size, out_format_desc, dst_type,
+                         row_type, dst, src_count);
 
    /*
     * FIXME: Really should get logic ops / masks out of generic blend / row
@@ -2167,7 +2187,8 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
                                   pad_inline ? 4 : dst_channels);
    }
 
-   convert_from_blend_type(gallivm, block_size, out_format_desc, row_type, dst_type, dst, src_count);
+   convert_from_blend_type(gallivm, block_size, out_format_desc,
+                           row_type, dst_type, dst, src_count);
 
    /* Split the blend rows back to memory rows */
    if (dst_count > src_count) {