Mesa (master): llvmpipe: handle z32s8x24 depth/stencil format

Roland Scheidegger sroland at kemper.freedesktop.org
Fri May 17 22:46:05 UTC 2013


Module: Mesa
Branch: master
Commit: 070a9afb5476b58a2824fac5c94bbe4f78a2d8b9
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=070a9afb5476b58a2824fac5c94bbe4f78a2d8b9

Author: Roland Scheidegger <sroland at vmware.com>
Date:   Sat May 18 00:16:03 2013 +0200

llvmpipe: handle z32s8x24 depth/stencil format

We need to split up the depth and stencil values in this case, and there's
some new logic required to handle float depth and stencil simultaneously.
Also make sure we get the 64bit zs clear values and masks propagated
correctly.

---

 src/gallium/auxiliary/gallivm/lp_bld_pack.c       |    3 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |    5 +
 src/gallium/drivers/llvmpipe/lp_bld_depth.c       |  275 +++++++++++++--------
 src/gallium/drivers/llvmpipe/lp_bld_depth.h       |   15 +-
 src/gallium/drivers/llvmpipe/lp_rast.c            |   28 ++-
 src/gallium/drivers/llvmpipe/lp_rast.h            |    6 +-
 src/gallium/drivers/llvmpipe/lp_setup.c           |   18 +-
 src/gallium/drivers/llvmpipe/lp_setup_context.h   |    4 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |   53 ++--
 9 files changed, 259 insertions(+), 148 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 9eb9ab0..0a57e39 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -255,7 +255,8 @@ lp_build_concat_n(struct gallivm_state *gallivm,
 /**
  * Interleave vector elements.
  *
- * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
+ * (but not for 256bit AVX vectors).
  */
 LLVMValueRef
 lp_build_interleave2(struct gallivm_state *gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index cc29c5c..7ac0029 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1545,6 +1545,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
       }
    }
+   else if (util_format_has_stencil(bld.format_desc) &&
+       !util_format_has_depth(bld.format_desc)) {
+      /* for stencil only formats, sample stencil (uint) */
+      bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
+   }
 
    if (!static_texture_state->level_zero_only) {
       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 1cd36b8..2376ca7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc,
       if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
          type.floating = TRUE;
          assert(z_swizzle == 0);
-         assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
+         assert(format_desc->channel[z_swizzle].size == 32);
       }
       else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
          assert(format_desc->block.bits <= 32);
@@ -347,15 +347,18 @@ static boolean
 get_z_shift_and_mask(const struct util_format_description *format_desc,
                      unsigned *shift, unsigned *width, unsigned *mask)
 {
-   const unsigned total_bits = format_desc->block.bits;
+   unsigned total_bits;
    unsigned z_swizzle;
    unsigned chan;
    unsigned padding_left, padding_right;
-   
+
    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    assert(format_desc->block.width == 1);
    assert(format_desc->block.height == 1);
 
+   /* 64bit d/s format is special: the 32 depth bits are already extracted */
+   total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;
+
    z_swizzle = format_desc->swizzle[0];
 
    if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
@@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
    if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
       return FALSE;
 
+   /* just special case 64bit d/s format */
+   if (format_desc->block.bits > 32) {
+      assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+      *shift = 0;
+      *mask = 0xff;
+      return TRUE;
+   }
+
    *shift = 0;
    for (chan = 0; chan < s_swizzle; chan++)
       *shift += format_desc->channel[chan].size;
@@ -517,24 +528,31 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
+ * \param z_fb  contains z values loaded from fb (may include padding)
+ * \param s_fb  contains s values loaded from fb (may include padding)
  */
-LLVMValueRef
+void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
+                                     LLVMValueRef *z_fb,
+                                     LLVMValueRef *s_fb,
                                      LLVMValueRef loop_counter)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
-   LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr;
    LLVMValueRef depth_offset1, depth_offset2;
-   unsigned depth_bits = format_desc->block.bits/8;
+   LLVMTypeRef load_ptr_type;
+   unsigned depth_bytes = format_desc->block.bits / 8;
    struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
    struct lp_type zs_load_type = zs_type;
+
    zs_load_type.length = zs_load_type.length / 2;
+   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
 
    if (z_src_type.length == 4) {
       unsigned i;
@@ -545,7 +563,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
       LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                           depth_stride, "");
       depth_offset1 = LLVMBuildMul(builder, looplsb,
-                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
       depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
 
       /* just concatenate the loaded 2x2 values into 4-wide vector */
@@ -564,7 +582,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
        * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
        */
       for (i = 0; i < 8; i++) {
-
          shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
       }
    }
@@ -573,63 +590,103 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
 
    /* Load current z/stencil values from z/stencil buffer */
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
 
-   zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
-                                   LLVMConstVector(shuffles, zs_type.length), "");
+   *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                  LLVMConstVector(shuffles, zs_type.length), "");
+   *s_fb = *z_fb;
 
    if (format_desc->block.bits < z_src_type.width) {
       /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
-      zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), "");
+      *z_fb = LLVMBuildZExt(builder, *z_fb,
+                            lp_build_int_vec_type(gallivm, z_src_type), "");
    }
 
-   lp_build_name(zs_dst, "zs_dst");
+   else if (format_desc->block.bits > 32) {
+      /* rely on llvm to nicely handle the too-wide vector we have here */
+      unsigned i;
+      struct lp_type typex2 = zs_type;
+      struct lp_type s_type = zs_type;
+      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
+      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
+      LLVMValueRef tmp;
+
+      typex2.width = typex2.width / 2;
+      typex2.length = typex2.length * 2;
+      s_type.width = s_type.width / 2;
+      s_type.floating = 0;
+
+      tmp = LLVMBuildBitCast(builder, *z_fb,
+                             lp_build_vec_type(gallivm, typex2), "");
+
+      for (i = 0; i < zs_type.length; i++) {
+         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
+         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+      }
+      *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+                                     LLVMConstVector(shuffles1, zs_type.length), "");
+      *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp,
+                                     LLVMConstVector(shuffles2, zs_type.length), "");
+      *s_fb = LLVMBuildBitCast(builder, *s_fb,
+                               lp_build_vec_type(gallivm, s_type), "");
+      lp_build_name(*s_fb, "s_dst");
+   }
 
-   return zs_dst;
+   lp_build_name(*z_fb, "z_dst");
+   lp_build_name(*s_fb, "s_dst");
+   lp_build_name(*z_fb, "z_dst");
 }
 
 /**
  * Store depth/stencil values.
  * Incoming values are swizzled (typically n 2x2 quads), stored linear.
- * If there's a mask it will do reload/select/store otherwise just store.
+ * If there's a mask it will do select/store, otherwise just store.
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
  * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param z_fb  z values read from fb (with padding)
+ * \param s_fb  s values read from fb (with padding)
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
- * \param zs_value  the depth/stencil values to store
+ * \param z_value the depth values to store (with padding)
+ * \param s_value the stencil values to store (with padding)
  */
 void
 lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       struct lp_build_mask_context *mask,
+                                      LLVMValueRef z_fb,
+                                      LLVMValueRef s_fb,
                                       LLVMValueRef loop_counter,
                                       LLVMValueRef depth_ptr,
                                       LLVMValueRef depth_stride,
-                                      LLVMValueRef zs_value)
+                                      LLVMValueRef z_value,
+                                      LLVMValueRef s_value)
 {
    struct lp_build_context z_bld;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef mask_value = NULL;
-   LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
    LLVMValueRef depth_offset1, depth_offset2;
-   unsigned depth_bits = format_desc->block.bits/8;
+   LLVMTypeRef load_ptr_type;
+   unsigned depth_bytes = format_desc->block.bits / 8;
    struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
    struct lp_type zs_load_type = zs_type;
+
    zs_load_type.length = zs_load_type.length / 2;
+   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
+
+   if (zs_type.width > 32)
+      zs_type.width = 32;
 
    lp_build_context_init(&z_bld, gallivm, zs_type);
 
@@ -638,7 +695,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
     * outside the fs loop to avoid all the swizzle stuff.
     */
    if (z_src_type.length == 4) {
-      unsigned i;
       LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                           lp_build_const_int32(gallivm, 1), "");
       LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
@@ -646,13 +702,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                           depth_stride, "");
       depth_offset1 = LLVMBuildMul(builder, looplsb,
-                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
       depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
-
-      /* just concatenate the loaded 2x2 values into 4-wide vector */
-      for (i = 0; i < 4; i++) {
-         shuffles[i] = lp_build_const_int32(gallivm, i);
-      }
    }
    else {
       unsigned i;
@@ -669,55 +720,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       }
    }
 
-
    depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
 
    zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
-   zs_dst_ptr1 = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr1,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
    zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr2 = LLVMBuildBitCast(builder,
-                                  zs_dst_ptr2,
-                                  LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");
 
-   if (mask) {
-      zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
-      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
-      zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
-                                      LLVMConstVector(shuffles, zs_type.length),
-                                      "zsbufval");
+   if (format_desc->block.bits > 32) {
+      s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, "");
+   }
 
+   if (mask) {
       mask_value = lp_build_mask_value(mask);
+      z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb);
+      if (format_desc->block.bits > 32) {
+         s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, "");
+         s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb);
+      }
    }
 
    if (zs_type.width < z_src_type.width) {
-      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
-      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
-      if (mask)
-         mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, "");
+      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
+      z_value = LLVMBuildTrunc(builder, z_value, z_bld.vec_type, "");
    }
 
-   if (mask) {
-      zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
-   }
-
-   if (z_src_type.length == 4) {
-      zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
-      zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+   if (format_desc->block.bits <= 32) {
+      if (z_src_type.length == 4) {
+         zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2);
+         zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2);
+      }
+      else {
+         assert(z_src_type.length == 8);
+         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value,
+                                          LLVMConstVector(&shuffles[0],
+                                                          zs_load_type.length), "");
+         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value,
+                                          LLVMConstVector(&shuffles[4],
+                                                          zs_load_type.length), "");
+      }
    }
    else {
-      assert(z_src_type.length == 8);
-      zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
-                                       LLVMConstVector(&shuffles[0],
-                                                       zs_load_type.length),
-                                       "");
-      zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
-                                       LLVMConstVector(&shuffles[4],
-                                                       zs_load_type.length),
-                                       "");
-
+      if (z_src_type.length == 4) {
+         zs_dst1 = lp_build_interleave2(gallivm, zs_type,
+                                        z_value, s_value, 0);
+         zs_dst2 = lp_build_interleave2(gallivm, zs_type,
+                                        z_value, s_value, 1);
+      }
+      else {
+         unsigned i;
+         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
+         assert(z_src_type.length == 8);
+         for (i = 0; i < 8; i++) {
+            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
+                                                   z_src_type.length);
+         }
+         zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value,
+                                          LLVMConstVector(&shuffles[0],
+                                                          z_src_type.length), "");
+         zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value,
+                                          LLVMConstVector(&shuffles[8],
+                                                          z_src_type.length), "");
+      }
+      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
+                                 lp_build_vec_type(gallivm, zs_load_type), "");
+      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
+                                 lp_build_vec_type(gallivm, zs_load_type), "");
    }
+
    LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
    LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
 }
@@ -745,13 +816,14 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst,
+                            LLVMValueRef z_fb,
+                            LLVMValueRef s_fb,
                             LLVMValueRef face,
-                            LLVMValueRef *zs_value,
+                            LLVMValueRef *z_value,
+                            LLVMValueRef *s_value,
                             boolean do_branch)
 {
    LLVMBuilderRef builder = gallivm->builder;
-   struct lp_type zs_type;
    struct lp_type z_type;
    struct lp_build_context z_bld;
    struct lp_build_context s_bld;
@@ -763,7 +835,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
    LLVMValueRef orig_mask = lp_build_mask_value(mask);
    LLVMValueRef front_facing = NULL;
-
+   boolean have_z, have_s;
 
    /*
     * Depths are expected to be between 0 and 1, even if they are stored in
@@ -780,12 +852,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    }
 
    /* Pick the type matching the depth-stencil format. */
-   zs_type = lp_depth_type(format_desc, z_src_type.length);
+   z_type = lp_depth_type(format_desc, z_src_type.length);
 
    /* Pick the intermediate type for depth operations. */
-   z_type = zs_type;
-   /* FIXME: Cope with a depth test type with higher bit width. */
-   assert(zs_type.width <= z_src_type.width);
    z_type.width = z_src_type.width;
    assert(z_type.length == z_src_type.length);
 
@@ -819,13 +888,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
       if (depth->enabled) {
          assert(z_swizzle < 4);
-         assert(format_desc->block.bits <= z_type.width);
          if (z_type.floating) {
             assert(z_swizzle == 0);
             assert(format_desc->channel[z_swizzle].type ==
                    UTIL_FORMAT_TYPE_FLOAT);
-            assert(format_desc->channel[z_swizzle].size ==
-                   format_desc->block.bits);
+            assert(format_desc->channel[z_swizzle].size == 32);
          }
          else {
             assert(format_desc->channel[z_swizzle].type ==
@@ -849,7 +916,13 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    {
       unsigned s_shift, s_mask;
 
-      if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
+      z_dst = z_fb;
+      stencil_vals = s_fb;
+
+      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
+
+      if (have_z) {
          if (z_mask != 0xffffffff) {
             z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
          }
@@ -859,26 +932,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
           */
          if (z_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
-            z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
          } else if (z_bitmask) {
-	    /* TODO: Instead of loading a mask from memory and ANDing, it's
-	     * probably faster to just shake the bits with two shifts. */
-            z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
          } else {
-            z_dst = zs_dst;
             lp_build_name(z_dst, "z_dst");
          }
       }
 
-      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
+      if (have_s) {
          if (s_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
-            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
+            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
             stencil_shift = shift;  /* used below */
          }
-         else {
-            stencil_vals = zs_dst;
-         }
 
          if (s_mask != 0xffffffff) {
             LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
@@ -997,7 +1064,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
          LLVMValueRef z_fail_mask, z_pass_mask;
 
          /* apply Z-fail operator */
-         z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
+         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             z_fail_mask, front_facing);
@@ -1019,8 +1086,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                                          s_pass_mask, front_facing);
    }
 
-   /* Put Z and ztencil bits in the right place */
-   if (z_dst && z_shift) {
+   /* Put Z and stencil bits in the right place */
+   if (have_z && z_shift) {
       LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
       z_dst = LLVMBuildShl(builder, z_dst, shift, "");
    }
@@ -1028,18 +1095,24 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                   stencil_shift, "");
 
-   /* Finally, merge/store the z/stencil values */
+   /* Finally, merge the z/stencil values */
    if ((depth->enabled && depth->writemask) ||
-       (stencil[0].enabled && stencil[0].writemask)) {
-
-      if (z_dst && stencil_vals)
-         zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
-      else if (z_dst)
-         zs_dst = z_dst;
-      else
-         zs_dst = stencil_vals;
-
-      *zs_value = zs_dst;
+       (stencil[0].enabled && (stencil[0].writemask ||
+                               (stencil[1].enabled && stencil[1].writemask)))) {
+
+      if (format_desc->block.bits <= 32) {
+         if (have_z && have_s)
+            *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+         else if (have_z)
+            *z_value = z_dst;
+         else
+            *z_value = stencil_vals;
+         *s_value = *z_value;
+      }
+      else {
+         *z_value = z_dst;
+         *s_value = stencil_vals;
+      }
    }
 
    if (s_pass_mask)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index c000494..2534dc3 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -63,17 +63,21 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst,
+                            LLVMValueRef z_fb,
+                            LLVMValueRef s_fb,
                             LLVMValueRef face,
-                            LLVMValueRef *zs_value,
+                            LLVMValueRef *z_value,
+                            LLVMValueRef *s_value,
                             boolean do_branch);
 
-LLVMValueRef
+void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
+                                     LLVMValueRef *z_fb,
+                                     LLVMValueRef *s_fb,
                                      LLVMValueRef loop_counter);
 
 void
@@ -81,10 +85,13 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       struct lp_build_mask_context *mask,
+                                      LLVMValueRef z_fb,
+                                      LLVMValueRef s_fb,
                                       LLVMValueRef loop_counter,
                                       LLVMValueRef depth_ptr,
                                       LLVMValueRef depth_stride,
-                                      LLVMValueRef zs_value);
+                                      LLVMValueRef z_value,
+                                      LLVMValueRef s_value);
 
 
 void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index a557db4..8a4b00f 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -193,8 +193,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
                        const union lp_rast_cmd_arg arg)
 {
    const struct lp_scene *scene = task->scene;
-   uint32_t clear_value = arg.clear_zstencil.value;
-   uint32_t clear_mask = arg.clear_zstencil.mask;
+   uint64_t clear_value64 = arg.clear_zstencil.value;
+   uint64_t clear_mask64 = arg.clear_zstencil.mask;
+   uint32_t clear_value = (uint32_t) clear_value64;
+   uint32_t clear_mask = (uint32_t) clear_mask64;
    const unsigned height = TILE_SIZE;
    const unsigned width = TILE_SIZE;
    const unsigned block_size = scene->zsbuf.blocksize;
@@ -260,6 +262,28 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
             }
          }
          break;
+      case 8:
+         clear_value64 &= clear_mask64;
+         if (clear_mask64 == 0xffffffffffULL) {
+            for (i = 0; i < height; i++) {
+               uint64_t *row = (uint64_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = clear_value64;
+               dst += dst_stride;
+            }
+         }
+         else {
+            for (i = 0; i < height; i++) {
+               uint64_t *row = (uint64_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint64_t tmp = ~clear_mask64 & *row;
+                  *row++ = clear_value64 | tmp;
+               }
+               dst += dst_stride;
+            }
+         }
+         break;
+
       default:
          assert(0);
          break;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 2f5fa22..8dd3615 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -154,8 +154,8 @@ union lp_rast_cmd_arg {
    const struct lp_rast_state *set_state;
    union pipe_color_union clear_color;
    struct {
-      uint32_t value;
-      uint32_t mask;
+      uint64_t value;
+      uint64_t mask;
    } clear_zstencil;
    const struct lp_rast_state *state;
    struct lp_fence *fence;
@@ -217,7 +217,7 @@ lp_rast_arg_fence( struct lp_fence *fence )
 
 
 static INLINE union lp_rast_cmd_arg
-lp_rast_arg_clearzs( unsigned value, unsigned mask )
+lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
 {
    union lp_rast_cmd_arg arg;
    arg.clear_zstencil.value = value;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index f9cbdab..9fef34e 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -390,8 +390,8 @@ lp_setup_try_clear( struct lp_setup_context *setup,
                     unsigned stencil,
                     unsigned flags )
 {
-   uint32_t zsmask = 0;
-   uint32_t zsvalue = 0;
+   uint64_t zsmask = 0;
+   uint64_t zsvalue = 0;
    union lp_rast_cmd_arg color_arg;
    unsigned i;
 
@@ -404,16 +404,16 @@ lp_setup_try_clear( struct lp_setup_context *setup,
 
    if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
       uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
-      uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+      uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
 
-      zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
-                                    depth,
-                                    stencil);
+      zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
+                                      depth,
+                                      stencil);
 
 
-      zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format,
-                                        zmask,
-                                        smask);
+      zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
+                                          zmask,
+                                          smask);
 
       zsvalue &= zsmask;
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index efc2ab7..6b35a02 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -112,8 +112,8 @@ struct lp_setup_context
    struct {
       unsigned flags;
       union lp_rast_cmd_arg color;    /**< lp_rast_clear_color() cmd */
-      unsigned zsmask;
-      unsigned zsvalue;               /**< lp_rast_clear_zstencil() cmd */
+      uint64_t zsmask;
+      uint64_t zsvalue;               /**< lp_rast_clear_zstencil() cmd */
    } clear;
 
    enum setup_state {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 6921210..1dfc75a 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -239,7 +239,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
    LLVMValueRef mask_ptr, mask_val;
    LLVMValueRef consts_ptr;
    LLVMValueRef z;
-   LLVMValueRef zs_value = NULL;
+   LLVMValueRef z_value, s_value;
+   LLVMValueRef z_fb, s_fb;
    LLVMValueRef stencil_refs[2];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    struct lp_build_for_loop_state loop_state;
@@ -259,8 +260,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
    memset(&system_values, 0, sizeof(system_values));
 
    if (key->depth.enabled ||
-       key->stencil[0].enabled ||
-       key->stencil[1].enabled) {
+       key->stencil[0].enabled) {
 
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
@@ -281,7 +281,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
       }
 
       if (!(key->depth.enabled && key->depth.writemask) &&
-          !(key->stencil[0].enabled && key->stencil[0].writemask))
+          !((key->stencil[0].enabled && (key->stencil[0].writemask ||
+                                        (key->stencil[1].enabled &&
+                                         key->stencil[1].writemask)))))
          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
    }
    else {
@@ -337,11 +339,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
    z = interp->pos[2];
 
    if (depth_mode & EARLY_DEPTH_TEST) {
-      LLVMValueRef zs_dst_val;
-      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                                        zs_format_desc,
-                                                        depth_ptr, depth_stride,
-                                                        loop_state.counter);
+      lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                           zs_format_desc,
+                                           depth_ptr, depth_stride,
+                                           &z_fb, &s_fb, loop_state.counter);
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -349,16 +350,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   zs_format_desc,
                                   &mask,
                                   stencil_refs,
-                                  z,
-                                  zs_dst_val,
+                                  z, z_fb, s_fb,
                                   facing,
-                                  &zs_value,
+                                  &z_value, &s_value,
                                   !simple_shader);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                               NULL, loop_state.counter,
-                                               depth_ptr, depth_stride, zs_value);
+                                               NULL, NULL, NULL, loop_state.counter,
+                                               depth_ptr, depth_stride,
+                                               z_value, s_value);
       }
    }
 
@@ -394,7 +395,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) {
-      LLVMValueRef zs_dst_val;
       int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
@@ -403,10 +403,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
       }
 
-      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                                        zs_format_desc,
-                                                        depth_ptr, depth_stride,
-                                                        loop_state.counter);
+      lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                           zs_format_desc,
+                                           depth_ptr, depth_stride,
+                                           &z_fb, &s_fb, loop_state.counter);
 
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
@@ -415,16 +415,16 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   zs_format_desc,
                                   &mask,
                                   stencil_refs,
-                                  z,
-                                  zs_dst_val,
+                                  z, z_fb, s_fb,
                                   facing,
-                                  &zs_value,
+                                  &z_value, &s_value,
                                   !simple_shader);
       /* Late Z write */
       if (depth_mode & LATE_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                               NULL, loop_state.counter,
-                                               depth_ptr, depth_stride, zs_value);
+                                               NULL, NULL, NULL, loop_state.counter,
+                                               depth_ptr, depth_stride,
+                                               z_value, s_value);
       }
    }
    else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -435,8 +435,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * write that out.
        */
       lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                            &mask, loop_state.counter,
-                                            depth_ptr, depth_stride, zs_value);
+                                            &mask, z_fb, s_fb, loop_state.counter,
+                                            depth_ptr, depth_stride,
+                                            z_value, s_value);
    }
 
 




More information about the mesa-commit mailing list