[Mesa-dev] [PATCH 2/3] llvmpipe: handle z32s8x24 depth/stencil format

sroland at vmware.com sroland at vmware.com
Thu May 16 09:34:45 PDT 2013


From: Roland Scheidegger <sroland at vmware.com>

We need to split up the depth and stencil values in this case, and some
new logic is required to handle float depth and stencil simultaneously.
Also make sure the 64-bit zs clear values and masks are propagated
correctly.
---
 src/gallium/auxiliary/gallivm/lp_bld_pack.c       |    3 +-
 src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c |    5 +
 src/gallium/drivers/llvmpipe/lp_bld_depth.c       |  269 +++++++++++++--------
 src/gallium/drivers/llvmpipe/lp_bld_depth.h       |   10 +-
 src/gallium/drivers/llvmpipe/lp_rast.c            |   28 ++-
 src/gallium/drivers/llvmpipe/lp_rast.h            |    6 +-
 src/gallium/drivers/llvmpipe/lp_setup.c           |   18 +-
 src/gallium/drivers/llvmpipe/lp_setup_context.h   |    4 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |   42 ++--
 9 files changed, 242 insertions(+), 143 deletions(-)

diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
index 9eb9ab0..0a57e39 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c
@@ -255,7 +255,8 @@ lp_build_concat_n(struct gallivm_state *gallivm,
 /**
  * Interleave vector elements.
  *
- * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions.
+ * Matches the PUNPCKLxx and PUNPCKHxx SSE instructions
+ * (but not for 256bit AVX vectors).
  */
 LLVMValueRef
 lp_build_interleave2(struct gallivm_state *gallivm,
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
index beefdae..7be64bf 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample_soa.c
@@ -1540,6 +1540,11 @@ lp_build_sample_soa(struct gallivm_state *gallivm,
          bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
       }
    }
+   else if (util_format_has_stencil(bld.format_desc) &&
+       !util_format_has_depth(bld.format_desc)) {
+      /* for stencil only formats, sample stencil (uint) */
+      bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
+   }
 
    if (!static_texture_state->level_zero_only) {
       derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 1cd36b8..f03bfa8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -313,7 +313,7 @@ lp_depth_type(const struct util_format_description *format_desc,
       if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
          type.floating = TRUE;
          assert(z_swizzle == 0);
-         assert(format_desc->channel[z_swizzle].size == format_desc->block.bits);
+         assert(format_desc->channel[z_swizzle].size == 32);
       }
       else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
          assert(format_desc->block.bits <= 32);
@@ -347,15 +347,18 @@ static boolean
 get_z_shift_and_mask(const struct util_format_description *format_desc,
                      unsigned *shift, unsigned *width, unsigned *mask)
 {
-   const unsigned total_bits = format_desc->block.bits;
+   unsigned total_bits;
    unsigned z_swizzle;
    unsigned chan;
    unsigned padding_left, padding_right;
-   
+
    assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS);
    assert(format_desc->block.width == 1);
    assert(format_desc->block.height == 1);
 
+   /* 64bit d/s format is special: z was already extracted into 32 bits */
+   total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits;
+
    z_swizzle = format_desc->swizzle[0];
 
    if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
@@ -402,6 +405,14 @@ get_s_shift_and_mask(const struct util_format_description *format_desc,
    if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE)
       return FALSE;
 
+   /* just special case 64bit d/s format */
+   if (format_desc->block.bits > 32) {
+      assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT);
+      *shift = 0;
+      *mask = 0xff;
+      return TRUE;
+   }
+
    *shift = 0;
    for (chan = 0; chan < s_swizzle; chan++)
       *shift += format_desc->channel[chan].size;
@@ -517,24 +528,29 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
+ * \param zs_dst  pointer to results (two values for 64bit ds format)
  */
-LLVMValueRef
+void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
+                                     LLVMValueRef zs_dst[2],
                                      LLVMValueRef loop_counter)
 {
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
-   LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr;
    LLVMValueRef depth_offset1, depth_offset2;
-   unsigned depth_bits = format_desc->block.bits/8;
+   LLVMTypeRef load_ptr_type;
+   unsigned depth_bytes = format_desc->block.bits / 8;
    struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
    struct lp_type zs_load_type = zs_type;
+
    zs_load_type.length = zs_load_type.length / 2;
+   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
 
    if (z_src_type.length == 4) {
       unsigned i;
@@ -545,7 +561,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
       LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                           depth_stride, "");
       depth_offset1 = LLVMBuildMul(builder, looplsb,
-                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
       depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
 
       /* just concatenate the loaded 2x2 values into 4-wide vector */
@@ -564,7 +580,6 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
        * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately.
        */
       for (i = 0; i < 8; i++) {
-
          shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
       }
    }
@@ -573,63 +588,96 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
 
    /* Load current z/stencil values from z/stencil buffer */
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
    zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
    zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
 
-   zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
-                                   LLVMConstVector(shuffles, zs_type.length), "");
+   zs_dst[0] = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                      LLVMConstVector(shuffles, zs_type.length), "");
 
    if (format_desc->block.bits < z_src_type.width) {
       /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
-      zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_int_vec_type(gallivm, z_src_type), "");
+      zs_dst[0] = LLVMBuildZExt(builder, zs_dst[0],
+                                lp_build_int_vec_type(gallivm, z_src_type), "");
    }
 
-   lp_build_name(zs_dst, "zs_dst");
+   else if (format_desc->block.bits > 32) {
+      /* rely on llvm to nicely handle the too-wide vector we have here */
+      unsigned i;
+      struct lp_type typex2 = zs_type;
+      struct lp_type s_type = zs_type;
+      LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4];
+      LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4];
+      LLVMValueRef tmp;
+
+      typex2.width = typex2.width / 2;
+      typex2.length = typex2.length * 2;
+      s_type.width = s_type.width / 2;
+      s_type.floating = 0;
+
+      tmp = LLVMBuildBitCast(builder, zs_dst[0],
+                             lp_build_vec_type(gallivm, typex2), "");
+
+      for (i = 0; i < zs_type.length; i++) {
+         shuffles1[i] = lp_build_const_int32(gallivm, i * 2);
+         shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1);
+      }
+      zs_dst[0] = LLVMBuildShuffleVector(builder, tmp, tmp,
+                                         LLVMConstVector(shuffles1, zs_type.length), "");
+      zs_dst[1] = LLVMBuildShuffleVector(builder, tmp, tmp,
+                                         LLVMConstVector(shuffles2, zs_type.length), "");
+      zs_dst[1] = LLVMBuildBitCast(builder, zs_dst[1],
+                                   lp_build_vec_type(gallivm, s_type), "");
+      lp_build_name(zs_dst[1], "zs_dst1");
+   }
 
-   return zs_dst;
+   lp_build_name(zs_dst[0], "zs_dst");
 }
 
 /**
  * Store depth/stencil values.
  * Incoming values are swizzled (typically n 2x2 quads), stored linear.
- * If there's a mask it will do reload/select/store otherwise just store.
+ * If there's a mask it will do select/store, otherwise just store.
  *
  * \param type  the data type of the fragment depth/stencil values
  * \param format_desc  description of the depth/stencil surface
  * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param zs_dst zs values read from fb (two values for 64bit ds format)
  * \param loop_counter  the current loop iteration
  * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
  * \param depth_stride  stride of the depth/stencil buffer
- * \param zs_value  the depth/stencil values to store
+ * \param zs_value  the depth/stencil values to store (two values for 64bit ds format)
  */
 void
 lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       struct lp_build_mask_context *mask,
+                                      LLVMValueRef zs_dst[2],
                                       LLVMValueRef loop_counter,
                                       LLVMValueRef depth_ptr,
                                       LLVMValueRef depth_stride,
-                                      LLVMValueRef zs_value)
+                                      LLVMValueRef zs_value[2])
 {
    struct lp_build_context z_bld;
    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
    LLVMBuilderRef builder = gallivm->builder;
    LLVMValueRef mask_value = NULL;
-   LLVMValueRef zs_dst = NULL, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst1, zs_dst2;
    LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
    LLVMValueRef depth_offset1, depth_offset2;
-   unsigned depth_bits = format_desc->block.bits/8;
+   LLVMTypeRef load_ptr_type;
+   unsigned depth_bytes = format_desc->block.bits / 8;
    struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
    struct lp_type zs_load_type = zs_type;
+
    zs_load_type.length = zs_load_type.length / 2;
+   load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0);
+
+   if (zs_type.width > 32)
+      zs_type.width = 32;
 
    lp_build_context_init(&z_bld, gallivm, zs_type);
 
@@ -638,7 +686,6 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
     * outside the fs loop to avoid all the swizzle stuff.
     */
    if (z_src_type.length == 4) {
-      unsigned i;
       LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
                                           lp_build_const_int32(gallivm, 1), "");
       LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
@@ -646,13 +693,8 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
                                           depth_stride, "");
       depth_offset1 = LLVMBuildMul(builder, looplsb,
-                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+                                   lp_build_const_int32(gallivm, depth_bytes * 2), "");
       depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
-
-      /* just concatenate the loaded 2x2 values into 4-wide vector */
-      for (i = 0; i < 4; i++) {
-         shuffles[i] = lp_build_const_int32(gallivm, i);
-      }
    }
    else {
       unsigned i;
@@ -669,55 +711,75 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
       }
    }
 
-
    depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
 
    zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
-   zs_dst_ptr1 = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr1,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, "");
    zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
-   zs_dst_ptr2 = LLVMBuildBitCast(builder,
-                                  zs_dst_ptr2,
-                                  LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, "");
 
-   if (mask) {
-      zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
-      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
-      zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
-                                      LLVMConstVector(shuffles, zs_type.length),
-                                      "zsbufval");
+   if (format_desc->block.bits > 32) {
+      zs_value[1] = LLVMBuildBitCast(builder, zs_value[1], z_bld.vec_type, "");
+   }
 
+   if (mask) {
       mask_value = lp_build_mask_value(mask);
+      zs_value[0] = lp_build_select(&z_bld, mask_value, zs_value[0], zs_dst[0]);
+      if (format_desc->block.bits > 32) {
+         zs_dst[1] = LLVMBuildBitCast(builder, zs_dst[1], z_bld.vec_type, "");
+         zs_value[1] = lp_build_select(&z_bld, mask_value, zs_value[1], zs_dst[1]);
+      }
    }
 
    if (zs_type.width < z_src_type.width) {
-      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
-      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
-      if (mask)
-         mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.vec_type, "");
+      /* Truncate ZS values (e.g., when writing to Z16_UNORM) */
+      zs_value[0] = LLVMBuildTrunc(builder, zs_value[0], z_bld.vec_type, "");
    }
 
-   if (mask) {
-      zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
-   }
-
-   if (z_src_type.length == 4) {
-      zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
-      zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+   if (format_desc->block.bits <= 32) {
+      if (z_src_type.length == 4) {
+         zs_dst1 = lp_build_extract_range(gallivm, zs_value[0], 0, 2);
+         zs_dst2 = lp_build_extract_range(gallivm, zs_value[0], 2, 2);
+      }
+      else {
+         assert(z_src_type.length == 8);
+         zs_dst1 = LLVMBuildShuffleVector(builder, zs_value[0], zs_value[0],
+                                          LLVMConstVector(&shuffles[0],
+                                                          zs_load_type.length), "");
+         zs_dst2 = LLVMBuildShuffleVector(builder, zs_value[0], zs_value[0],
+                                          LLVMConstVector(&shuffles[4],
+                                                          zs_load_type.length), "");
+      }
    }
    else {
-      assert(z_src_type.length == 8);
-      zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
-                                       LLVMConstVector(&shuffles[0],
-                                                       zs_load_type.length),
-                                       "");
-      zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
-                                       LLVMConstVector(&shuffles[4],
-                                                       zs_load_type.length),
-                                       "");
-
+      if (z_src_type.length == 4) {
+         zs_dst1 = lp_build_interleave2(gallivm, zs_type,
+                                        zs_value[0], zs_value[1], 0);
+         zs_dst2 = lp_build_interleave2(gallivm, zs_type,
+                                        zs_value[0], zs_value[1], 1);
+      }
+      else {
+         unsigned i;
+         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2];
+         assert(z_src_type.length == 8);
+         for (i = 0; i < 8; i++) {
+            shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+            shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 +
+                                                   z_src_type.length);
+         }
+         zs_dst1 = LLVMBuildShuffleVector(builder, zs_value[0], zs_value[1],
+                                          LLVMConstVector(&shuffles[0],
+                                                          z_src_type.length), "");
+         zs_dst2 = LLVMBuildShuffleVector(builder, zs_value[0], zs_value[1],
+                                          LLVMConstVector(&shuffles[8],
+                                                          z_src_type.length), "");
+      }
+      zs_dst1 = LLVMBuildBitCast(builder, zs_dst1,
+                                 lp_build_vec_type(gallivm, zs_load_type), "");
+      zs_dst2 = LLVMBuildBitCast(builder, zs_dst2,
+                                 lp_build_vec_type(gallivm, zs_load_type), "");
    }
+
    LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
    LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
 }
@@ -745,13 +807,12 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst,
+                            LLVMValueRef zs_dst[2],
                             LLVMValueRef face,
-                            LLVMValueRef *zs_value,
+                            LLVMValueRef zs_value[2],
                             boolean do_branch)
 {
    LLVMBuilderRef builder = gallivm->builder;
-   struct lp_type zs_type;
    struct lp_type z_type;
    struct lp_build_context z_bld;
    struct lp_build_context s_bld;
@@ -763,7 +824,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
    LLVMValueRef orig_mask = lp_build_mask_value(mask);
    LLVMValueRef front_facing = NULL;
-
+   boolean have_z, have_s;
 
    /*
     * Depths are expected to be between 0 and 1, even if they are stored in
@@ -780,12 +841,9 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    }
 
    /* Pick the type matching the depth-stencil format. */
-   zs_type = lp_depth_type(format_desc, z_src_type.length);
+   z_type = lp_depth_type(format_desc, z_src_type.length);
 
    /* Pick the intermediate type for depth operations. */
-   z_type = zs_type;
-   /* FIXME: Cope with a depth test type with higher bit width. */
-   assert(zs_type.width <= z_src_type.width);
    z_type.width = z_src_type.width;
    assert(z_type.length == z_src_type.length);
 
@@ -819,13 +877,11 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
       if (depth->enabled) {
          assert(z_swizzle < 4);
-         assert(format_desc->block.bits <= z_type.width);
          if (z_type.floating) {
             assert(z_swizzle == 0);
             assert(format_desc->channel[z_swizzle].type ==
                    UTIL_FORMAT_TYPE_FLOAT);
-            assert(format_desc->channel[z_swizzle].size ==
-                   format_desc->block.bits);
+            assert(format_desc->channel[z_swizzle].size == 32);
          }
          else {
             assert(format_desc->channel[z_swizzle].type ==
@@ -849,7 +905,19 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    {
       unsigned s_shift, s_mask;
 
-      if (get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask)) {
+      if (format_desc->block.bits > 32) {
+         z_dst = zs_dst[0];
+         stencil_vals = zs_dst[1];
+      }
+      else {
+         z_dst = zs_dst[0];
+         stencil_vals = zs_dst[0];
+      }
+
+      have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask);
+      have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask);
+
+      if (have_z) {
          if (z_mask != 0xffffffff) {
             z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask);
          }
@@ -859,26 +927,20 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
           */
          if (z_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
-            z_dst = LLVMBuildLShr(builder, zs_dst, shift, "z_dst");
+            z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst");
          } else if (z_bitmask) {
-	    /* TODO: Instead of loading a mask from memory and ANDing, it's
-	     * probably faster to just shake the bits with two shifts. */
-            z_dst = LLVMBuildAnd(builder, zs_dst, z_bitmask, "z_dst");
+            z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst");
          } else {
-            z_dst = zs_dst;
             lp_build_name(z_dst, "z_dst");
          }
       }
 
-      if (get_s_shift_and_mask(format_desc, &s_shift, &s_mask)) {
+      if (have_s) {
          if (s_shift) {
             LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift);
-            stencil_vals = LLVMBuildLShr(builder, zs_dst, shift, "");
+            stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, "");
             stencil_shift = shift;  /* used below */
          }
-         else {
-            stencil_vals = zs_dst;
-         }
 
          if (s_mask != 0xffffffff) {
             LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask);
@@ -997,7 +1059,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
          LLVMValueRef z_fail_mask, z_pass_mask;
 
          /* apply Z-fail operator */
-         z_fail_mask = lp_build_andnot(&z_bld, orig_mask, z_pass);
+         z_fail_mask = lp_build_andnot(&s_bld, orig_mask, z_pass);
          stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP,
                                             stencil_refs, stencil_vals,
                                             z_fail_mask, front_facing);
@@ -1019,8 +1081,8 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                                          s_pass_mask, front_facing);
    }
 
-   /* Put Z and ztencil bits in the right place */
-   if (z_dst && z_shift) {
+   /* Put Z and stencil bits in the right place */
+   if (have_z && z_shift) {
       LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift);
       z_dst = LLVMBuildShl(builder, z_dst, shift, "");
    }
@@ -1028,18 +1090,23 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
       stencil_vals = LLVMBuildShl(builder, stencil_vals,
                                   stencil_shift, "");
 
-   /* Finally, merge/store the z/stencil values */
+   /* Finally, merge the z/stencil values */
    if ((depth->enabled && depth->writemask) ||
-       (stencil[0].enabled && stencil[0].writemask)) {
-
-      if (z_dst && stencil_vals)
-         zs_dst = LLVMBuildOr(builder, z_dst, stencil_vals, "");
-      else if (z_dst)
-         zs_dst = z_dst;
-      else
-         zs_dst = stencil_vals;
-
-      *zs_value = zs_dst;
+       (stencil[0].enabled && (stencil[0].writemask ||
+                               (stencil[1].enabled && stencil[1].writemask)))) {
+
+      if (format_desc->block.bits <= 32) {
+         if (have_z && have_s)
+            zs_value[0] = LLVMBuildOr(builder, z_dst, stencil_vals, "");
+         else if (have_z)
+            zs_value[0] = z_dst;
+         else
+            zs_value[0] = stencil_vals;
+      }
+      else {
+         zs_value[0] = z_dst;
+         zs_value[1] = stencil_vals;
+      }
    }
 
    if (s_pass_mask)
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index c000494..3f455c1 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -63,17 +63,18 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst,
+                            LLVMValueRef zs_dst[2],
                             LLVMValueRef face,
-                            LLVMValueRef *zs_value,
+                            LLVMValueRef zs_value[2],
                             boolean do_branch);
 
-LLVMValueRef
+void
 lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
                                      struct lp_type z_src_type,
                                      const struct util_format_description *format_desc,
                                      LLVMValueRef depth_ptr,
                                      LLVMValueRef depth_stride,
+                                     LLVMValueRef zs_dst[2],
                                      LLVMValueRef loop_counter);
 
 void
@@ -81,10 +82,11 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
                                       struct lp_type z_src_type,
                                       const struct util_format_description *format_desc,
                                       struct lp_build_mask_context *mask,
+                                      LLVMValueRef zs_dst[2],
                                       LLVMValueRef loop_counter,
                                       LLVMValueRef depth_ptr,
                                       LLVMValueRef depth_stride,
-                                      LLVMValueRef zs_value);
+                                      LLVMValueRef zs_value[2]);
 
 
 void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index a557db4..8a4b00f 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -193,8 +193,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
                        const union lp_rast_cmd_arg arg)
 {
    const struct lp_scene *scene = task->scene;
-   uint32_t clear_value = arg.clear_zstencil.value;
-   uint32_t clear_mask = arg.clear_zstencil.mask;
+   uint64_t clear_value64 = arg.clear_zstencil.value;
+   uint64_t clear_mask64 = arg.clear_zstencil.mask;
+   uint32_t clear_value = (uint32_t) clear_value64;
+   uint32_t clear_mask = (uint32_t) clear_mask64;
    const unsigned height = TILE_SIZE;
    const unsigned width = TILE_SIZE;
    const unsigned block_size = scene->zsbuf.blocksize;
@@ -260,6 +262,28 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
             }
          }
          break;
+      case 8:
+         clear_value64 &= clear_mask64;
+         if (clear_mask64 == 0xffffffffffULL) {
+            for (i = 0; i < height; i++) {
+               uint64_t *row = (uint64_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = clear_value64;
+               dst += dst_stride;
+            }
+         }
+         else {
+            for (i = 0; i < height; i++) {
+               uint64_t *row = (uint64_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint64_t tmp = ~clear_mask64 & *row;
+                  *row++ = clear_value64 | tmp;
+               }
+               dst += dst_stride;
+            }
+         }
+         break;
+
       default:
          assert(0);
          break;
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.h b/src/gallium/drivers/llvmpipe/lp_rast.h
index 2f5fa22..8dd3615 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast.h
@@ -154,8 +154,8 @@ union lp_rast_cmd_arg {
    const struct lp_rast_state *set_state;
    union pipe_color_union clear_color;
    struct {
-      uint32_t value;
-      uint32_t mask;
+      uint64_t value;
+      uint64_t mask;
    } clear_zstencil;
    const struct lp_rast_state *state;
    struct lp_fence *fence;
@@ -217,7 +217,7 @@ lp_rast_arg_fence( struct lp_fence *fence )
 
 
 static INLINE union lp_rast_cmd_arg
-lp_rast_arg_clearzs( unsigned value, unsigned mask )
+lp_rast_arg_clearzs( uint64_t value, uint64_t mask )
 {
    union lp_rast_cmd_arg arg;
    arg.clear_zstencil.value = value;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 6b86aae..b5b00d1 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -390,8 +390,8 @@ lp_setup_try_clear( struct lp_setup_context *setup,
                     unsigned stencil,
                     unsigned flags )
 {
-   uint32_t zsmask = 0;
-   uint32_t zsvalue = 0;
+   uint64_t zsmask = 0;
+   uint64_t zsvalue = 0;
    union lp_rast_cmd_arg color_arg;
    unsigned i;
 
@@ -404,16 +404,16 @@ lp_setup_try_clear( struct lp_setup_context *setup,
 
    if (flags & PIPE_CLEAR_DEPTHSTENCIL) {
       uint32_t zmask = (flags & PIPE_CLEAR_DEPTH) ? ~0 : 0;
-      uint32_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
+      uint8_t smask = (flags & PIPE_CLEAR_STENCIL) ? ~0 : 0;
 
-      zsvalue = util_pack_z_stencil(setup->fb.zsbuf->format,
-                                    depth,
-                                    stencil);
+      zsvalue = util_pack64_z_stencil(setup->fb.zsbuf->format,
+                                      depth,
+                                      stencil);
 
 
-      zsmask = util_pack_mask_z_stencil(setup->fb.zsbuf->format,
-                                        zmask,
-                                        smask);
+      zsmask = util_pack64_mask_z_stencil(setup->fb.zsbuf->format,
+                                          zmask,
+                                          smask);
 
       zsvalue &= zsmask;
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index efc2ab7..6b35a02 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -112,8 +112,8 @@ struct lp_setup_context
    struct {
       unsigned flags;
       union lp_rast_cmd_arg color;    /**< lp_rast_clear_color() cmd */
-      unsigned zsmask;
-      unsigned zsvalue;               /**< lp_rast_clear_zstencil() cmd */
+      uint64_t zsmask;
+      uint64_t zsvalue;               /**< lp_rast_clear_zstencil() cmd */
    } clear;
 
    enum setup_state {
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 6921210..78d58fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -239,7 +239,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
    LLVMValueRef mask_ptr, mask_val;
    LLVMValueRef consts_ptr;
    LLVMValueRef z;
-   LLVMValueRef zs_value = NULL;
+   LLVMValueRef zs_value[2];
+   LLVMValueRef zs_dst[2];
    LLVMValueRef stencil_refs[2];
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    struct lp_build_for_loop_state loop_state;
@@ -259,8 +260,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
    memset(&system_values, 0, sizeof(system_values));
 
    if (key->depth.enabled ||
-       key->stencil[0].enabled ||
-       key->stencil[1].enabled) {
+       key->stencil[0].enabled) {
 
       zs_format_desc = util_format_description(key->zsbuf_format);
       assert(zs_format_desc);
@@ -281,7 +281,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
       }
 
       if (!(key->depth.enabled && key->depth.writemask) &&
-          !(key->stencil[0].enabled && key->stencil[0].writemask))
+          !((key->stencil[0].enabled && (key->stencil[0].writemask ||
+                                        (key->stencil[1].enabled &&
+                                         key->stencil[1].writemask)))))
          depth_mode &= ~(LATE_DEPTH_WRITE | EARLY_DEPTH_WRITE);
    }
    else {
@@ -337,11 +339,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
    z = interp->pos[2];
 
    if (depth_mode & EARLY_DEPTH_TEST) {
-      LLVMValueRef zs_dst_val;
-      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                                        zs_format_desc,
-                                                        depth_ptr, depth_stride,
-                                                        loop_state.counter);
+      lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                           zs_format_desc,
+                                           depth_ptr, depth_stride,
+                                           zs_dst, loop_state.counter);
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -350,14 +351,14 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   &mask,
                                   stencil_refs,
                                   z,
-                                  zs_dst_val,
+                                  zs_dst,
                                   facing,
-                                  &zs_value,
+                                  zs_value,
                                   !simple_shader);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                               NULL, loop_state.counter,
+                                               NULL, NULL, loop_state.counter,
                                                depth_ptr, depth_stride, zs_value);
       }
    }
@@ -394,7 +395,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) {
-      LLVMValueRef zs_dst_val;
       int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
@@ -403,10 +403,10 @@ generate_fs_loop(struct gallivm_state *gallivm,
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
       }
 
-      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
-                                                        zs_format_desc,
-                                                        depth_ptr, depth_stride,
-                                                        loop_state.counter);
+      lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                           zs_format_desc,
+                                           depth_ptr, depth_stride,
+                                           zs_dst, loop_state.counter);
 
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
@@ -416,14 +416,14 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   &mask,
                                   stencil_refs,
                                   z,
-                                  zs_dst_val,
+                                  zs_dst,
                                   facing,
-                                  &zs_value,
+                                  zs_value,
                                   !simple_shader);
       /* Late Z write */
       if (depth_mode & LATE_DEPTH_WRITE) {
          lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                               NULL, loop_state.counter,
+                                               NULL, NULL, loop_state.counter,
                                                depth_ptr, depth_stride, zs_value);
       }
    }
@@ -435,7 +435,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * write that out.
        */
       lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
-                                            &mask, loop_state.counter,
+                                            &mask, zs_dst, loop_state.counter,
                                             depth_ptr, depth_stride, zs_value);
    }
 
-- 
1.7.9.5


More information about the mesa-dev mailing list