[Mesa-dev] [PATCH] llvmpipe: get rid of depth swizzling.

sroland at vmware.com sroland at vmware.com
Fri Apr 26 09:33:13 PDT 2013


From: Roland Scheidegger <sroland at vmware.com>

Eliminating this we no longer need to copy between linear and swizzled layout.
This is probably not quite ideal since it's a bit more work for now, could do
some optimizations by moving depth testing outside the fragment shader loop
(but tricky for early depth test as we have neither the mask nor the
interpolated z handy in the right order).
The large amount of tile/untile code is no longer needed and will be deleted
in the next commit.
Still busted though, for some reason; in particular, everything
reading/writing the depth/stencil buffer directly just fails...
---
 src/gallium/drivers/llvmpipe/lp_bld_depth.c |  308 +++++++++++++++++++--------
 src/gallium/drivers/llvmpipe/lp_bld_depth.h |   36 ++--
 src/gallium/drivers/llvmpipe/lp_jit.h       |    4 +-
 src/gallium/drivers/llvmpipe/lp_rast.c      |  167 ++++++---------
 src/gallium/drivers/llvmpipe/lp_rast_priv.h |  105 +++++----
 src/gallium/drivers/llvmpipe/lp_state_fs.c  |   56 ++---
 6 files changed, 404 insertions(+), 272 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index b9dbdc5..59556d8 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -36,21 +36,13 @@
  * flushing would avoid this, but it would most likely result in depth fighting
  * artifacts.
  *
- * We are free to use a different pixel layout though. Since our basic
- * processing unit is a quad (2x2 pixel block) we store the depth/stencil
- * values tiled, a quad at time. That is, a depth buffer containing 
- *
- *  Z11 Z12 Z13 Z14 ...
- *  Z21 Z22 Z23 Z24 ...
- *  Z31 Z32 Z33 Z34 ...
- *  Z41 Z42 Z43 Z44 ...
- *  ... ... ... ... ...
- *
- * will actually be stored in memory as
- *
- *  Z11 Z12 Z21 Z22 Z13 Z14 Z23 Z24 ...
- *  Z31 Z32 Z41 Z42 Z33 Z34 Z43 Z44 ...
- *  ... ... ... ... ... ... ... ... ...
+ * Since we're using linear layout for everything, but we need to deal with
+ * 2x2 quads, we need to load/store multiple values and swizzle them into
+ * place (we could avoid this by doing depth/stencil testing in linear format,
+ * which would be easy for late depth/stencil test as we could do that after
+ * the fragment shader loop just as we do for color buffers, but more tricky
+ * for early depth test as we'd need both masks and interpolated depth in
+ * linear format).
  *
  *
  * @author Jose Fonseca <jfonseca at vmware.com>
@@ -71,6 +63,7 @@
 #include "gallivm/lp_bld_intr.h"
 #include "gallivm/lp_bld_debug.h"
 #include "gallivm/lp_bld_swizzle.h"
+#include "gallivm/lp_bld_pack.h"
 
 #include "lp_bld_depth.h"
 
@@ -515,6 +508,210 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
 }
 
 
+/**
+ * Load depth/stencil values.
+ * The stored values are linear, swizzle them.
+ *
+ * \param z_src_type  the data type of the fragment depth/stencil values
+ * \param format_desc  description of the depth/stencil surface
+ * \param loop_counter  the current loop iteration
+ * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride  stride of the depth/stencil buffer
+ */
+LLVMValueRef
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+                                     struct lp_type z_src_type,
+                                     const struct util_format_description *format_desc,
+                                     LLVMValueRef depth_ptr,
+                                     LLVMValueRef depth_stride,
+                                     LLVMValueRef loop_counter)
+{
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+   LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst_ptr;
+   LLVMValueRef depth_offset1, depth_offset2;
+   unsigned depth_bits = format_desc->block.bits/8;
+   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+   struct lp_type zs_load_type = zs_type;
+   zs_load_type.length = zs_load_type.length / 2;
+
+   if (z_src_type.length == 4) {
+      unsigned i;
+      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 1), "");
+      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 2), "");
+      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+                                          depth_stride, "");
+      depth_offset1 = LLVMBuildMul(builder, looplsb,
+                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+      for (i = 0; i < 4; i++) {
+         shuffles[i] = lp_build_const_int32(gallivm, i);
+      }
+   }
+   else {
+      unsigned i;
+      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+                                         lp_build_const_int32(gallivm, 1), "");
+      assert(z_src_type.length == 8);
+      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+      for (i = 0; i < 8; i++) {
+         /* This shuffle (0,1,4,5,2,3,6,7) isn't so hot with avx */
+         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+      }
+   }
+
+   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+   /* Load current z/stencil values from z/stencil buffer */
+   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+   zs_dst_ptr = LLVMBuildBitCast(builder,
+                                 zs_dst_ptr,
+                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+   zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+   zs_dst_ptr = LLVMBuildBitCast(builder,
+                                 zs_dst_ptr,
+                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+
+   zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                   LLVMConstVector(shuffles, zs_type.length), "");
+
+   if (format_desc->block.bits < z_src_type.width) {
+      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
+      zs_dst = LLVMBuildZExt(builder, zs_dst, lp_build_vec_type(gallivm, zs_type), "");
+   }
+
+   lp_build_name(zs_dst, "zs_dst");
+
+   return zs_dst;
+}
+
+/**
+ * Store depth/stencil values.
+ * Incoming values are swizzled (typically n 2x2 quads), stored linear.
+ * If there's a mask it will do reload/select/store otherwise just store.
+ *
+ * \param z_src_type  the data type of the fragment depth/stencil values
+ * \param format_desc  description of the depth/stencil surface
+ * \param mask  the alive/dead pixel mask for the quad (vector)
+ * \param loop_counter  the current loop iteration
+ * \param depth_ptr  pointer to the depth/stencil values of this 4x4 block
+ * \param depth_stride  stride of the depth/stencil buffer
+ * \param zs_value  the depth/stencil values to store
+ */
+void
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+                                      struct lp_type z_src_type,
+                                      const struct util_format_description *format_desc,
+                                      struct lp_build_mask_context *mask,
+                                      LLVMValueRef loop_counter,
+                                      LLVMValueRef depth_ptr,
+                                      LLVMValueRef depth_stride,
+                                      LLVMValueRef zs_value)
+{
+   struct lp_build_context z_bld;
+   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef mask_value;
+   LLVMValueRef zs_dst, zs_dst1, zs_dst2;
+   LLVMValueRef zs_dst_ptr1, zs_dst_ptr2;
+   LLVMValueRef depth_offset1, depth_offset2;
+   unsigned depth_bits = format_desc->block.bits/8;
+   struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length);
+   struct lp_type zs_load_type = zs_type;
+   zs_load_type.length = zs_load_type.length / 2;
+
+   lp_build_context_init(&z_bld, gallivm, zs_type);
+
+   /*
+    * This is far from ideal, at least for late depth write we should do this
+    * outside the fs loop to avoid all the swizzle stuff.
+    */
+   if (z_src_type.length == 4) {
+      unsigned i;
+      LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 1), "");
+      LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter,
+                                          lp_build_const_int32(gallivm, 2), "");
+      LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb,
+                                          depth_stride, "");
+      depth_offset1 = LLVMBuildMul(builder, looplsb,
+                                   lp_build_const_int32(gallivm, depth_bits * 2), "");
+      depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, "");
+
+      for (i = 0; i < 4; i++) {
+         shuffles[i] = lp_build_const_int32(gallivm, i);
+      }
+   }
+   else {
+      unsigned i;
+      LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter,
+                                         lp_build_const_int32(gallivm, 1), "");
+      assert(z_src_type.length == 8);
+      depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, "");
+      for (i = 0; i < 8; i++) {
+         /* This shuffle (0,1,4,5,2,3,6,7) isn't so hot with avx */
+         shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2);
+      }
+   }
+
+
+   depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, "");
+
+   zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
+   zs_dst_ptr1 = LLVMBuildBitCast(builder,
+                                 zs_dst_ptr1,
+                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+   zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+   zs_dst_ptr2 = LLVMBuildBitCast(builder,
+                                  zs_dst_ptr2,
+                                  LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0), "");
+
+   if (mask) {
+      zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr1, "");
+      zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr2, "");
+      zs_dst = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
+                                      LLVMConstVector(shuffles, zs_type.length),
+                                      "zsbufval");
+
+      mask_value = lp_build_mask_value(mask);
+   }
+
+   if (zs_type.width < z_src_type.width) {
+      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
+      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
+      if (mask)
+         mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.int_vec_type, "");
+   }
+
+   if (mask) {
+      zs_value = lp_build_select(&z_bld, mask_value, zs_value, zs_dst);
+   }
+
+   if (z_src_type.length == 4) {
+      zs_dst1 = lp_build_extract_range(gallivm, zs_value, 0, 2);
+      zs_dst2 = lp_build_extract_range(gallivm, zs_value, 2, 2);
+   }
+   else {
+      assert(z_src_type.length == 8);
+      zs_dst1 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
+                                       LLVMConstVector(&shuffles[0],
+                                                       zs_load_type.length),
+                                       "");
+      zs_dst2 = LLVMBuildShuffleVector(builder, zs_value, zs_value,
+                                       LLVMConstVector(&shuffles[4],
+                                                       zs_load_type.length),
+                                       "");
+
+   }
+   LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
+   LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+}
 
 /**
  * Generate code for performing depth and/or stencil tests.
@@ -527,7 +724,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
  * \param mask  the alive/dead pixel mask for the quad (vector)
  * \param stencil_refs  the front/back stencil ref values (scalar)
  * \param z_src  the incoming depth/stencil values (n 2x2 quad values, float32)
- * \param zs_dst_ptr  pointer to depth/stencil values in framebuffer
+ * \param zs_dst  the depth/stencil values in framebuffer
  * \param face  contains boolean value indicating front/back facing polygon
  */
 void
@@ -539,7 +736,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
                             LLVMValueRef z_src,
-                            LLVMValueRef zs_dst_ptr,
+                            LLVMValueRef zs_dst,
                             LLVMValueRef face,
                             LLVMValueRef *zs_value,
                             boolean do_branch)
@@ -551,7 +748,7 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    struct lp_build_context s_bld;
    struct lp_type s_type;
    unsigned z_shift = 0, z_width = 0, z_mask = 0;
-   LLVMValueRef zs_dst, z_dst = NULL;
+   LLVMValueRef z_dst = NULL;
    LLVMValueRef stencil_vals = NULL;
    LLVMValueRef z_bitmask = NULL, stencil_shift = NULL;
    LLVMValueRef z_pass = NULL, s_pass_mask = NULL;
@@ -638,19 +835,6 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
    s_type = lp_int_type(z_type);
    lp_build_context_init(&s_bld, gallivm, s_type);
 
-   /* Load current z/stencil value from z/stencil buffer */
-   zs_dst_ptr = LLVMBuildBitCast(builder,
-                                 zs_dst_ptr,
-                                 LLVMPointerType(lp_build_vec_type(gallivm, zs_type), 0), "");
-   zs_dst = LLVMBuildLoad(builder, zs_dst_ptr, "");
-   if (format_desc->block.bits < z_type.width) {
-      /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */
-      zs_dst = LLVMBuildZExt(builder, zs_dst, z_bld.vec_type, "");
-   }
-
-   lp_build_name(zs_dst, "zs_dst");
-
-
    /* Compute and apply the Z/stencil bitmasks and shifts.
     */
    {
@@ -860,65 +1044,3 @@ lp_build_depth_stencil_test(struct gallivm_state *gallivm,
 
 }
 
-
-void
-lp_build_depth_write(struct gallivm_state *gallivm,
-                     struct lp_type z_src_type,
-                     const struct util_format_description *format_desc,
-                     LLVMValueRef zs_dst_ptr,
-                     LLVMValueRef zs_value)
-{
-   LLVMBuilderRef builder = gallivm->builder;
-
-   if (format_desc->block.bits < z_src_type.width) {
-      /* Truncate income ZS values (e.g., when writing to Z16_UNORM) */
-      LLVMTypeRef zs_type = LLVMIntTypeInContext(gallivm->context, format_desc->block.bits);
-      if (z_src_type.length > 1) {
-         zs_type = LLVMVectorType(zs_type, z_src_type.length);
-      }
-      zs_value = LLVMBuildTrunc(builder, zs_value, zs_type, "");
-   }
-
-   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
-                                 LLVMPointerType(LLVMTypeOf(zs_value), 0), "");
-
-   LLVMBuildStore(builder, zs_value, zs_dst_ptr);
-}
-
-
-void
-lp_build_deferred_depth_write(struct gallivm_state *gallivm,
-                              struct lp_type z_src_type,
-                              const struct util_format_description *format_desc,
-                              struct lp_build_mask_context *mask,
-                              LLVMValueRef zs_dst_ptr,
-                              LLVMValueRef zs_value)
-{
-   struct lp_type z_type;
-   struct lp_build_context z_bld;
-   LLVMValueRef z_dst;
-   LLVMBuilderRef builder = gallivm->builder;
-   LLVMValueRef mask_value;
-
-   /* XXX: pointlessly redo type logic:
-    */
-   z_type = lp_depth_type(format_desc, z_src_type.length);
-   lp_build_context_init(&z_bld, gallivm, z_type);
-
-   zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr,
-                                 LLVMPointerType(z_bld.vec_type, 0), "");
-
-   z_dst = LLVMBuildLoad(builder, zs_dst_ptr, "zsbufval");
-
-   mask_value = lp_build_mask_value(mask);
-
-   if (z_type.width < z_src_type.width) {
-      /* Truncate incoming ZS and mask values (e.g., when writing to Z16_UNORM) */
-      zs_value = LLVMBuildTrunc(builder, zs_value, z_bld.vec_type, "");
-      mask_value = LLVMBuildTrunc(builder, mask_value, z_bld.int_vec_type, "");
-   }
-
-   z_dst = lp_build_select(&z_bld, mask_value, zs_value, z_dst);
-
-   LLVMBuildStore(builder, z_dst, zs_dst_ptr);
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 33cb0dd..c000494 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -58,30 +58,34 @@ void
 lp_build_depth_stencil_test(struct gallivm_state *gallivm,
                             const struct pipe_depth_state *depth,
                             const struct pipe_stencil_state stencil[2],
-                            struct lp_type type,
+                            struct lp_type z_src_type,
                             const struct util_format_description *format_desc,
                             struct lp_build_mask_context *mask,
                             LLVMValueRef stencil_refs[2],
-                            LLVMValueRef zs_src,
-                            LLVMValueRef zs_dst_ptr,
-                            LLVMValueRef facing,
+                            LLVMValueRef z_src,
+                            LLVMValueRef zs_dst,
+                            LLVMValueRef face,
                             LLVMValueRef *zs_value,
                             boolean do_branch);
 
-void
-lp_build_depth_write(struct gallivm_state *gallivm,
-                     struct lp_type z_src_type,
-                     const struct util_format_description *format_desc,
-                     LLVMValueRef zs_dst_ptr,
-                     LLVMValueRef zs_value);
+LLVMValueRef
+lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
+                                     struct lp_type z_src_type,
+                                     const struct util_format_description *format_desc,
+                                     LLVMValueRef depth_ptr,
+                                     LLVMValueRef depth_stride,
+                                     LLVMValueRef loop_counter);
 
 void
-lp_build_deferred_depth_write(struct gallivm_state *gallivm,
-                              struct lp_type z_src_type,
-                              const struct util_format_description *format_desc,
-                              struct lp_build_mask_context *mask,
-                              LLVMValueRef zs_dst_ptr,
-                              LLVMValueRef zs_value);
+lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
+                                      struct lp_type z_src_type,
+                                      const struct util_format_description *format_desc,
+                                      struct lp_build_mask_context *mask,
+                                      LLVMValueRef loop_counter,
+                                      LLVMValueRef depth_ptr,
+                                      LLVMValueRef depth_stride,
+                                      LLVMValueRef zs_value);
+
 
 void
 lp_build_occlusion_count(struct gallivm_state *gallivm,
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.h b/src/gallium/drivers/llvmpipe/lp_jit.h
index 4eddb2a..4e9ca76 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.h
+++ b/src/gallium/drivers/llvmpipe/lp_jit.h
@@ -193,6 +193,7 @@ enum {
  * @param mask          mask of visible pixels in block
  * @param thread_data   task thread data
  * @param stride        color buffer row stride in bytes
+ * @param depth_stride  depth buffer row stride in bytes
  */
 typedef void
 (*lp_jit_frag_func)(const struct lp_jit_context *context,
@@ -206,7 +207,8 @@ typedef void
                     void *depth,
                     uint32_t mask,
                     struct lp_jit_thread_data *thread_data,
-                    unsigned *stride);
+                    unsigned *stride,
+                    unsigned depth_stride);
 
 
 void
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index 29dd933..6b096fc 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -89,51 +89,15 @@ static void
 lp_rast_tile_begin(struct lp_rasterizer_task *task,
                    const struct cmd_bin *bin)
 {
-   const struct lp_scene *scene = task->scene;
-   enum lp_texture_usage usage;
-
    LP_DBG(DEBUG_RAST, "%s %d,%d\n", __FUNCTION__, bin->x, bin->y);
 
    task->bin = bin;
    task->x = bin->x * TILE_SIZE;
    task->y = bin->y * TILE_SIZE;
 
-   /* reset pointers to color tile(s) */
+   /* reset pointers to color and depth tile(s) */
    memset(task->color_tiles, 0, sizeof(task->color_tiles));
-
-   /* get pointer to depth/stencil tile */
-   {
-      struct pipe_surface *zsbuf = task->scene->fb.zsbuf;
-      if (zsbuf) {
-         struct llvmpipe_resource *lpt = llvmpipe_resource(zsbuf->texture);
-
-         if (scene->has_depthstencil_clear)
-            usage = LP_TEX_USAGE_WRITE_ALL;
-         else
-            usage = LP_TEX_USAGE_READ_WRITE;
-
-         /* "prime" the tile: convert data from linear to tiled if necessary
-          * and update the tile's layout info.
-          */
-         (void) llvmpipe_get_texture_tile(lpt,
-                                          zsbuf->u.tex.first_layer,
-                                          zsbuf->u.tex.level,
-                                          usage,
-                                          task->x,
-                                          task->y);
-         /* Get actual pointer to the tile data.  Note that depth/stencil
-          * data is tiled differently than color data.
-          */
-         task->depth_tile = lp_rast_get_depth_block_pointer(task,
-                                                            task->x,
-                                                            task->y);
-
-         assert(task->depth_tile);
-      }
-      else {
-         task->depth_tile = NULL;
-      }
-   }
+   task->depth_tile = NULL;
 }
 
 
@@ -220,8 +184,6 @@ lp_rast_clear_color(struct lp_rasterizer_task *task,
 
 
 
-
-
 /**
  * Clear the rasterizer's current z/stencil tile.
  * This is a bin command called during bin processing.
@@ -233,10 +195,10 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
    const struct lp_scene *scene = task->scene;
    uint32_t clear_value = arg.clear_zstencil.value;
    uint32_t clear_mask = arg.clear_zstencil.mask;
-   const unsigned height = TILE_SIZE / TILE_VECTOR_HEIGHT;
-   const unsigned width = TILE_SIZE * TILE_VECTOR_HEIGHT;
+   const unsigned height = TILE_SIZE;
+   const unsigned width = TILE_SIZE;
    const unsigned block_size = scene->zsbuf.blocksize;
-   const unsigned dst_stride = scene->zsbuf.stride * TILE_VECTOR_HEIGHT;
+   const unsigned dst_stride = scene->zsbuf.stride;
    uint8_t *dst;
    unsigned i, j;
 
@@ -244,65 +206,64 @@ lp_rast_clear_zstencil(struct lp_rasterizer_task *task,
            __FUNCTION__, clear_value, clear_mask);
 
    /*
-    * Clear the area of the swizzled depth/depth buffer matching this tile, in
-    * stripes of TILE_VECTOR_HEIGHT x TILE_SIZE at a time.
-    *
-    * The swizzled depth format is such that the depths for
-    * TILE_VECTOR_HEIGHT x TILE_VECTOR_WIDTH pixels have consecutive offsets.
+    * Clear the area of the depth/stencil buffer matching this tile.
     */
 
-   dst = task->depth_tile;
+   if (scene->fb.zsbuf) {
 
-   clear_value &= clear_mask;
+      dst = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE);
 
-   switch (block_size) {
-   case 1:
-      assert(clear_mask == 0xff);
-      memset(dst, (uint8_t) clear_value, height * width);
-      break;
-   case 2:
-      if (clear_mask == 0xffff) {
-         for (i = 0; i < height; i++) {
-            uint16_t *row = (uint16_t *)dst;
-            for (j = 0; j < width; j++)
-               *row++ = (uint16_t) clear_value;
-            dst += dst_stride;
+      clear_value &= clear_mask;
+
+      switch (block_size) {
+      case 1:
+         assert(clear_mask == 0xff);
+         memset(dst, (uint8_t) clear_value, height * width);
+         break;
+      case 2:
+         if (clear_mask == 0xffff) {
+            for (i = 0; i < height; i++) {
+               uint16_t *row = (uint16_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = (uint16_t) clear_value;
+               dst += dst_stride;
+            }
          }
-      }
-      else {
-         for (i = 0; i < height; i++) {
-            uint16_t *row = (uint16_t *)dst;
-            for (j = 0; j < width; j++) {
-               uint16_t tmp = ~clear_mask & *row;
-               *row++ = clear_value | tmp;
+         else {
+            for (i = 0; i < height; i++) {
+               uint16_t *row = (uint16_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint16_t tmp = ~clear_mask & *row;
+                  *row++ = clear_value | tmp;
+               }
+               dst += dst_stride;
             }
-            dst += dst_stride;
          }
-      }
-      break;
-   case 4:
-      if (clear_mask == 0xffffffff) {
-         for (i = 0; i < height; i++) {
-            uint32_t *row = (uint32_t *)dst;
-            for (j = 0; j < width; j++)
-               *row++ = clear_value;
-            dst += dst_stride;
+         break;
+      case 4:
+         if (clear_mask == 0xffffffff) {
+            for (i = 0; i < height; i++) {
+               uint32_t *row = (uint32_t *)dst;
+               for (j = 0; j < width; j++)
+                  *row++ = clear_value;
+               dst += dst_stride;
+            }
          }
-      }
-      else {
-         for (i = 0; i < height; i++) {
-            uint32_t *row = (uint32_t *)dst;
-            for (j = 0; j < width; j++) {
-               uint32_t tmp = ~clear_mask & *row;
-               *row++ = clear_value | tmp;
+         else {
+            for (i = 0; i < height; i++) {
+               uint32_t *row = (uint32_t *)dst;
+               for (j = 0; j < width; j++) {
+                  uint32_t tmp = ~clear_mask & *row;
+                  *row++ = clear_value | tmp;
+               }
+               dst += dst_stride;
             }
-            dst += dst_stride;
          }
+         break;
+      default:
+         assert(0);
+         break;
       }
-      break;
-   default:
-      assert(0);
-      break;
    }
 }
 
@@ -343,7 +304,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
       for (x = 0; x < TILE_SIZE; x += 4) {
          uint8_t *color[PIPE_MAX_COLOR_BUFS];
          unsigned stride[PIPE_MAX_COLOR_BUFS];
-         uint32_t *depth;
+         uint8_t *depth = NULL;
+         unsigned depth_stride = 0;
          unsigned i;
 
          /* color buffer */
@@ -354,7 +316,11 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
          }
 
          /* depth buffer */
-         depth = lp_rast_get_depth_block_pointer(task, tile_x + x, tile_y + y);
+         if (scene->zsbuf.map) {
+            depth = lp_rast_get_unswizzled_depth_block_pointer(task, tile_x + x, tile_y + y);
+            depth_stride = scene->zsbuf.stride;
+         }
+
 
          /* run shader on 4x4 block */
          BEGIN_JIT_CALL(state, task);
@@ -368,7 +334,8 @@ lp_rast_shade_tile(struct lp_rasterizer_task *task,
                                             depth,
                                             0xffff,
                                             &task->thread_data,
-                                            stride);
+                                            stride,
+                                            depth_stride);
          END_JIT_CALL();
       }
    }
@@ -412,7 +379,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    const struct lp_scene *scene = task->scene;
    uint8_t *color[PIPE_MAX_COLOR_BUFS];
    unsigned stride[PIPE_MAX_COLOR_BUFS];
-   void *depth;
+   void *depth = NULL;
+   unsigned depth_stride = 0;
    unsigned i;
 
    assert(state);
@@ -434,8 +402,10 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
    }
 
    /* depth buffer */
-   depth = lp_rast_get_depth_block_pointer(task, x, y);
-
+   if (scene->zsbuf.map) {
+      depth_stride = scene->zsbuf.stride;
+      depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y);
+   }
 
    assert(lp_check_alignment(state->jit_context.u8_blend_color, 16));
 
@@ -451,7 +421,8 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
                                          depth,
                                          mask,
                                          &task->thread_data,
-                                         stride);
+                                         stride,
+                                         depth_stride);
    END_JIT_CALL();
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_priv.h b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
index c0f41f6..7d01da1 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast_priv.h
+++ b/src/gallium/drivers/llvmpipe/lp_rast_priv.h
@@ -140,48 +140,39 @@ lp_rast_shade_quads_mask(struct lp_rasterizer_task *task,
 
 
 /**
- * Get the pointer to a 4x4 depth/stencil block.
- * We'll map the z/stencil buffer on demand here.
- * Note that this may be called even when there's no z/stencil buffer - return
- * NULL in that case.
- * \param x, y location of 4x4 block in window coords
+ * Get pointer to the unswizzled color tile
  */
-static INLINE void *
-lp_rast_get_depth_block_pointer(struct lp_rasterizer_task *task,
-                                unsigned x, unsigned y)
+static INLINE uint8_t *
+lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
+                                          unsigned buf, enum lp_texture_usage usage)
 {
    const struct lp_scene *scene = task->scene;
-   void *depth;
+   unsigned format_bytes;
 
-   assert(x < scene->tiles_x * TILE_SIZE);
-   assert(y < scene->tiles_y * TILE_SIZE);
-   assert((x % TILE_VECTOR_WIDTH) == 0);
-   assert((y % TILE_VECTOR_HEIGHT) == 0);
+   assert(task->x < scene->tiles_x * TILE_SIZE);
+   assert(task->y < scene->tiles_y * TILE_SIZE);
+   assert(task->x % TILE_SIZE == 0);
+   assert(task->y % TILE_SIZE == 0);
+   assert(buf < scene->fb.nr_cbufs);
 
-   if (!scene->zsbuf.map) {
-      /* Either out of memory or no zsbuf.  Can't tell without access
-       * to the state.  Just use dummy tile memory, but don't print
-       * the oom warning as this most likely because there is no
-       * zsbuf.
-       */
-      return lp_dummy_tile;
-   }
+   if (!task->color_tiles[buf]) {
+      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
+      assert(cbuf);
 
-   depth = (scene->zsbuf.map +
-            scene->zsbuf.stride * y +
-            scene->zsbuf.blocksize * x * TILE_VECTOR_HEIGHT);
+      format_bytes = util_format_get_blocksize(cbuf->format);
+      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
+   }
 
-   assert(lp_check_alignment(depth, 16));
-   return depth;
+   return task->color_tiles[buf];
 }
 
 
 /**
- * Get pointer to the unswizzled color tile
+ * Get pointer to the unswizzled depth tile
  */
 static INLINE uint8_t *
-lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
-                                          unsigned buf, enum lp_texture_usage usage)
+lp_rast_get_unswizzled_depth_tile_pointer(struct lp_rasterizer_task *task,
+                                          enum lp_texture_usage usage)
 {
    const struct lp_scene *scene = task->scene;
    unsigned format_bytes;
@@ -190,17 +181,16 @@ lp_rast_get_unswizzled_color_tile_pointer(struct lp_rasterizer_task *task,
    assert(task->y < scene->tiles_y * TILE_SIZE);
    assert(task->x % TILE_SIZE == 0);
    assert(task->y % TILE_SIZE == 0);
-   assert(buf < scene->fb.nr_cbufs);
 
-   if (!task->color_tiles[buf]) {
-      struct pipe_surface *cbuf = scene->fb.cbufs[buf];
-      assert(cbuf);
+   if (!task->depth_tile) {
+      struct pipe_surface *dbuf = scene->fb.zsbuf;
+      assert(dbuf);
 
-      format_bytes = util_format_get_blocksize(cbuf->format);
-      task->color_tiles[buf] = scene->cbufs[buf].map + scene->cbufs[buf].stride * task->y + format_bytes * task->x;
+      format_bytes = util_format_get_blocksize(dbuf->format);
+      task->depth_tile = scene->zsbuf.map + scene->zsbuf.stride * task->y + format_bytes * task->x;
    }
 
-   return task->color_tiles[buf];
+   return task->depth_tile;
 }
 
 
@@ -237,6 +227,38 @@ lp_rast_get_unswizzled_color_block_pointer(struct lp_rasterizer_task *task,
 }
 
 
+/**
+ * Get the pointer to an unswizzled 4x4 depth block (within an unswizzled 64x64 tile).
+ * \param x, y location of 4x4 block in window coords
+ */
+static INLINE uint8_t *
+lp_rast_get_unswizzled_depth_block_pointer(struct lp_rasterizer_task *task,
+                                           unsigned x, unsigned y)
+{
+   unsigned px, py, pixel_offset, format_bytes; /* px/py: block position relative to the tile start */
+   uint8_t *depth;
+
+   assert(x < task->scene->tiles_x * TILE_SIZE); /* block must lie inside the scene's tile grid */
+   assert(y < task->scene->tiles_y * TILE_SIZE);
+   assert((x % TILE_VECTOR_WIDTH) == 0); /* and start on a TILE_VECTOR_WIDTH x TILE_VECTOR_HEIGHT boundary */
+   assert((y % TILE_VECTOR_HEIGHT) == 0);
+
+   format_bytes = util_format_get_blocksize(task->scene->fb.zsbuf->format); /* used below as the per-pixel step in x */
+
+   depth = lp_rast_get_unswizzled_depth_tile_pointer(task, LP_TEX_USAGE_READ_WRITE); /* start of the task's current depth tile */
+   assert(depth);
+
+   px = x % TILE_SIZE; /* reduce window coords to an offset within the tile */
+   py = y % TILE_SIZE;
+   pixel_offset = px * format_bytes + py * task->scene->zsbuf.stride; /* linear layout: rows are zsbuf.stride apart */
+
+   depth = depth + pixel_offset; /* uint8_t pointer, so pixel_offset is applied in bytes */
+
+   assert(lp_check_alignment(depth, llvmpipe_get_format_alignment(task->scene->fb.zsbuf->format))); /* alignment is format-dependent, not a fixed value */
+   return depth;
+}
+
+
 
 /**
  * Shade all pixels in a 4x4 block.  The fragment code omits the
@@ -253,7 +275,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
    struct lp_fragment_shader_variant *variant = state->variant;
    uint8_t *color[PIPE_MAX_COLOR_BUFS];
    unsigned stride[PIPE_MAX_COLOR_BUFS];
-   void *depth;
+   void *depth = NULL;
+   unsigned depth_stride = 0;
    unsigned i;
 
    /* color buffer */
@@ -263,7 +286,10 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
       color[i] = lp_rast_get_unswizzled_color_block_pointer(task, i, x, y);
    }
 
-   depth = lp_rast_get_depth_block_pointer(task, x, y);
+   if (scene->zsbuf.map) {
+      depth = lp_rast_get_unswizzled_depth_block_pointer(task, x, y);
+      depth_stride = scene->zsbuf.stride;
+   }
 
    /* run shader on 4x4 block */
    BEGIN_JIT_CALL(state, task);
@@ -277,7 +303,8 @@ lp_rast_shade_quads_all( struct lp_rasterizer_task *task,
                                       depth,
                                       0xffff,
                                       &task->thread_data,
-                                      stride );
+                                      stride,
+                                      depth_stride);
    END_JIT_CALL();
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 1a9a194..6921210 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -229,7 +229,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
                  LLVMValueRef mask_store,
                  LLVMValueRef (*out_color)[4],
                  LLVMValueRef depth_ptr,
-                 unsigned depth_bits,
+                 LLVMValueRef depth_stride,
                  LLVMValueRef facing,
                  LLVMValueRef thread_data_ptr)
 {
@@ -241,8 +241,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
    LLVMValueRef z;
    LLVMValueRef zs_value = NULL;
    LLVMValueRef stencil_refs[2];
-   LLVMValueRef depth_ptr_i;
-   LLVMValueRef depth_offset;
    LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
    struct lp_build_for_loop_state loop_state;
    struct lp_build_mask_context mask;
@@ -308,12 +306,6 @@ generate_fs_loop(struct gallivm_state *gallivm,
                            &loop_state.counter, 1, "mask_ptr");
    mask_val = LLVMBuildLoad(builder, mask_ptr, "");
 
-   depth_offset = LLVMBuildMul(builder, loop_state.counter,
-                               lp_build_const_int32(gallivm, depth_bits * type.length),
-                               "");
-
-   depth_ptr_i = LLVMBuildGEP(builder, depth_ptr, &depth_offset, 1, "");
-
    memset(outputs, 0, sizeof outputs);
 
    for(cbuf = 0; cbuf < key->nr_cbufs; cbuf++) {
@@ -345,6 +337,11 @@ generate_fs_loop(struct gallivm_state *gallivm,
    z = interp->pos[2];
 
    if (depth_mode & EARLY_DEPTH_TEST) {
+      LLVMValueRef zs_dst_val;
+      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                                        zs_format_desc,
+                                                        depth_ptr, depth_stride,
+                                                        loop_state.counter);
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -353,12 +350,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   &mask,
                                   stencil_refs,
                                   z,
-                                  depth_ptr_i, facing,
+                                  zs_dst_val,
+                                  facing,
                                   &zs_value,
                                   !simple_shader);
 
       if (depth_mode & EARLY_DEPTH_WRITE) {
-         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
+         lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+                                               NULL, loop_state.counter,
+                                               depth_ptr, depth_stride, zs_value);
       }
    }
 
@@ -394,6 +394,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
 
    /* Late Z test */
    if (depth_mode & LATE_DEPTH_TEST) {
+      LLVMValueRef zs_dst_val;
       int pos0 = find_output_by_semantic(&shader->info.base,
                                          TGSI_SEMANTIC_POSITION,
                                          0);
@@ -402,6 +403,11 @@ generate_fs_loop(struct gallivm_state *gallivm,
          z = LLVMBuildLoad(builder, outputs[pos0][2], "output.z");
       }
 
+      zs_dst_val = lp_build_depth_stencil_load_swizzled(gallivm, type,
+                                                        zs_format_desc,
+                                                        depth_ptr, depth_stride,
+                                                        loop_state.counter);
+
       lp_build_depth_stencil_test(gallivm,
                                   &key->depth,
                                   key->stencil,
@@ -410,12 +416,15 @@ generate_fs_loop(struct gallivm_state *gallivm,
                                   &mask,
                                   stencil_refs,
                                   z,
-                                  depth_ptr_i, facing,
+                                  zs_dst_val,
+                                  facing,
                                   &zs_value,
                                   !simple_shader);
       /* Late Z write */
       if (depth_mode & LATE_DEPTH_WRITE) {
-         lp_build_depth_write(gallivm, type, zs_format_desc, depth_ptr_i, zs_value);
+         lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+                                               NULL, loop_state.counter,
+                                               depth_ptr, depth_stride, zs_value);
       }
    }
    else if ((depth_mode & EARLY_DEPTH_TEST) &&
@@ -425,12 +434,9 @@ generate_fs_loop(struct gallivm_state *gallivm,
        * depth value, update from zs_value with the new mask value and
        * write that out.
        */
-      lp_build_deferred_depth_write(gallivm,
-                                    type,
-                                    zs_format_desc,
-                                    &mask,
-                                    depth_ptr_i,
-                                    zs_value);
+      lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+                                            &mask, loop_state.counter,
+                                            depth_ptr, depth_stride, zs_value);
    }
 
 
@@ -1749,7 +1755,7 @@ generate_fragment(struct llvmpipe_context *lp,
    struct lp_type blend_type;
    LLVMTypeRef fs_elem_type;
    LLVMTypeRef blend_vec_type;
-   LLVMTypeRef arg_types[12];
+   LLVMTypeRef arg_types[13];
    LLVMTypeRef func_type;
    LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
    LLVMTypeRef int8_type = LLVMInt8TypeInContext(gallivm->context);
@@ -1762,6 +1768,7 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef color_ptr_ptr;
    LLVMValueRef stride_ptr;
    LLVMValueRef depth_ptr;
+   LLVMValueRef depth_stride;
    LLVMValueRef mask_input;
    LLVMValueRef thread_data_ptr;
    LLVMBasicBlockRef block;
@@ -1772,7 +1779,6 @@ generate_fragment(struct llvmpipe_context *lp,
    LLVMValueRef fs_out_color[PIPE_MAX_COLOR_BUFS][TGSI_NUM_CHANNELS][16 / 4];
    LLVMValueRef function;
    LLVMValueRef facing;
-   const struct util_format_description *zs_format_desc;
    unsigned num_fs;
    unsigned i;
    unsigned chan;
@@ -1847,6 +1853,7 @@ generate_fragment(struct llvmpipe_context *lp,
    arg_types[9] = int32_type;                          /* mask_input */
    arg_types[10] = variant->jit_thread_data_ptr_type;  /* per thread data */
    arg_types[11] = LLVMPointerType(int32_type, 0);     /* stride */
+   arg_types[12] = int32_type;                         /* depth_stride */
 
    func_type = LLVMFunctionType(LLVMVoidTypeInContext(gallivm->context),
                                 arg_types, Elements(arg_types), 0);
@@ -1875,6 +1882,7 @@ generate_fragment(struct llvmpipe_context *lp,
    mask_input   = LLVMGetParam(function, 9);
    thread_data_ptr  = LLVMGetParam(function, 10);
    stride_ptr   = LLVMGetParam(function, 11);
+   depth_stride = LLVMGetParam(function, 12);
 
    lp_build_name(context_ptr, "context");
    lp_build_name(x, "x");
@@ -1887,6 +1895,7 @@ generate_fragment(struct llvmpipe_context *lp,
    lp_build_name(thread_data_ptr, "thread_data");
    lp_build_name(mask_input, "mask_input");
    lp_build_name(stride_ptr, "stride_ptr");
+   lp_build_name(depth_stride, "depth_stride");
 
    /*
     * Function body
@@ -1900,10 +1909,7 @@ generate_fragment(struct llvmpipe_context *lp,
    /* code generated texture sampling */
    sampler = lp_llvm_sampler_soa_create(key->state, context_ptr);
 
-   zs_format_desc = util_format_description(key->zsbuf_format);
-
    {
-      unsigned depth_bits = zs_format_desc->block.bits/8;
       LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
       LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
       LLVMValueRef mask_store = lp_build_array_alloca(gallivm, mask_type,
@@ -1951,7 +1957,7 @@ generate_fragment(struct llvmpipe_context *lp,
                        mask_store, /* output */
                        color_store,
                        depth_ptr,
-                       depth_bits,
+                       depth_stride,
                        facing,
                        thread_data_ptr);
 
-- 
1.7.9.5



More information about the mesa-dev mailing list