Mesa (gallium-llvmpipe): llvmpipe: Code generate the depth test, and include in the shader.

Jose Fonseca jrfonseca at kemper.freedesktop.org
Fri Aug 21 07:02:44 UTC 2009


Module: Mesa
Branch: gallium-llvmpipe
Commit: 75a31a6201e7769745fd1ea484f0e1af804e531d
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=75a31a6201e7769745fd1ea484f0e1af804e531d

Author: José Fonseca <jfonseca at vmware.com>
Date:   Fri Aug 21 07:48:04 2009 +0100

llvmpipe: Code generate the depth test, and include in the shader.

Only 32-bit depth/stencil surfaces are supported for now. Stencil ops are not
implemented yet.

---

 src/gallium/drivers/llvmpipe/lp_clear.c           |    5 +-
 src/gallium/drivers/llvmpipe/lp_context.c         |    8 -
 src/gallium/drivers/llvmpipe/lp_context.h         |    2 -
 src/gallium/drivers/llvmpipe/lp_flush.c           |    3 +-
 src/gallium/drivers/llvmpipe/lp_quad.h            |    1 -
 src/gallium/drivers/llvmpipe/lp_quad_depth_test.c |  792 ---------------------
 src/gallium/drivers/llvmpipe/lp_quad_fs.c         |   78 ++-
 src/gallium/drivers/llvmpipe/lp_quad_pipe.c       |   16 +-
 src/gallium/drivers/llvmpipe/lp_quad_pipe.h       |    3 -
 src/gallium/drivers/llvmpipe/lp_screen.c          |    5 +
 src/gallium/drivers/llvmpipe/lp_state.h           |   17 +-
 src/gallium/drivers/llvmpipe/lp_state_fs.c        |   99 +++-
 src/gallium/drivers/llvmpipe/lp_state_surface.c   |    6 -
 src/gallium/drivers/llvmpipe/lp_texture.c         |    6 +
 14 files changed, 172 insertions(+), 869 deletions(-)

diff --git a/src/gallium/drivers/llvmpipe/lp_clear.c b/src/gallium/drivers/llvmpipe/lp_clear.c
index 4a7ffde..c3e62e8 100644
--- a/src/gallium/drivers/llvmpipe/lp_clear.c
+++ b/src/gallium/drivers/llvmpipe/lp_clear.c
@@ -79,11 +79,8 @@ llvmpipe_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
       struct pipe_surface *ps = llvmpipe->framebuffer.zsbuf;
 
       cv = util_pack_z_stencil(ps->format, depth, stencil);
-      lp_tile_cache_clear(llvmpipe->zsbuf_cache, zero, cv);
 
-#if !TILE_CLEAR_OPTIMIZATION
       /* non-cached surface */
       pipe->surface_fill(pipe, ps, 0, 0, ps->width, ps->height, cv);
-#endif
-      }
+   }
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_context.c b/src/gallium/drivers/llvmpipe/lp_context.c
index 7e7015d..b335e03 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.c
+++ b/src/gallium/drivers/llvmpipe/lp_context.c
@@ -60,8 +60,6 @@ llvmpipe_map_transfers(struct llvmpipe_context *lp)
    for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
       lp_tile_cache_map_transfers(lp->cbuf_cache[i]);
    }
-
-   lp_tile_cache_map_transfers(lp->zsbuf_cache);
 }
 
 
@@ -76,8 +74,6 @@ llvmpipe_unmap_transfers(struct llvmpipe_context *lp)
    for (i = 0; i < lp->framebuffer.nr_cbufs; i++) {
       lp_tile_cache_unmap_transfers(lp->cbuf_cache[i]);
    }
-
-   lp_tile_cache_unmap_transfers(lp->zsbuf_cache);
 }
 
 
@@ -90,12 +86,10 @@ static void llvmpipe_destroy( struct pipe_context *pipe )
       draw_destroy( llvmpipe->draw );
 
       llvmpipe->quad.shade->destroy( llvmpipe->quad.shade );
-      llvmpipe->quad.depth_test->destroy( llvmpipe->quad.depth_test );
       llvmpipe->quad.blend->destroy( llvmpipe->quad.blend );
 
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       lp_destroy_tile_cache(llvmpipe->cbuf_cache[i]);
-   lp_destroy_tile_cache(llvmpipe->zsbuf_cache);
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
       lp_destroy_tex_tile_cache(llvmpipe->tex_cache[i]);
@@ -216,7 +210,6 @@ llvmpipe_create( struct pipe_screen *screen )
     */
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
       llvmpipe->cbuf_cache[i] = lp_create_tile_cache( screen );
-   llvmpipe->zsbuf_cache = lp_create_tile_cache( screen );
 
    for (i = 0; i < PIPE_MAX_SAMPLERS; i++)
       llvmpipe->tex_cache[i] = lp_create_tex_tile_cache( screen );
@@ -224,7 +217,6 @@ llvmpipe_create( struct pipe_screen *screen )
 
    /* setup quad rendering stages */
       llvmpipe->quad.shade = lp_quad_shade_stage(llvmpipe);
-      llvmpipe->quad.depth_test = lp_quad_depth_test_stage(llvmpipe);
       llvmpipe->quad.blend = lp_quad_blend_stage(llvmpipe);
 
    /* vertex shader samplers */
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index 6cda5e6..c31df0b 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -117,7 +117,6 @@ struct llvmpipe_context {
    /** Software quad rendering pipeline */
    struct {
       struct quad_stage *shade;
-      struct quad_stage *depth_test;
       struct quad_stage *blend;
 
       struct quad_stage *first; /**< points to one of the above stages */
@@ -140,7 +139,6 @@ struct llvmpipe_context {
    boolean dirty_render_cache;
    
    struct llvmpipe_tile_cache *cbuf_cache[PIPE_MAX_COLOR_BUFS];
-   struct llvmpipe_tile_cache *zsbuf_cache;
    
    unsigned tex_timestamp;
    struct llvmpipe_tex_tile_cache *tex_cache[PIPE_MAX_SAMPLERS];
diff --git a/src/gallium/drivers/llvmpipe/lp_flush.c b/src/gallium/drivers/llvmpipe/lp_flush.c
index f3b43cf..44b4696 100644
--- a/src/gallium/drivers/llvmpipe/lp_flush.c
+++ b/src/gallium/drivers/llvmpipe/lp_flush.c
@@ -80,8 +80,7 @@ llvmpipe_flush( struct pipe_context *pipe,
          if (llvmpipe->cbuf_cache[i])
             lp_flush_tile_cache(llvmpipe->cbuf_cache[i]);
 
-      if (llvmpipe->zsbuf_cache)
-         lp_flush_tile_cache(llvmpipe->zsbuf_cache);
+      /* FIXME: untile zsbuf! */
      
       llvmpipe->dirty_render_cache = FALSE;
    }
diff --git a/src/gallium/drivers/llvmpipe/lp_quad.h b/src/gallium/drivers/llvmpipe/lp_quad.h
index 9297749..0902716 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad.h
@@ -84,7 +84,6 @@ struct quad_header_output
 {
    /** colors in SOA format (rrrr, gggg, bbbb, aaaa) */
    float ALIGN16_ATTRIB color[PIPE_MAX_COLOR_BUFS][NUM_CHANNELS][QUAD_SIZE];
-   float depth[QUAD_SIZE];
 };
 
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c b/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
deleted file mode 100644
index 1243016..0000000
--- a/src/gallium/drivers/llvmpipe/lp_quad_depth_test.c
+++ /dev/null
@@ -1,792 +0,0 @@
-/**************************************************************************
- * 
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
- * All Rights Reserved.
- * 
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the
- * "Software"), to deal in the Software without restriction, including
- * without limitation the rights to use, copy, modify, merge, publish,
- * distribute, sub license, and/or sell copies of the Software, and to
- * permit persons to whom the Software is furnished to do so, subject to
- * the following conditions:
- * 
- * The above copyright notice and this permission notice (including the
- * next paragraph) shall be included in all copies or substantial portions
- * of the Software.
- * 
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
- * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
- * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
- * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
- * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
- * 
- **************************************************************************/
-
-/**
- * \brief  Quad depth testing
- */
-
-#include "pipe/p_defines.h"
-#include "util/u_memory.h"
-#include "tgsi/tgsi_scan.h"
-#include "lp_context.h"
-#include "lp_quad.h"
-#include "lp_surface.h"
-#include "lp_quad_pipe.h"
-#include "lp_tile_cache.h"
-#include "lp_state.h"           /* for lp_fragment_shader */
-
-
-struct depth_data {
-   struct pipe_surface *ps;
-   enum pipe_format format;
-   unsigned bzzzz[QUAD_SIZE];  /**< Z values fetched from depth buffer */
-   unsigned qzzzz[QUAD_SIZE];  /**< Z values from the quad */
-   ubyte stencilVals[QUAD_SIZE];
-   struct llvmpipe_cached_tile *tile;
-};
-
-
-
-static void
-get_depth_stencil_values( struct depth_data *data,
-                          const struct quad_header *quad )
-{
-   unsigned j;
-   const struct llvmpipe_cached_tile *tile = data->tile;
-
-   switch (data->format) {
-   case PIPE_FORMAT_Z16_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         data->bzzzz[j] = tile->data.depth16[y][x];
-      }
-      break;
-   case PIPE_FORMAT_Z32_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         data->bzzzz[j] = tile->data.depth32[y][x];
-      }
-      break;
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_S8Z24_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         data->bzzzz[j] = tile->data.depth32[y][x] & 0xffffff;
-         data->stencilVals[j] = tile->data.depth32[y][x] >> 24;
-      }
-   break;
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24S8_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         data->bzzzz[j] = tile->data.depth32[y][x] >> 8;
-         data->stencilVals[j] = tile->data.depth32[y][x] & 0xff;
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-/* If the shader has not been run, interpolate the depth values
- * ourselves.
- */
-static void
-interpolate_quad_depth( struct quad_header *quad )
-{
-   const float fx = (float) quad->input.x0;
-   const float fy = (float) quad->input.y0;
-   const float dzdx = quad->coef->dadx[0][2];
-   const float dzdy = quad->coef->dady[0][2];
-   const float z0 = quad->coef->a0[0][2] + dzdx * fx + dzdy * fy;
-
-   quad->output.depth[0] = z0;
-   quad->output.depth[1] = z0 + dzdx;
-   quad->output.depth[2] = z0 + dzdy;
-   quad->output.depth[3] = z0 + dzdx + dzdy;
-}
-
-
-static void
-convert_quad_depth( struct depth_data *data, 
-                    const struct quad_header *quad )
-{
-   unsigned j;
-
-   /* Convert quad's float depth values to int depth values (qzzzz).
-    * If the Z buffer stores integer values, we _have_ to do the depth
-    * compares with integers (not floats).  Otherwise, the float->int->float
-    * conversion of Z values (which isn't an identity function) will cause
-    * Z-fighting errors.
-    */
-   switch (data->format) {
-   case PIPE_FORMAT_Z16_UNORM:
-      {
-         float scale = 65535.0;
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
-         }
-      }
-      break;
-   case PIPE_FORMAT_Z32_UNORM:
-      {
-         double scale = (double) (uint) ~0UL;
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
-         }
-      }
-      break;
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_S8Z24_UNORM:
-      {
-         float scale = (float) ((1 << 24) - 1);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
-         }
-      }
-      break;
-   case PIPE_FORMAT_Z24X8_UNORM:
-   case PIPE_FORMAT_Z24S8_UNORM:
-      {
-         float scale = (float) ((1 << 24) - 1);
-
-         for (j = 0; j < QUAD_SIZE; j++) {
-            data->qzzzz[j] = (unsigned) (quad->output.depth[j] * scale);
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-
-static void
-write_depth_stencil_values( struct depth_data *data,
-                            struct quad_header *quad )
-{
-   struct llvmpipe_cached_tile *tile = data->tile;
-   unsigned j;
-
-   /* put updated Z values back into cached tile */
-   switch (data->format) {
-   case PIPE_FORMAT_Z16_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         tile->data.depth16[y][x] = (ushort) data->bzzzz[j];
-      }
-      break;
-   case PIPE_FORMAT_X8Z24_UNORM:
-   case PIPE_FORMAT_Z32_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         tile->data.depth32[y][x] = data->bzzzz[j];
-      }
-      break;
-   case PIPE_FORMAT_S8Z24_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         tile->data.depth32[y][x] = (data->stencilVals[j] << 24) | data->bzzzz[j];
-      }
-      break;
-   case PIPE_FORMAT_Z24S8_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         tile->data.depth32[y][x] = (data->bzzzz[j] << 8) | data->stencilVals[j];
-      }
-      break;
-   case PIPE_FORMAT_Z24X8_UNORM:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         int x = quad->input.x0 % TILE_SIZE + (j & 1);
-         int y = quad->input.y0 % TILE_SIZE + (j >> 1);
-         tile->data.depth32[y][x] = data->bzzzz[j] << 8;
-      }
-      break;
-   default:
-      assert(0);
-   }
-}
-
-
-
-
-/** Only 8-bit stencil supported */
-#define STENCIL_MAX 0xff
-
-
-/**
- * Do the basic stencil test (compare stencil buffer values against the
- * reference value.
- *
- * \param data->stencilVals  the stencil values from the stencil buffer
- * \param func  the stencil func (PIPE_FUNC_x)
- * \param ref  the stencil reference value
- * \param valMask  the stencil value mask indicating which bits of the stencil
- *                 values and ref value are to be used.
- * \return mask indicating which pixels passed the stencil test
- */
-static unsigned
-do_stencil_test(struct depth_data *data,
-                unsigned func,
-                unsigned ref, unsigned valMask)
-{
-   unsigned passMask = 0x0;
-   unsigned j;
-
-   ref &= valMask;
-
-   switch (func) {
-   case PIPE_FUNC_NEVER:
-      /* passMask = 0x0 */
-      break;
-   case PIPE_FUNC_LESS:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (ref < (data->stencilVals[j] & valMask)) {
-            passMask |= (1 << j);
-         }
-      }
-      break;
-   case PIPE_FUNC_EQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (ref == (data->stencilVals[j] & valMask)) {
-            passMask |= (1 << j);
-         }
-      }
-      break;
-   case PIPE_FUNC_LEQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (ref <= (data->stencilVals[j] & valMask)) {
-            passMask |= (1 << j);
-         }
-      }
-      break;
-   case PIPE_FUNC_GREATER:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (ref > (data->stencilVals[j] & valMask)) {
-            passMask |= (1 << j);
-         }
-      }
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (ref != (data->stencilVals[j] & valMask)) {
-            passMask |= (1 << j);
-         }
-      }
-      break;
-   case PIPE_FUNC_GEQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (ref >= (data->stencilVals[j] & valMask)) {
-            passMask |= (1 << j);
-         }
-      }
-      break;
-   case PIPE_FUNC_ALWAYS:
-      passMask = MASK_ALL;
-      break;
-   default:
-      assert(0);
-   }
-
-   return passMask;
-}
-
-
-/**
- * Apply the stencil operator to stencil values.
- *
- * \param data->stencilVals  the stencil buffer values (read and written)
- * \param mask  indicates which pixels to update
- * \param op  the stencil operator (PIPE_STENCIL_OP_x)
- * \param ref  the stencil reference value
- * \param wrtMask  writemask controlling which bits are changed in the
- *                 stencil values
- */
-static void
-apply_stencil_op(struct depth_data *data,
-                 unsigned mask, unsigned op, ubyte ref, ubyte wrtMask)
-{
-   unsigned j;
-   ubyte newstencil[QUAD_SIZE];
-
-   for (j = 0; j < QUAD_SIZE; j++) {
-      newstencil[j] = data->stencilVals[j];
-   }
-
-   switch (op) {
-   case PIPE_STENCIL_OP_KEEP:
-      /* no-op */
-      break;
-   case PIPE_STENCIL_OP_ZERO:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            newstencil[j] = 0;
-         }
-      }
-      break;
-   case PIPE_STENCIL_OP_REPLACE:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            newstencil[j] = ref;
-         }
-      }
-      break;
-   case PIPE_STENCIL_OP_INCR:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            if (data->stencilVals[j] < STENCIL_MAX) {
-               newstencil[j] = data->stencilVals[j] + 1;
-            }
-         }
-      }
-      break;
-   case PIPE_STENCIL_OP_DECR:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            if (data->stencilVals[j] > 0) {
-               newstencil[j] = data->stencilVals[j] - 1;
-            }
-         }
-      }
-      break;
-   case PIPE_STENCIL_OP_INCR_WRAP:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            newstencil[j] = data->stencilVals[j] + 1;
-         }
-      }
-      break;
-   case PIPE_STENCIL_OP_DECR_WRAP:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            newstencil[j] = data->stencilVals[j] - 1;
-         }
-      }
-      break;
-   case PIPE_STENCIL_OP_INVERT:
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (mask & (1 << j)) {
-            newstencil[j] = ~data->stencilVals[j];
-         }
-      }
-      break;
-   default:
-      assert(0);
-   }
-
-   /*
-    * update the stencil values
-    */
-   if (wrtMask != STENCIL_MAX) {
-      /* apply bit-wise stencil buffer writemask */
-      for (j = 0; j < QUAD_SIZE; j++) {
-         data->stencilVals[j] = (wrtMask & newstencil[j]) | (~wrtMask & data->stencilVals[j]);
-      }
-   }
-   else {
-      for (j = 0; j < QUAD_SIZE; j++) {
-         data->stencilVals[j] = newstencil[j];
-      }
-   }
-}
-
-   
-
-/*
- * To increase efficiency, we should probably have multiple versions
- * of this function that are specifically for Z16, Z32 and FP Z buffers.
- * Try to effectively do that with codegen...
- */
-
-static boolean
-depth_test_quad(struct quad_stage *qs, 
-                struct depth_data *data,
-                struct quad_header *quad)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   unsigned zmask = 0;
-   unsigned j;
-
-   switch (llvmpipe->depth_stencil->depth.func) {
-   case PIPE_FUNC_NEVER:
-      /* zmask = 0 */
-      break;
-   case PIPE_FUNC_LESS:
-      /* Note this is pretty much a single sse or cell instruction.  
-       * Like this:  quad->mask &= (quad->outputs.depth < zzzz);
-       */
-      for (j = 0; j < QUAD_SIZE; j++) {
-	 if (data->qzzzz[j] < data->bzzzz[j]) 
-	    zmask |= 1 << j;
-      }
-      break;
-   case PIPE_FUNC_EQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-	 if (data->qzzzz[j] == data->bzzzz[j]) 
-	    zmask |= 1 << j;
-      }
-      break;
-   case PIPE_FUNC_LEQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-	 if (data->qzzzz[j] <= data->bzzzz[j]) 
-	    zmask |= (1 << j);
-      }
-      break;
-   case PIPE_FUNC_GREATER:
-      for (j = 0; j < QUAD_SIZE; j++) {
-	 if (data->qzzzz[j] > data->bzzzz[j]) 
-	    zmask |= (1 << j);
-      }
-      break;
-   case PIPE_FUNC_NOTEQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-	 if (data->qzzzz[j] != data->bzzzz[j]) 
-	    zmask |= (1 << j);
-      }
-      break;
-   case PIPE_FUNC_GEQUAL:
-      for (j = 0; j < QUAD_SIZE; j++) {
-	 if (data->qzzzz[j] >= data->bzzzz[j]) 
-	    zmask |= (1 << j);
-      }
-      break;
-   case PIPE_FUNC_ALWAYS:
-      zmask = MASK_ALL;
-      break;
-   default:
-      assert(0);
-   }
-
-   quad->inout.mask &= zmask;
-   if (quad->inout.mask == 0)
-      return FALSE;
-
-   /* Update our internal copy only if writemask set.  Even if
-    * depth.writemask is FALSE, may still need to write out buffer
-    * data due to stencil changes.
-    */
-   if (llvmpipe->depth_stencil->depth.writemask) {
-      for (j = 0; j < QUAD_SIZE; j++) {
-         if (quad->inout.mask & (1 << j)) {
-            data->bzzzz[j] = data->qzzzz[j];
-         }
-      }
-   }
-
-   return TRUE;
-}
-
-
-
-/**
- * Do stencil (and depth) testing.  Stenciling depends on the outcome of
- * depth testing.
- */
-static boolean
-depth_stencil_test_quad(struct quad_stage *qs, 
-                        struct depth_data *data,
-                        struct quad_header *quad)
-{
-   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
-   unsigned func, zFailOp, zPassOp, failOp;
-   ubyte ref, wrtMask, valMask;
-   uint face = quad->input.facing;
-
-   if (!llvmpipe->depth_stencil->stencil[1].enabled) {
-      /* single-sided stencil test, use front (face=0) state */
-      face = 0;
-   }
-
-   /* choose front or back face function, operator, etc */
-   /* XXX we could do these initializations once per primitive */
-   func    = llvmpipe->depth_stencil->stencil[face].func;
-   failOp  = llvmpipe->depth_stencil->stencil[face].fail_op;
-   zFailOp = llvmpipe->depth_stencil->stencil[face].zfail_op;
-   zPassOp = llvmpipe->depth_stencil->stencil[face].zpass_op;
-   ref     = llvmpipe->depth_stencil->stencil[face].ref_value;
-   wrtMask = llvmpipe->depth_stencil->stencil[face].writemask;
-   valMask = llvmpipe->depth_stencil->stencil[face].valuemask;
-
-
-   /* do the stencil test first */
-   {
-      unsigned passMask, failMask;
-      passMask = do_stencil_test(data, func, ref, valMask);
-      failMask = quad->inout.mask & ~passMask;
-      quad->inout.mask &= passMask;
-
-      if (failOp != PIPE_STENCIL_OP_KEEP) {
-         apply_stencil_op(data, failMask, failOp, ref, wrtMask);
-      }
-   }
-
-   if (quad->inout.mask) {
-      /* now the pixels that passed the stencil test are depth tested */
-      if (llvmpipe->depth_stencil->depth.enabled) {
-         const unsigned origMask = quad->inout.mask;
-
-         depth_test_quad(qs, data, quad);  /* quad->mask is updated */
-
-         /* update stencil buffer values according to z pass/fail result */
-         if (zFailOp != PIPE_STENCIL_OP_KEEP) {
-            const unsigned failMask = origMask & ~quad->inout.mask;
-            apply_stencil_op(data, failMask, zFailOp, ref, wrtMask);
-         }
-
-         if (zPassOp != PIPE_STENCIL_OP_KEEP) {
-            const unsigned passMask = origMask & quad->inout.mask;
-            apply_stencil_op(data, passMask, zPassOp, ref, wrtMask);
-         }
-      }
-      else {
-         /* no depth test, apply Zpass operator to stencil buffer values */
-         apply_stencil_op(data, quad->inout.mask, zPassOp, ref, wrtMask);
-      }
-   }
-
-   return quad->inout.mask != 0;
-}
-
-
-
-static unsigned mask_count[0x8] = 
-{
-   0,                           /* 0x0 */
-   1,                           /* 0x1 */
-   1,                           /* 0x2 */
-   2,                           /* 0x3 */
-   1,                           /* 0x4 */
-   2,                           /* 0x5 */
-   2,                           /* 0x6 */
-   3,                           /* 0x7 */
-};
-
-
-
-static void
-depth_test_quads_fallback(struct quad_stage *qs, 
-                          struct quad_header *quads[],
-                          unsigned nr)
-{
-   unsigned i, pass = 0;
-   const struct lp_fragment_shader *fs = qs->llvmpipe->fs;
-   boolean interp_depth = !fs->info.writes_z;
-   struct depth_data data;
-
-
-   if (qs->llvmpipe->framebuffer.zsbuf && 
-       (qs->llvmpipe->depth_stencil->depth.enabled ||
-        qs->llvmpipe->depth_stencil->stencil[0].enabled)) {
-
-      data.ps = qs->llvmpipe->framebuffer.zsbuf;
-      data.format = data.ps->format;
-      data.tile = lp_get_cached_tile(qs->llvmpipe->zsbuf_cache, 
-                                     quads[0]->input.x0, 
-                                     quads[0]->input.y0);
-
-      for (i = 0; i < nr; i++) {
-         if(!quads[i]->inout.mask)
-            continue;
-
-         get_depth_stencil_values(&data, quads[i]);
-
-         if (qs->llvmpipe->depth_stencil->depth.enabled) {
-            if (interp_depth)
-               interpolate_quad_depth(quads[i]);
-
-            convert_quad_depth(&data, quads[i]);
-         }
-
-         if (qs->llvmpipe->depth_stencil->stencil[0].enabled) {
-            if (!depth_stencil_test_quad(qs, &data, quads[i]))
-               continue;
-         }
-         else {
-            if (!depth_test_quad(qs, &data, quads[i]))
-               continue;
-         }
-
-         if (qs->llvmpipe->depth_stencil->stencil[0].enabled ||
-             qs->llvmpipe->depth_stencil->depth.writemask)
-            write_depth_stencil_values(&data, quads[i]);
-
-         qs->llvmpipe->occlusion_count += mask_count[quads[i]->inout.mask];
-         ++pass;
-      }
-   }
-
-   if (pass)
-      qs->next->run(qs->next, quads, nr);
-}
-
-/* XXX: this function assumes setup function actually emits linear
- * spans of quads.  It seems a lot more natural to do (early)
- * depth-testing on spans rather than quads.
- */
-static void
-depth_interp_z16_less_write(struct quad_stage *qs, 
-                            struct quad_header *quads[],
-                            unsigned nr)
-{
-   unsigned i, pass = 0;
-   const unsigned ix = quads[0]->input.x0;
-   const unsigned iy = quads[0]->input.y0;
-   const float fx = (float) ix;
-   const float fy = (float) iy;
-   const float dzdx = quads[0]->coef->dadx[0][2];
-   const float dzdy = quads[0]->coef->dady[0][2];
-   const float z0 = quads[0]->coef->a0[0][2] + dzdx * fx + dzdy * fy;
-   struct llvmpipe_cached_tile *tile;
-   ushort (*depth16)[TILE_SIZE];
-   ushort idepth[4], depth_step;
-   const float scale = 65535.0;
-
-   idepth[0] = (ushort)((z0) * scale);
-   idepth[1] = (ushort)((z0 + dzdx) * scale);
-   idepth[2] = (ushort)((z0 + dzdy) * scale);
-   idepth[3] = (ushort)((z0 + dzdx + dzdy) * scale);
-
-   depth_step = (ushort)(dzdx * 2 * scale);
-
-   tile = lp_get_cached_tile(qs->llvmpipe->zsbuf_cache, ix, iy);
-
-   depth16 = (ushort (*)[TILE_SIZE])
-      &tile->data.depth16[iy % TILE_SIZE][ix % TILE_SIZE];
-
-   for (i = 0; i < nr; i++) {
-      unsigned outmask = quads[i]->inout.mask;
-      unsigned mask = 0;
-      
-      if ((outmask & 1) && idepth[0] < depth16[0][0]) {
-         depth16[0][0] = idepth[0];
-         mask |= (1 << 0);
-      }
-
-      if ((outmask & 2) && idepth[1] < depth16[0][1]) {
-         depth16[0][1] = idepth[1];
-         mask |= (1 << 1);
-      }
-
-      if ((outmask & 4) && idepth[2] < depth16[1][0]) {
-         depth16[1][0] = idepth[2];
-         mask |= (1 << 2);
-      }
-
-      if ((outmask & 8) && idepth[3] < depth16[1][1]) {
-         depth16[1][1] = idepth[3];
-         mask |= (1 << 3);
-      }
-
-      idepth[0] += depth_step;
-      idepth[1] += depth_step;
-      idepth[2] += depth_step;
-      idepth[3] += depth_step;
-
-      depth16 = (ushort (*)[TILE_SIZE]) &depth16[0][2];
-
-      quads[i]->inout.mask = mask;
-      if (quads[i]->inout.mask)
-         ++pass;
-   }
-
-   if (pass)
-      qs->next->run(qs->next, quads, nr);
-
-}
-
-
-static void
-depth_noop(struct quad_stage *qs, 
-           struct quad_header *quads[],
-           unsigned nr)
-{
-   qs->next->run(qs->next, quads, nr);
-}
-
-
-
-static void
-choose_depth_test(struct quad_stage *qs, 
-                  struct quad_header *quads[],
-                  unsigned nr)
-{
-   boolean interp_depth = !qs->llvmpipe->fs->info.writes_z;
-
-   boolean depth = (qs->llvmpipe->framebuffer.zsbuf && 
-                    qs->llvmpipe->depth_stencil->depth.enabled);
-
-   unsigned depthfunc = qs->llvmpipe->depth_stencil->depth.func;
-
-   boolean stencil = qs->llvmpipe->depth_stencil->stencil[0].enabled;
-
-   boolean depthwrite = qs->llvmpipe->depth_stencil->depth.writemask;
-
-
-   qs->run = depth_test_quads_fallback;
-
-   if (!depth &&
-       !stencil) {
-      qs->run = depth_noop;
-   }
-   else if (interp_depth &&
-            depth && 
-            depthfunc == PIPE_FUNC_LESS && 
-            depthwrite && 
-            !stencil) 
-   {
-      switch (qs->llvmpipe->framebuffer.zsbuf->format) {
-      case PIPE_FORMAT_Z16_UNORM:
-         qs->run = depth_interp_z16_less_write;
-         break;
-      default:
-         break;
-      }
-   }
-
-   qs->run( qs, quads, nr );
-}
-
-
-
-
-
-static void depth_test_begin(struct quad_stage *qs)
-{
-   qs->run = choose_depth_test;
-   qs->next->begin(qs->next);
-}
-
-
-static void depth_test_destroy(struct quad_stage *qs)
-{
-   FREE( qs );
-}
-
-
-struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe )
-{
-   struct quad_stage *stage = CALLOC_STRUCT(quad_stage);
-
-   stage->llvmpipe = llvmpipe;
-   stage->begin = depth_test_begin;
-   stage->run = choose_depth_test;
-   stage->destroy = depth_test_destroy;
-
-   return stage;
-}
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_fs.c b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
index 2736efc..4f7a061 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_fs.c
@@ -39,23 +39,21 @@
 #include "util/u_math.h"
 #include "util/u_memory.h"
 #include "pipe/p_defines.h"
-#include "pipe/p_shader_tokens.h"
+#include "pipe/p_screen.h"
 
 #include "lp_context.h"
 #include "lp_state.h"
 #include "lp_quad.h"
 #include "lp_quad_pipe.h"
 #include "lp_texture.h"
-#include "lp_tex_sample.h"
 
 
 struct quad_shade_stage
 {
    struct quad_stage stage;  /**< base class */
 
-   union tgsi_exec_channel ALIGN16_ATTRIB pos[NUM_CHANNELS];
-
-   uint32_t ALIGN16_ATTRIB mask[NUM_CHANNELS];
+   struct pipe_transfer *transfer;
+   uint8_t *map;
 };
 
 
@@ -79,6 +77,10 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
    struct lp_fragment_shader *fs = llvmpipe->fs;
    void *constants;
    struct tgsi_sampler **samplers;
+   const unsigned x = quad->input.x0;
+   const unsigned y = quad->input.y0;
+   void *depth;
+   uint32_t ALIGN16_ATTRIB mask[NUM_CHANNELS];
    unsigned chan_index;
 
    assert(fs->current);
@@ -89,23 +91,38 @@ shade_quad(struct quad_stage *qs, struct quad_header *quad)
    samplers = (struct tgsi_sampler **)llvmpipe->tgsi.frag_samplers_list;
 
    for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-      qss->mask[chan_index] = ~0;
+      mask[chan_index] = quad->inout.mask & (1 << chan_index) ? ~0 : 0;
+
+   if(qss->map) {
+      assert((x % 2) == 0);
+      assert((y % 2) == 0);
+      depth = qss->map +
+              y*qss->transfer->stride +
+              2*x*qss->transfer->block.size;
+   }
+   else
+      depth = NULL;
+
+   assert((((uintptr_t)mask) & 0xf) == 0);
+   assert((((uintptr_t)quad->output.color) & 0xf) == 0);
+   assert((((uintptr_t)depth) & 0xf) == 0);
 
    /* run shader */
-   fs->current->jit_function( quad->input.x0,
-                              quad->input.y0,
+   fs->current->jit_function( x,
+                              y,
                               quad->coef->a0,
                               quad->coef->dadx,
                               quad->coef->dady,
                               constants,
-                              qss->mask,
+                              mask,
                               quad->output.color,
-                              quad->output.depth,
+                              depth,
                               samplers);
 
    for (chan_index = 0; chan_index < NUM_CHANNELS; ++chan_index)
-      if(!qss->mask[chan_index])
+      if(!mask[chan_index])
          quad->inout.mask &= ~(1 << chan_index);
+
    if (quad->inout.mask == 0)
       return FALSE;
 
@@ -168,6 +185,31 @@ shade_quads(struct quad_stage *qs,
 static void
 shade_begin(struct quad_stage *qs)
 {
+   struct quad_shade_stage *qss = quad_shade_stage( qs );
+   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+   struct pipe_screen *screen = llvmpipe->pipe.screen;
+   struct pipe_surface *zsbuf = llvmpipe->framebuffer.zsbuf;
+
+   if(qss->transfer) {
+      if(qss->map) {
+         screen->transfer_unmap(screen, qss->transfer);
+         qss->map = NULL;
+      }
+
+      screen->tex_transfer_destroy(qss->transfer);
+      qss->transfer = NULL;
+   }
+
+   if(zsbuf) {
+      qss->transfer = screen->get_tex_transfer(screen, zsbuf->texture,
+                                               zsbuf->face, zsbuf->level, zsbuf->zslice,
+                                               PIPE_TRANSFER_READ_WRITE,
+                                               0, 0, zsbuf->width, zsbuf->height);
+      if(qss->transfer)
+         qss->map = screen->transfer_map(screen, qss->transfer);
+
+   }
+
    qs->next->begin(qs->next);
 }
 
@@ -175,6 +217,20 @@ shade_begin(struct quad_stage *qs)
 static void
 shade_destroy(struct quad_stage *qs)
 {
+   struct quad_shade_stage *qss = quad_shade_stage( qs );
+   struct llvmpipe_context *llvmpipe = qs->llvmpipe;
+   struct pipe_screen *screen = llvmpipe->pipe.screen;
+
+   if(qss->transfer) {
+      if(qss->map) {
+         screen->transfer_unmap(screen, qss->transfer);
+         qss->map = NULL;
+      }
+
+      screen->tex_transfer_destroy(qss->transfer);
+      qss->transfer = NULL;
+   }
+
    align_free( qs );
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
index 5371e1c..70d3ad3 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.c
@@ -42,22 +42,8 @@ lp_push_quad_first( struct llvmpipe_context *lp,
 void
 lp_build_quad_pipeline(struct llvmpipe_context *lp)
 {
-   boolean early_depth_test =
-      lp->depth_stencil->depth.enabled &&
-      lp->framebuffer.zsbuf &&
-      !lp->depth_stencil->alpha.enabled &&
-      !lp->fs->info.uses_kill &&
-      !lp->fs->info.writes_z;
-
    lp->quad.first = lp->quad.blend;
 
-   if (early_depth_test) {
-      lp_push_quad_first( lp, lp->quad.shade );
-      lp_push_quad_first( lp, lp->quad.depth_test );
-   }
-   else {
-      lp_push_quad_first( lp, lp->quad.depth_test );
-      lp_push_quad_first( lp, lp->quad.shade );
-   }
+   lp_push_quad_first( lp, lp->quad.shade );
 }
 
diff --git a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
index ee0dd65..52d4d68 100644
--- a/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
+++ b/src/gallium/drivers/llvmpipe/lp_quad_pipe.h
@@ -58,13 +58,10 @@ struct quad_stage {
 struct quad_stage *lp_quad_polygon_stipple_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_earlyz_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_shade_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_alpha_test_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_stencil_test_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_depth_test_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_occlusion_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_coverage_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_blend_stage( struct llvmpipe_context *llvmpipe );
-struct quad_stage *lp_quad_colormask_stage( struct llvmpipe_context *llvmpipe );
 struct quad_stage *lp_quad_output_stage( struct llvmpipe_context *llvmpipe );
 
 void lp_build_quad_pipeline(struct llvmpipe_context *lp);
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 2236711..750573a 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -138,6 +138,11 @@ llvmpipe_is_format_supported( struct pipe_screen *screen,
           target == PIPE_TEXTURE_3D ||
           target == PIPE_TEXTURE_CUBE);
 
+   if(format == PIPE_FORMAT_Z16_UNORM)
+      return FALSE;
+   if(format == PIPE_FORMAT_S8_UNORM)
+      return FALSE;
+
    switch(format) {
    case PIPE_FORMAT_DXT1_RGB:
    case PIPE_FORMAT_DXT1_RGBA:
diff --git a/src/gallium/drivers/llvmpipe/lp_state.h b/src/gallium/drivers/llvmpipe/lp_state.h
index db21096..2d6add8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state.h
+++ b/src/gallium/drivers/llvmpipe/lp_state.h
@@ -74,15 +74,18 @@ typedef void
 struct lp_fragment_shader;
 
 
-/**
- * Subclass of pipe_shader_state (though it doesn't really need to be).
- *
- * This is starting to look an awful lot like a quad pipeline stage...
- */
+struct lp_fragment_shader_variant_key
+{
+   struct pipe_depth_state depth;
+   struct pipe_alpha_state alpha;
+};
+
+
 struct lp_fragment_shader_variant
 {
    struct lp_fragment_shader *shader;
-   struct pipe_alpha_state alpha;
+
+   struct lp_fragment_shader_variant_key key;
 
    LLVMValueRef function;
 
@@ -103,8 +106,6 @@ struct lp_fragment_shader
 
    struct tgsi_shader_info info;
 
-   struct llvmpipe_screen *screen;
-
    struct lp_fragment_shader_variant *variants;
 
    struct lp_fragment_shader_variant *current;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2227467..51962e6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -28,6 +28,7 @@
 
 #include "pipe/p_defines.h"
 #include "util/u_memory.h"
+#include "util/u_format.h"
 #include "util/u_debug_dump.h"
 #include "pipe/internal/p_winsys_screen.h"
 #include "pipe/p_shader_tokens.h"
@@ -36,6 +37,8 @@
 #include "tgsi/tgsi_scan.h"
 #include "tgsi/tgsi_parse.h"
 #include "lp_bld_type.h"
+#include "lp_bld_conv.h"
+#include "lp_bld_depth.h"
 #include "lp_bld_tgsi.h"
 #include "lp_bld_alpha.h"
 #include "lp_bld_swizzle.h"
@@ -105,11 +108,54 @@ setup_pos_vector(LLVMBuilderRef builder,
 }
 
 
+static void
+depth_test_generate(struct llvmpipe_context *lp,
+                    LLVMBuilderRef builder,
+                    const struct pipe_depth_state *state,
+                    union lp_type src_type,
+                    LLVMValueRef *mask,
+                    LLVMValueRef src,
+                    LLVMValueRef dst_ptr)
+{
+   const struct util_format_description *format_desc;
+   union lp_type dst_type;
+
+   if(!lp->framebuffer.zsbuf)
+      return;
+
+   format_desc = util_format_description(lp->framebuffer.zsbuf->format);
+   assert(format_desc);
+
+   dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
+
+   assert(dst_type.width == src_type.width);
+   assert(dst_type.length == src_type.length);
+
+#if 1
+   src = lp_build_clamped_float_to_unsigned_norm(builder,
+                                                 src_type,
+                                                 dst_type.width,
+                                                 src);
+#else
+   lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
+#endif
+
+   lp_build_depth_test(builder,
+                       state,
+                       dst_type,
+                       format_desc,
+                       mask,
+                       src,
+                       dst_ptr);
+}
+
+
 static struct lp_fragment_shader_variant *
-shader_generate(struct llvmpipe_screen *screen,
+shader_generate(struct llvmpipe_context *lp,
                 struct lp_fragment_shader *shader,
-                const struct pipe_alpha_state *alpha)
+                const struct lp_fragment_shader_variant_key *key)
 {
+   struct llvmpipe_screen *screen = llvmpipe_screen(lp->pipe.screen);
    struct lp_fragment_shader_variant *variant;
    const struct tgsi_token *tokens = shader->base.tokens;
    union lp_type type;
@@ -140,9 +186,13 @@ shader_generate(struct llvmpipe_screen *screen,
 
 #ifdef DEBUG
    tgsi_dump(shader->base.tokens, 0);
-   debug_printf("alpha.enabled = %u\n", alpha->enabled);
-   debug_printf("alpha.func = %s\n", debug_dump_func(alpha->func, TRUE));
-   debug_printf("alpha.ref_value = %f\n", alpha->ref_value);
+   debug_printf("depth.enabled = %u\n", key->depth.enabled);
+   debug_printf("depth.func = %s\n", debug_dump_func(key->depth.func, TRUE));
+   debug_printf("depth.writemask = %u\n", key->depth.writemask);
+   debug_printf("depth.occlusion_count = %u\n", key->depth.occlusion_count);
+   debug_printf("alpha.enabled = %u\n", key->alpha.enabled);
+   debug_printf("alpha.func = %s\n", debug_dump_func(key->alpha.func, TRUE));
+   debug_printf("alpha.ref_value = %f\n", key->alpha.ref_value);
 #endif
 
    variant = CALLOC_STRUCT(lp_fragment_shader_variant);
@@ -150,7 +200,7 @@ shader_generate(struct llvmpipe_screen *screen,
       return NULL;
 
    variant->shader = shader;
-   memcpy(&variant->alpha, alpha, sizeof *alpha);
+   memcpy(&variant->key, key, sizeof *key);
 
    type.value = 0;
    type.floating = TRUE; /* floating point values */
@@ -171,7 +221,7 @@ shader_generate(struct llvmpipe_screen *screen,
    arg_types[5] = LLVMPointerType(elem_type, 0);       /* consts */
    arg_types[6] = LLVMPointerType(int_vec_type, 0);    /* mask */
    arg_types[7] = LLVMPointerType(vec_type, 0);        /* color */
-   arg_types[8] = LLVMPointerType(vec_type, 0);        /* depth */
+   arg_types[8] = LLVMPointerType(int_vec_type, 0);    /* depth */
    arg_types[9] = LLVMPointerType(LLVMInt8Type(), 0);  /* samplers */
 
    func_type = LLVMFunctionType(LLVMVoidType(), arg_types, Elements(arg_types), 0);
@@ -212,6 +262,15 @@ shader_generate(struct llvmpipe_screen *screen,
 
    mask = LLVMBuildLoad(builder, mask_ptr, "");
 
+   /* FIXME:
+   early_depth_test =
+      lp->depth_stencil->depth.enabled &&
+      lp->framebuffer.zsbuf &&
+      !lp->depth_stencil->alpha.enabled &&
+      !lp->fs->info.uses_kill &&
+      !lp->fs->info.writes_z;
+   */
+
    memset(outputs, 0, sizeof outputs);
 
    lp_build_tgsi_soa(builder, tokens, type, &mask,
@@ -242,14 +301,18 @@ shader_generate(struct llvmpipe_screen *screen,
                }
 
             case TGSI_SEMANTIC_POSITION:
-               if(chan == 3)
-                  LLVMBuildStore(builder, outputs[attrib][chan], depth_ptr);
+               if(chan == 2)
+                  pos[2] = outputs[attrib][chan];
                break;
             }
          }
       }
    }
 
+   depth_test_generate(lp, builder, &key->depth,
+                       type, &mask,
+                       pos[2], depth_ptr);
+
    if(mask)
       LLVMBuildStore(builder, mask, mask_ptr);
 
@@ -295,7 +358,6 @@ void *
 llvmpipe_create_fs_state(struct pipe_context *pipe,
                          const struct pipe_shader_state *templ)
 {
-   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
    struct lp_fragment_shader *shader;
 
    shader = CALLOC_STRUCT(lp_fragment_shader);
@@ -308,8 +370,6 @@ llvmpipe_create_fs_state(struct pipe_context *pipe,
    /* we need to keep a local copy of the tokens */
    shader->base.tokens = tgsi_dup_tokens(templ->tokens);
 
-   shader->screen = screen;
-
    return shader;
 }
 
@@ -328,11 +388,12 @@ llvmpipe_bind_fs_state(struct pipe_context *pipe, void *fs)
 void
 llvmpipe_delete_fs_state(struct pipe_context *pipe, void *fs)
 {
+   struct llvmpipe_context *llvmpipe = llvmpipe_context(pipe);
+   struct llvmpipe_screen *screen = llvmpipe_screen(pipe->screen);
    struct lp_fragment_shader *shader = fs;
    struct lp_fragment_shader_variant *variant;
-   struct llvmpipe_screen *screen = shader->screen;
 
-   assert(fs != llvmpipe_context(pipe)->fs);
+   assert(fs != llvmpipe->fs);
 
    variant = shader->variants;
    while(variant) {
@@ -436,19 +497,23 @@ llvmpipe_set_constant_buffer(struct pipe_context *pipe,
 void llvmpipe_update_fs(struct llvmpipe_context *lp)
 {
    struct lp_fragment_shader *shader = lp->fs;
-   const struct pipe_alpha_state *alpha = &lp->depth_stencil->alpha;
+   struct lp_fragment_shader_variant_key key; /* state used to look up a matching variant */
    struct lp_fragment_shader_variant *variant;
 
+   memset(&key, 0, sizeof key); /* zero the whole key first: it is compared with memcmp() below */
+   memcpy(&key.depth, &lp->depth_stencil->depth, sizeof key.depth); /* size of the struct, not of a pointer to it */
+   memcpy(&key.alpha, &lp->depth_stencil->alpha, sizeof key.alpha); /* likewise: copy the full alpha state */
+
    variant = shader->variants;
    while(variant) {
-      if(memcmp(&variant->alpha, alpha, sizeof *alpha) == 0)
+      if(memcmp(&variant->key, &key, sizeof key) == 0) /* existing variant generated for this key */
          break;
 
       variant = variant->next;
    }
 
    if(!variant)
-      variant = shader_generate(shader->screen, shader, alpha);
+      variant = shader_generate(lp, shader, &key); /* no existing variant matches: generate one */
 
    shader->current = variant;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_surface.c b/src/gallium/drivers/llvmpipe/lp_state_surface.c
index 1716048..00c61c4 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_surface.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_surface.c
@@ -67,15 +67,9 @@ llvmpipe_set_framebuffer_state(struct pipe_context *pipe,
 
    /* zbuf changing? */
    if (lp->framebuffer.zsbuf != fb->zsbuf) {
-      /* flush old */
-      lp_flush_tile_cache(lp->zsbuf_cache);
-
       /* assign new */
       lp->framebuffer.zsbuf = fb->zsbuf;
 
-      /* update cache */
-      lp_tile_cache_set_surface(lp->zsbuf_cache, fb->zsbuf);
-
       /* Tell draw module how deep the Z/depth buffer is */
       if (lp->framebuffer.zsbuf) {
          int depth_bits;
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 1c1e015..18c2362 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -133,6 +133,12 @@ llvmpipe_texture_create(struct pipe_screen *screen,
    pipe_reference_init(&lpt->base.reference, 1);
    lpt->base.screen = screen;
 
+   /* XXX: The xlib state tracker is brain-dead and will request
+    * PIPE_FORMAT_Z16_UNORM no matter how much we tell it we don't support it.
+    */
+   if(lpt->base.format == PIPE_FORMAT_Z16_UNORM)
+      lpt->base.format = PIPE_FORMAT_Z32_UNORM;
+
    if (lpt->base.tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
       if (!llvmpipe_displaytarget_layout(screen, lpt))
          goto fail;




More information about the mesa-commit mailing list