Mesa (gallium-0.2): CELL: stencil bug fixes

Robert Ellison papillo at kemper.freedesktop.org
Thu Oct 30 21:22:07 UTC 2008


Module: Mesa
Branch: gallium-0.2
Commit: 711f8a1dd94e2e1e715615d947e03015ef972326
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=711f8a1dd94e2e1e715615d947e03015ef972326

Author: Robert Ellison <papillo at tungstengraphics.com>
Date:   Thu Oct 30 15:24:23 2008 -0600

CELL: stencil bug fixes

Two definitive bugs in stenciling were fixed.

The first, reversed registers in the generated Select Bytes (selb)
instruction, caused the stenciling INCR and DECR operations to
fail dramatically, putting new values in where old values were
supposed to be and vice versa.

The second caused stencil tiles to not be read and written from
main memory by the SPUs.  A per-spu flag, spu.read_depth, was used
to indicate whether the SPU should be reading depth tiles, and was set
only when depth was enabled.  A second flag, spu.read_stencil, was
set when stenciling was enabled, but never referenced.

As stenciling and depth are in the same tiles on the Cell, and there
is no corresponding TAG_WRITE_TILE_STENCIL to complement
TAG_WRITE_TILE_COLOR and TAG_WRITE_TILE_Z, I fixed this by
eliminating the unused "spu.read_stencil", renaming "spu.read_depth"
to "spu.read_depth_stencil", and setting it if either stenciling or
depth is enabled.

I also added an optimization to the fragment ops generation code,
that avoids calculating stencil values and/or stencil writemask
when the stencil operations are all KEEP.

---

 progs/trivial/tri-stencil.c                      |   13 +++++++++-
 src/gallium/drivers/cell/ppu/cell_gen_fragment.c |   25 ++++++++++++++++-----
 src/gallium/drivers/cell/spu/spu_command.c       |    3 +-
 src/gallium/drivers/cell/spu/spu_main.h          |    3 +-
 src/gallium/drivers/cell/spu/spu_render.c        |    4 +-
 src/gallium/drivers/cell/spu/spu_tri.c           |    2 +-
 6 files changed, 35 insertions(+), 15 deletions(-)

diff --git a/progs/trivial/tri-stencil.c b/progs/trivial/tri-stencil.c
index 5edbef2..7686e16 100644
--- a/progs/trivial/tri-stencil.c
+++ b/progs/trivial/tri-stencil.c
@@ -49,7 +49,15 @@ static void Key(unsigned char key, int x, int y)
 
     switch (key) {
       case 27:
+        printf("Exiting...\n");
 	exit(1);
+      case 'r':
+        printf("Redisplaying...\n");
+        glutPostRedisplay();
+        break;
+      default:
+        printf("No such key '%c'...\n", key);
+        break;
     }
 }
 
@@ -89,7 +97,7 @@ static void Draw(void)
    glEnd();
 #endif
 
-#if 0
+#if 1
    glStencilFunc(GL_EQUAL, 1, 1);
    glStencilOp(GL_KEEP, GL_KEEP, GL_KEEP);
 
@@ -130,7 +138,8 @@ int main(int argc, char **argv)
 	exit(1);
     }
 
-    glutInitWindowPosition(0, 0); glutInitWindowSize( 300, 300);
+    glutInitWindowPosition(0, 0); 
+    glutInitWindowSize( 300, 300);
 
     type = GLUT_RGB | GLUT_SINGLE | GLUT_DEPTH | GLUT_STENCIL;
     glutInitDisplayMode(type);
diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
index 4e1e53e..8e4dd82 100644
--- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
+++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c
@@ -1282,7 +1282,7 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op,
       /* Add Word Immediate computes rT = rA + 10-bit signed immediate */
       spe_ai(f, newS_reg, fbS_reg, 1);
       /* Select from the current value or the new value based on the equality test */
-      spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg);
+      spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
 
       spe_release_register(f, equals_reg);
       break;
@@ -1295,7 +1295,7 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op,
       /* Add Word Immediate with a (-1) value works */
       spe_ai(f, newS_reg, fbS_reg, -1);
       /* Select from the current value or the new value based on the equality test */
-      spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg);
+      spe_selb(f, newS_reg, newS_reg, fbS_reg, equals_reg);
 
       spe_release_register(f, equals_reg);
       break;
@@ -1534,15 +1534,28 @@ gen_stencil_depth_test(struct spe_function *f,
     * meaning that we have to calculate the stencil values but do not
     * need to mask them), we can avoid generating code.  Don't forget
     * that we need to consider backfacing stencil, if enabled.
+    *
+    * Note that if the backface stencil is *not* enabled, the backface
+    * stencil will have the same values as the frontface stencil.
     */
-   if (dsa->stencil[0].write_mask == 0x0 && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) {
-      /* Trivial: don't need to calculate stencil values, and don't need to 
-       * write them back to the framebuffer.
+   if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP &&
+       dsa->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP &&
+       dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP &&
+       dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP &&
+       dsa->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP &&
+       dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) {
+       /* No changes to any stencil values */
+       need_to_calculate_stencil_values = false;
+       need_to_writemask_stencil_values = false;
+    }
+    else if (dsa->stencil[0].write_mask == 0x0 && dsa->stencil[1].write_mask == 0x0) {
+      /* All changes are writemasked out, so no need to calculate
+       * what those changes might be, and no need to write anything back.
        */
       need_to_calculate_stencil_values = false;
       need_to_writemask_stencil_values = false;
    }
-   else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0xff)) {
+   else if (dsa->stencil[0].write_mask == 0xff && dsa->stencil[1].write_mask == 0xff) {
       /* Still trivial, but a little less so.  We need to write the stencil
        * values, but we don't need to mask them.
        */
diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c
index 63818d4..d726622 100644
--- a/src/gallium/drivers/cell/spu/spu_command.c
+++ b/src/gallium/drivers/cell/spu/spu_command.c
@@ -244,8 +244,7 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
       }
    }
 
-   spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
-   spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
+   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);
 }
 
 
diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h
index 668af10..692790c 100644
--- a/src/gallium/drivers/cell/spu/spu_main.h
+++ b/src/gallium/drivers/cell/spu/spu_main.h
@@ -160,8 +160,7 @@ struct spu_global
    tile_t ztile ALIGN16_ATTRIB;
 
    /** Read depth/stencil tiles? */
-   boolean read_depth;
-   boolean read_stencil;
+   boolean read_depth_stencil;
 
    /** Current tiles' status */
    ubyte cur_ctile_status, cur_ztile_status;
diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c
index 5515bb5..7c225e2 100644
--- a/src/gallium/drivers/cell/spu/spu_render.c
+++ b/src/gallium/drivers/cell/spu/spu_render.c
@@ -98,7 +98,7 @@ my_tile(uint tx, uint ty)
 static INLINE void
 get_cz_tiles(uint tx, uint ty)
 {
-   if (spu.read_depth) {
+   if (spu.read_depth_stencil) {
       if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
          //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
          get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
@@ -153,7 +153,7 @@ static INLINE void
 wait_put_cz_tiles(void)
 {
    wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
-   if (spu.read_depth) {
+   if (spu.read_depth_stencil) {
       wait_on_mask(1 << TAG_WRITE_TILE_Z);
    }
 }
diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c
index 4caf7d6..5f90815 100644
--- a/src/gallium/drivers/cell/spu/spu_tri.c
+++ b/src/gallium/drivers/cell/spu/spu_tri.c
@@ -369,7 +369,7 @@ flush_spans(void)
    }
    ASSERT(spu.cur_ctile_status != TILE_STATUS_DEFINED);
 
-   if (spu.read_depth) {
+   if (spu.read_depth_stencil) {
       if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
          /* wait for mfc_get() to complete */
          //printf("SPU: %u: waiting for ztile\n", spu.init.id);




More information about the mesa-commit mailing list