Mesa (master): r300g: add fallback for back stencil reference value and masks for r3xx-r4xx

Marek Olšák mareko at kemper.freedesktop.org
Mon Apr 5 05:10:13 UTC 2010


Module: Mesa
Branch: master
Commit: a955f86b31304a1a0f35faa0e0861e920354e23b
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=a955f86b31304a1a0f35faa0e0861e920354e23b

Author: Marek Olšák <maraeo at gmail.com>
Date:   Mon Apr  5 06:26:11 2010 +0200

r300g: add fallback for back stencil reference value and masks for r3xx-r4xx

This splits rendering into two passes when the front and back stencil
reference values, value masks, or write masks don't match.

The advantages of doing it in the driver instead of in st are:
* SWTCL is executed just once and the resulting vertex buffer is reused
  in the second pass.
* Lower driver overhead, because the fallback sits right next to
  the actual draw emission and needs only minimal state changes.

---

 src/gallium/drivers/r300/r300_context.c |   10 ++
 src/gallium/drivers/r300/r300_context.h |   23 ++++
 src/gallium/drivers/r300/r300_render.c  |  202 ++++++++++++++++++++++++++-----
 src/gallium/drivers/r300/r300_render.h  |   36 ++++++
 src/gallium/drivers/r300/r300_state.c   |   41 +++++-
 5 files changed, 275 insertions(+), 37 deletions(-)

diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index 1e1c716..46fdf08 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -175,6 +175,16 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
         r300->context.draw_arrays = r300_draw_arrays;
         r300->context.draw_elements = r300_draw_elements;
         r300->context.draw_range_elements = r300_draw_range_elements;
+
+        if (r300screen->caps.is_r500) {
+            r300->emit_draw_arrays_immediate = r500_emit_draw_arrays_immediate;
+            r300->emit_draw_arrays = r500_emit_draw_arrays;
+            r300->emit_draw_elements = r500_emit_draw_elements;
+        } else {
+            r300->emit_draw_arrays_immediate = r300_emit_draw_arrays_immediate;
+            r300->emit_draw_arrays = r300_emit_draw_arrays;
+            r300->emit_draw_elements = r300_emit_draw_elements;
+        }
     } else {
         r300->context.draw_arrays = r300_swtcl_draw_arrays;
         r300->context.draw_elements = r300_draw_elements;
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index 351bd2c..9d7e9d1 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -79,6 +79,11 @@ struct r300_dsa_state {
     uint32_t z_stencil_control; /* R300_ZB_ZSTENCILCNTL: 0x4f04 */
     uint32_t stencil_ref_mask;  /* R300_ZB_STENCILREFMASK: 0x4f08 */
     uint32_t stencil_ref_bf;    /* R500_ZB_STENCILREFMASK_BF: 0x4fd4 */
+
+    /* Whether a two-sided stencil is enabled. */
+    boolean two_sided;
+    /* Whether front/back stencil masks differ and a fallback is needed. */
+    boolean stencil_ref_bf_fallback;
 };
 
 struct r300_rs_state {
@@ -290,6 +295,21 @@ struct r300_context {
     /* Parent class */
     struct pipe_context context;
 
+    /* Emission of drawing packets. */
+    void (*emit_draw_arrays_immediate)(
+            struct r300_context *r300,
+            unsigned mode, unsigned start, unsigned count);
+
+    void (*emit_draw_arrays)(
+            struct r300_context *r300,
+            unsigned mode, unsigned count);
+
+    void (*emit_draw_elements)(
+            struct r300_context *r300, struct pipe_buffer* indexBuffer,
+            unsigned indexSize, unsigned minIndex, unsigned maxIndex,
+            unsigned mode, unsigned start, unsigned count);
+
+
     /* The interface to the windowing system, etc. */
     struct r300_winsys_screen *rws;
     /* Screen. */
@@ -382,6 +402,9 @@ struct r300_context {
     boolean scissor_enabled;
     /* Whether rendering is conditional and should be skipped. */
     boolean skip_rendering;
+    /* Whether the front and back stencil ref values or masks differ, and the
+     * two-pass fallback should be used for r3xx-r4xx. */
+    boolean stencil_ref_bf_fallback;
     /* upload managers */
     struct u_upload_mgr *upload_vb;
     struct u_upload_mgr *upload_ib;
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index c2b1323..0935556 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -162,10 +162,15 @@ static boolean immd_is_good_idea(struct r300_context *r300,
     return TRUE;
 }
 
-static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
-                                            unsigned mode,
-                                            unsigned start,
-                                            unsigned count)
+/*****************************************************************************
+ * The emission of draw packets for r500. Older GPUs may use these functions *
+ * after resolving fallback issues (e.g. stencil ref two-sided).             *
+ ****************************************************************************/
+
+void r500_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count)
 {
     struct pipe_vertex_element* velem;
     struct pipe_vertex_buffer* vbuf;
@@ -252,9 +257,9 @@ static void r300_emit_draw_arrays_immediate(struct r300_context *r300,
     }
 }
 
-static void r300_emit_draw_arrays(struct r300_context *r300,
-                                  unsigned mode,
-                                  unsigned count)
+void r500_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count)
 {
 #if defined(ENABLE_ALT_NUM_VERTS)
     boolean alt_num_verts = count > 65535;
@@ -282,14 +287,14 @@ static void r300_emit_draw_arrays(struct r300_context *r300,
     END_CS;
 }
 
-static void r300_emit_draw_elements(struct r300_context *r300,
-                                    struct pipe_buffer* indexBuffer,
-                                    unsigned indexSize,
-                                    unsigned minIndex,
-                                    unsigned maxIndex,
-                                    unsigned mode,
-                                    unsigned start,
-                                    unsigned count)
+void r500_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count)
 {
     uint32_t count_dwords;
     uint32_t offset_dwords = indexSize * start / sizeof(uint32_t);
@@ -347,6 +352,104 @@ static void r300_emit_draw_elements(struct r300_context *r300,
     END_CS;
 }
 
+/*****************************************************************************
+ * The emission of draw packets for r300 which take care of the two-sided    *
+ * stencil ref fallback and call r500's functions.                           *
+ ****************************************************************************/
+
+/* First pass: also cull back faces so that only front faces are drawn. */
+static void r300_begin_stencil_ref_fallback(struct r300_context *r300)
+{
+    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+    CS_LOCALS(r300);
+
+    BEGIN_CS(2);
+    OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_BACK);
+    END_CS;
+}
+
+/* Second pass: draw only back faces with the back stencil ref and masks. */
+static void r300_switch_stencil_ref_side(struct r300_context *r300)
+{
+    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+    CS_LOCALS(r300);
+
+    BEGIN_CS(4);
+    OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode | R300_CULL_FRONT);
+    OUT_CS_REG(R300_ZB_STENCILREFMASK,
+               dsa->stencil_ref_bf | r300->stencil_ref.ref_value[1]);
+    END_CS;
+}
+
+/* Restore the original state. */
+static void r300_end_stencil_ref_fallback(struct r300_context *r300)
+{
+    struct r300_rs_state *rs = (struct r300_rs_state*)r300->rs_state.state;
+    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+    CS_LOCALS(r300);
+
+    BEGIN_CS(4);
+    OUT_CS_REG(R300_SU_CULL_MODE, rs->cull_mode);
+    OUT_CS_REG(R300_ZB_STENCILREFMASK,
+               dsa->stencil_ref_mask | r300->stencil_ref.ref_value[0]);
+    END_CS;
+}
+
+void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count)
+{
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_emit_draw_arrays_immediate(r300, mode, start, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300);
+        r500_emit_draw_arrays_immediate(r300, mode, start, count);
+        r300_switch_stencil_ref_side(r300);
+        r500_emit_draw_arrays_immediate(r300, mode, start, count);
+        r300_end_stencil_ref_fallback(r300);
+    }
+}
+
+void r300_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count)
+{
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_emit_draw_arrays(r300, mode, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300);
+        r500_emit_draw_arrays(r300, mode, count);
+        r300_switch_stencil_ref_side(r300);
+        r500_emit_draw_arrays(r300, mode, count);
+        r300_end_stencil_ref_fallback(r300);
+    }
+}
+
+void r300_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count)
+{
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                maxIndex, mode, start, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300);
+        r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                maxIndex, mode, start, count);
+        r300_switch_stencil_ref_side(r300);
+        r500_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                maxIndex, mode, start, count);
+        r300_end_stencil_ref_fallback(r300);
+    }
+}
+
 static void r300_shorten_ubyte_elts(struct r300_context* r300,
                                     struct pipe_buffer** elts,
                                     unsigned start,
@@ -457,13 +560,13 @@ void r300_draw_range_elements(struct pipe_context* pipe,
     u_upload_flush(r300->upload_vb);
     u_upload_flush(r300->upload_ib);
     if (alt_num_verts || count <= 65535) {
-        r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
-                                maxIndex, mode, start, count);
+        r300->emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                 maxIndex, mode, start, count);
     } else {
         do {
             short_count = MIN2(count, 65534);
-            r300_emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
-                                    maxIndex, mode, start, short_count);
+            r300->emit_draw_elements(r300, indexBuffer, indexSize, minIndex,
+                                      maxIndex, mode, start, short_count);
 
             start += short_count;
             count -= short_count;
@@ -518,7 +621,7 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
     r300_update_derived_state(r300);
 
     if (immd_is_good_idea(r300, count)) {
-        r300_emit_draw_arrays_immediate(r300, mode, start, count);
+        r300->emit_draw_arrays_immediate(r300, mode, start, count);
     } else {
         /* Make sure there are at least 128 spare dwords in the command buffer.
          * (most of it being consumed by emit_aos) */
@@ -528,12 +631,12 @@ void r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
 
         if (alt_num_verts || count <= 65535) {
             r300_emit_aos(r300, start);
-            r300_emit_draw_arrays(r300, mode, count);
+            r300->emit_draw_arrays(r300, mode, count);
         } else {
             do {
                 short_count = MIN2(count, 65535);
                 r300_emit_aos(r300, start);
-                r300_emit_draw_arrays(r300, mode, short_count);
+                r300->emit_draw_arrays(r300, mode, short_count);
 
                 start += short_count;
                 count -= short_count;
@@ -746,9 +849,9 @@ static boolean r300_render_set_primitive(struct vbuf_render* render,
     return TRUE;
 }
 
-static void r300_render_draw_arrays(struct vbuf_render* render,
-                                          unsigned start,
-                                          unsigned count)
+static void r500_render_draw_arrays(struct vbuf_render* render,
+                                    unsigned start,
+                                    unsigned count)
 {
     struct r300_render* r300render = r300_render(render);
     struct r300_context* r300 = r300render->r300;
@@ -768,9 +871,9 @@ static void r300_render_draw_arrays(struct vbuf_render* render,
     END_CS;
 }
 
-static void r300_render_draw(struct vbuf_render* render,
-                                   const ushort* indices,
-                                   uint count)
+static void r500_render_draw(struct vbuf_render* render,
+                             const ushort* indices,
+                             uint count)
 {
     struct r300_render* r300render = r300_render(render);
     struct r300_context* r300 = r300render->r300;
@@ -796,6 +899,40 @@ static void r300_render_draw(struct vbuf_render* render,
     END_CS;
 }
 
+static void r300_render_draw_arrays(struct vbuf_render* render,
+                                    unsigned start,
+                                    unsigned count)
+{
+    struct r300_context* r300 = r300_render(render)->r300;
+
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_render_draw_arrays(render, start, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300);
+        r500_render_draw_arrays(render, start, count);
+        r300_switch_stencil_ref_side(r300);
+        r500_render_draw_arrays(render, start, count);
+        r300_end_stencil_ref_fallback(r300);
+    }
+}
+
+static void r300_render_draw(struct vbuf_render* render,
+                             const ushort* indices,
+                             uint count)
+{
+    struct r300_context* r300 = r300_render(render)->r300;
+
+    if (!r300->stencil_ref_bf_fallback) {
+        r500_render_draw(render, indices, count);
+    } else {
+        r300_begin_stencil_ref_fallback(r300);
+        r500_render_draw(render, indices, count);
+        r300_switch_stencil_ref_side(r300);
+        r500_render_draw(render, indices, count);
+        r300_end_stencil_ref_fallback(r300);
+    }
+}
+
 static void r300_render_destroy(struct vbuf_render* render)
 {
     FREE(render);
@@ -816,8 +953,13 @@ static struct vbuf_render* r300_render_create(struct r300_context* r300)
     r300render->base.map_vertices = r300_render_map_vertices;
     r300render->base.unmap_vertices = r300_render_unmap_vertices;
     r300render->base.set_primitive = r300_render_set_primitive;
-    r300render->base.draw = r300_render_draw;
-    r300render->base.draw_arrays = r300_render_draw_arrays;
+    if (r300->screen->caps.is_r500) {
+        r300render->base.draw = r500_render_draw;
+        r300render->base.draw_arrays = r500_render_draw_arrays;
+    } else {
+        r300render->base.draw = r300_render_draw;
+        r300render->base.draw_arrays = r300_render_draw_arrays;
+    }
     r300render->base.release_vertices = r300_render_release_vertices;
     r300render->base.destroy = r300_render_destroy;
 
diff --git a/src/gallium/drivers/r300/r300_render.h b/src/gallium/drivers/r300/r300_render.h
index 27b5e6a..870e1fb 100644
--- a/src/gallium/drivers/r300/r300_render.h
+++ b/src/gallium/drivers/r300/r300_render.h
@@ -25,6 +25,42 @@
 
 uint32_t r300_translate_primitive(unsigned prim);
 
+void r500_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count);
+
+void r500_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count);
+
+void r500_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count);
+
+void r300_emit_draw_arrays_immediate(struct r300_context *r300,
+                                     unsigned mode,
+                                     unsigned start,
+                                     unsigned count);
+
+void r300_emit_draw_arrays(struct r300_context *r300,
+                           unsigned mode,
+                           unsigned count);
+
+void r300_emit_draw_elements(struct r300_context *r300,
+                             struct pipe_buffer* indexBuffer,
+                             unsigned indexSize,
+                             unsigned minIndex,
+                             unsigned maxIndex,
+                             unsigned mode,
+                             unsigned start,
+                             unsigned count);
+
 void r300_draw_range_elements(struct pipe_context* pipe,
                               struct pipe_buffer* indexBuffer,
                               unsigned indexSize,
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index 2adb3e7..2309f35 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -444,6 +444,8 @@ static void*
                 (state->stencil[0].writemask << R300_STENCILWRITEMASK_SHIFT);
 
         if (state->stencil[1].enabled) {
+            dsa->two_sided = TRUE;
+
             dsa->z_buffer_control |= R300_STENCIL_FRONT_BACK;
             dsa->z_stencil_control |=
             (r300_translate_depth_stencil_function(state->stencil[1].func) <<
@@ -455,14 +457,16 @@ static void*
             (r300_translate_stencil_op(state->stencil[1].zfail_op) <<
                 R300_S_BACK_ZFAIL_OP_SHIFT);
 
-            if (caps->is_r500)
-            {
+            dsa->stencil_ref_bf =
+                (state->stencil[1].valuemask << R300_STENCILMASK_SHIFT) |
+                (state->stencil[1].writemask << R300_STENCILWRITEMASK_SHIFT);
+
+            if (caps->is_r500) {
                 dsa->z_buffer_control |= R500_STENCIL_REFMASK_FRONT_BACK;
-                dsa->stencil_ref_bf =
-                    (state->stencil[1].valuemask <<
-                    R300_STENCILMASK_SHIFT) |
-                    (state->stencil[1].writemask <<
-                    R300_STENCILWRITEMASK_SHIFT);
+            } else {
+                dsa->stencil_ref_bf_fallback =
+                  (state->stencil[0].valuemask != state->stencil[1].valuemask ||
+                   state->stencil[0].writemask != state->stencil[1].writemask);
             }
         }
     }
@@ -483,13 +487,33 @@ static void*
     return (void*)dsa;
 }
 
+/* Recompute the stencil fallback flag from the bound DSA and ref values. */
+static void r300_update_stencil_ref_fallback_status(struct r300_context *r300)
+{
+    struct r300_dsa_state *dsa = (struct r300_dsa_state*)r300->dsa_state.state;
+
+    /* Nothing to decide on r500, or before any DSA state has been bound. */
+    if (r300->screen->caps.is_r500 || !dsa) {
+        return;
+    }
+    r300->stencil_ref_bf_fallback = dsa->stencil_ref_bf_fallback ||
+        (dsa->two_sided &&
+         r300->stencil_ref.ref_value[0] != r300->stencil_ref.ref_value[1]);
+}
+
 /* Bind DSA state. */
 static void r300_bind_dsa_state(struct pipe_context* pipe,
                                 void* state)
 {
     struct r300_context* r300 = r300_context(pipe);
 
+    if (!state) {
+        return;
+    }
+
     UPDATE_STATE(state, r300->dsa_state);
+
+    r300_update_stencil_ref_fallback_status(r300);
 }
 
 /* Free DSA state. */
@@ -503,8 +527,11 @@ static void r300_set_stencil_ref(struct pipe_context* pipe,
                                  const struct pipe_stencil_ref* sr)
 {
     struct r300_context* r300 = r300_context(pipe);
+
     r300->stencil_ref = *sr;
     r300->dsa_state.dirty = TRUE;
+
+    r300_update_stencil_ref_fallback_status(r300);
 }
 
 /* This switcheroo is needed just because of goddamned MACRO_SWITCH. */




More information about the mesa-commit mailing list