Mesa (master): i965: Split the gen6 GS binding table to a separate table.

Eric Anholt anholt at kemper.freedesktop.org
Tue Feb 21 20:01:48 UTC 2012


Module: Mesa
Branch: master
Commit: f9c3ea32cd9b243050ee16f10d6eb9d9c8b3a8ea
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f9c3ea32cd9b243050ee16f10d6eb9d9c8b3a8ea

Author: Eric Anholt <eric at anholt.net>
Date:   Wed Feb 15 14:15:14 2012 -0800

i965: Split the gen6 GS binding table to a separate table.

Improves VS state change microbenchmark performance by 7.08729% +/-
1.22289% (n=10) on gen7, because we don't upload the 64 dwords of
unused binding table any more.

Reviewed-by: Kenneth Graunke <kenneth at whitecape.org>

---

 src/mesa/drivers/dri/i965/brw_context.h      |   23 +++++++---
 src/mesa/drivers/dri/i965/brw_misc_state.c   |    2 +-
 src/mesa/drivers/dri/i965/brw_state.h        |    1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c |    1 +
 src/mesa/drivers/dri/i965/gen6_sol.c         |   58 +++++++++++++++++++++++++-
 5 files changed, 75 insertions(+), 10 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 44a01e6..9c89617 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -484,11 +484,6 @@ struct brw_vs_ouput_sizes {
  *    |   . |     .                   |
  *    |   : |     :                   |
  *    |  24 | Texture 15              |
- *    +-----|-------------------------+
- *    |  25 | SOL Binding 0           |
- *    |   . |     .                   |
- *    |   : |     :                   |
- *    |  88 | SOL Binding 63          |
  *    +-------------------------------+
  *
  * Our VS binding tables are programmed as follows:
@@ -502,6 +497,15 @@ struct brw_vs_ouput_sizes {
  *    |  16 | Texture 15              |
  *    +-------------------------------+
  *
+ * Our (gen6) GS binding tables are programmed as follows:
+ *
+ *    +-----+-------------------------+
+ *    |  0  | SOL Binding 0           |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  63 | SOL Binding 63          |
+ *    +-----+-------------------------+
+ *
  * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
  * the identity function or things will break.  We do want to keep draw buffers
  * first so we can use headerless render target writes for RT 0.
@@ -509,15 +513,17 @@ struct brw_vs_ouput_sizes {
 #define SURF_INDEX_DRAW(d)           (d)
 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
 #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
-#define SURF_INDEX_SOL_BINDING(t)    (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
 
 /** Maximum size of the binding table. */
-#define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+#define BRW_MAX_SURFACES             SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT)
 
 #define SURF_INDEX_VERT_CONST_BUFFER (0)
 #define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
 #define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
 
+#define SURF_INDEX_SOL_BINDING(t)    ((t))
+#define BRW_MAX_GS_SURFACES          SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
+
 enum brw_cache_id {
    BRW_BLEND_STATE,
    BRW_DEPTH_STENCIL_STATE,
@@ -868,6 +874,9 @@ struct brw_context
       /** Offset in the program cache to the CLIP program pre-gen6 */
       uint32_t prog_offset;
       uint32_t state_offset;
+
+      uint32_t bind_bo_offset; /* where the GS binding table was uploaded */
+      uint32_t surf_offset[BRW_MAX_GS_SURFACES]; /* one per SOL binding */
    } gs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 7bc7e1c..c86755d 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -116,7 +116,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
 	     GEN6_BINDING_TABLE_MODIFY_PS |
 	     (4 - 2));
    OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
-   OUT_BATCH(brw->bind.bo_offset); /* gs */
+   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
    OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
    ADVANCE_BATCH();
 }
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 59a2bb3..a58b4b3 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -90,6 +90,7 @@ extern const struct brw_tracked_state gen6_clip_vp;
 extern const struct brw_tracked_state gen6_color_calc_state;
 extern const struct brw_tracked_state gen6_depth_stencil_state;
 extern const struct brw_tracked_state gen6_gs_state;
+extern const struct brw_tracked_state gen6_gs_binding_table;
 extern const struct brw_tracked_state gen6_renderbuffer_surfaces;
 extern const struct brw_tracked_state gen6_sampler_state;
 extern const struct brw_tracked_state gen6_scissor_state;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 28e4d26..3f5c03d 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -148,6 +148,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_texture_surfaces,
    &gen6_sol_surface,
    &brw_vs_binding_table,
+   &gen6_gs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
diff --git a/src/mesa/drivers/dri/i965/gen6_sol.c b/src/mesa/drivers/dri/i965/gen6_sol.c
index 41923b7..fbd8e71 100644
--- a/src/mesa/drivers/dri/i965/gen6_sol.c
+++ b/src/mesa/drivers/dri/i965/gen6_sol.c
@@ -30,6 +30,7 @@
 #include "brw_context.h"
 #include "intel_batchbuffer.h"
 #include "brw_defines.h"
+#include "brw_state.h"
 
 static void
 gen6_update_sol_surfaces(struct brw_context *brw)
@@ -54,11 +55,11 @@ gen6_update_sol_surfaces(struct brw_context *brw)
             xfb_obj->Offset[buffer] / 4 +
             linked_xfb_info->Outputs[i].DstOffset;
          brw_update_sol_surface(
-            brw, xfb_obj->Buffers[buffer], &brw->bind.surf_offset[surf_index],
+            brw, xfb_obj->Buffers[buffer], &brw->gs.surf_offset[surf_index],
             linked_xfb_info->Outputs[i].NumComponents,
             linked_xfb_info->BufferStride[buffer], buffer_offset);
       } else {
-         brw->bind.surf_offset[surf_index] = 0;
+         brw->gs.surf_offset[surf_index] = 0;
       }
    }
 
@@ -75,6 +76,59 @@ const struct brw_tracked_state gen6_sol_surface = {
    .emit = gen6_update_sol_surfaces,
 };
 
+/**
+ * Constructs the gen6 GS binding table, which maps SOL binding indices to
+ * surface state objects, and flags BRW_NEW_GS_BINDING_TABLE when it changes.
+ */
+static void
+brw_gs_upload_binding_table(struct brw_context *brw)
+{
+   struct gl_context *ctx = &brw->intel.ctx;
+   /* BRW_NEW_VERTEX_PROGRAM */
+   const struct gl_shader_program *shaderprog =
+      ctx->Shader.CurrentVertexProgram;
+   /* Currently we only ever upload surfaces for SOL.  Tolerate a NULL
+    * CurrentVertexProgram by treating it as having no SOL outputs.
+    */
+   bool has_surfaces =
+      shaderprog && shaderprog->LinkedTransformFeedback.NumOutputs != 0;
+   uint32_t *bind;
+
+   /* Skip making a binding table if there is nothing to put in it; only
+    * flag a change if a table was previously in use.
+    */
+   if (!has_surfaces) {
+      if (brw->gs.bind_bo_offset != 0) {
+	 brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+	 brw->gs.bind_bo_offset = 0;
+      }
+      return;
+   }
+
+   /* The allocation must hold every entry the memcpy below writes, so it
+    * is sized by BRW_MAX_GS_SURFACES rather than the WM BRW_MAX_SURFACES.
+    */
+   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+			  sizeof(uint32_t) * BRW_MAX_GS_SURFACES,
+			  32, &brw->gs.bind_bo_offset);
+
+   /* BRW_NEW_SURFACES */
+   memcpy(bind, brw->gs.surf_offset, BRW_MAX_GS_SURFACES * sizeof(uint32_t));
+
+   brw->state.dirty.brw |= BRW_NEW_GS_BINDING_TABLE;
+}
+
+const struct brw_tracked_state gen6_gs_binding_table = { /* gen6 GS table atom */
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_BATCH |
+	      BRW_NEW_VERTEX_PROGRAM | /* emit reads CurrentVertexProgram */
+	      BRW_NEW_SURFACES), /* emit copies brw->gs.surf_offset */
+      .cache = 0
+   },
+   .emit = brw_gs_upload_binding_table,
+};
+
 static void
 gen6_update_sol_indices(struct brw_context *brw)
 {




More information about the mesa-commit mailing list