[Mesa-dev] [PATCH 4/6] i965: Split the VS binding table to a separate table.

Eric Anholt eric at anholt.net
Wed Feb 15 15:23:35 PST 2012


This is a step toward making the samplers/binding tables reflect
sampler uniform mappings instead of embedding those in the programs.
No significant performance difference on the microbenchmark (n=10).
---
 src/mesa/drivers/dri/i965/brw_context.h          |   34 +++++++++++---
 src/mesa/drivers/dri/i965/brw_misc_state.c       |    4 +-
 src/mesa/drivers/dri/i965/brw_state.h            |    1 +
 src/mesa/drivers/dri/i965/brw_state_upload.c     |    3 +
 src/mesa/drivers/dri/i965/brw_vec4_emit.cpp      |    2 +-
 src/mesa/drivers/dri/i965/brw_vs.c               |    5 ++
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c |   51 +++++++++++++++++++++-
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |    8 ++-
 src/mesa/drivers/dri/i965/gen7_vs_state.c        |    3 +-
 9 files changed, 94 insertions(+), 17 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 98f68e7..44a01e6 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -409,6 +409,8 @@ struct brw_vs_prog_data {
    bool uses_new_param_layout;
    bool uses_vertexid;
    bool userclip;
+
+   int num_surfaces;
 };
 
 
@@ -468,7 +470,7 @@ struct brw_vs_ouput_sizes {
  * (VS, HS, DS, GS, PS), we currently share a single binding table for all of
  * them.  This is purely for convenience.
  *
- * Currently our binding tables are (arbitrarily) programmed as follows:
+ * Currently our SOL/WM binding tables are (arbitrarily) programmed as follows:
  *
  *    +-------------------------------+
  *    |   0 | Draw buffer 0           | .
@@ -476,18 +478,28 @@ struct brw_vs_ouput_sizes {
  *    |   : |     :                   |   > Only relevant to the WM.
  *    |   7 | Draw buffer 7           |  /
  *    |-----|-------------------------| `
- *    |   8 | VS Pull Constant Buffer |
- *    |   9 | WM Pull Constant Buffer |
+ *    |   8 | WM Pull Constant Buffer |
  *    |-----|-------------------------|
- *    |  10 | Texture 0               |
+ *    |   9 | Texture 0               |
  *    |   . |     .                   |
  *    |   : |     :                   |
- *    |  25 | Texture 15              |
+ *    |  24 | Texture 15              |
  *    +-----|-------------------------+
- *    |  26 | SOL Binding 0           |
+ *    |  25 | SOL Binding 0           |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  88 | SOL Binding 63          |
+ *    +-------------------------------+
+ *
+ * Our VS binding tables are programmed as follows:
+ *
+ *    +-----+-------------------------+
+ *    |   0 | VS Pull Constant Buffer |
+ *    +-----+-------------------------+
+ *    |   1 | Texture 0               |
  *    |   . |     .                   |
  *    |   : |     :                   |
- *    |  89 | SOL Binding 63          |
+ *    |  16 | Texture 15              |
  *    +-------------------------------+
  *
  * Note that nothing actually uses the SURF_INDEX_DRAW macro, so it has to be
@@ -495,7 +507,6 @@ struct brw_vs_ouput_sizes {
  * first so we can use headerless render target writes for RT 0.
  */
 #define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
 #define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
 #define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
 #define SURF_INDEX_SOL_BINDING(t)    (SURF_INDEX_TEXTURE(BRW_MAX_TEX_UNIT) + (t))
@@ -503,6 +514,10 @@ struct brw_vs_ouput_sizes {
 /** Maximum size of the binding table. */
 #define BRW_MAX_SURFACES SURF_INDEX_SOL_BINDING(BRW_MAX_SOL_BINDINGS)
 
+#define SURF_INDEX_VERT_CONST_BUFFER (0)
+#define SURF_INDEX_VS_TEXTURE(t)     (SURF_INDEX_VERT_CONST_BUFFER + 1 + (t))
+#define BRW_MAX_VS_SURFACES          SURF_INDEX_VS_TEXTURE(BRW_MAX_TEX_UNIT)
+
 enum brw_cache_id {
    BRW_BLEND_STATE,
    BRW_DEPTH_STENCIL_STATE,
@@ -841,6 +856,9 @@ struct brw_context
       */
       uint8_t *ra_reg_to_grf;
       /** @} */
+
+      uint32_t bind_bo_offset;
+      uint32_t surf_offset[BRW_MAX_VS_SURFACES];
    } vs;
 
    struct {
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index 0343ae1..7bc7e1c 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -77,7 +77,7 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
-   OUT_BATCH(brw->bind.bo_offset);
+   OUT_BATCH(brw->vs.bind_bo_offset);
    OUT_BATCH(0); /* gs */
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
@@ -115,7 +115,7 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
 	     GEN6_BINDING_TABLE_MODIFY_GS |
 	     GEN6_BINDING_TABLE_MODIFY_PS |
 	     (4 - 2));
-   OUT_BATCH(brw->bind.bo_offset); /* vs */
+   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
    OUT_BATCH(brw->bind.bo_offset); /* gs */
    OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
    ADVANCE_BATCH();
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 2dd5665..59a2bb3 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -71,6 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog;
 extern const struct brw_tracked_state brw_renderbuffer_surfaces;
 extern const struct brw_tracked_state brw_texture_surfaces;
 extern const struct brw_tracked_state brw_binding_table;
+extern const struct brw_tracked_state brw_vs_binding_table;
 extern const struct brw_tracked_state brw_wm_unit;
 
 extern const struct brw_tracked_state brw_psp_urb_cbs;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index ea50695..28e4d26 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -70,6 +70,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_wm_pull_constants,
    &brw_renderbuffer_surfaces,
    &brw_texture_surfaces,
+   &brw_vs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
@@ -146,6 +147,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &gen6_renderbuffer_surfaces,
    &brw_texture_surfaces,
    &gen6_sol_surface,
+   &brw_vs_binding_table,
    &brw_binding_table,
 
    &brw_samplers,
@@ -214,6 +216,7 @@ const struct brw_tracked_state *gen7_atoms[] =
    &brw_wm_pull_constants,
    &gen6_renderbuffer_surfaces,
    &brw_texture_surfaces,
+   &brw_vs_binding_table,
    &brw_binding_table,
 
    &gen7_samplers,
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
index f9eed61..9df7b11 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -465,7 +465,7 @@ vec4_visitor::generate_tex(vec4_instruction *inst,
 	      dst,
 	      inst->base_mrf,
 	      src,
-	      SURF_INDEX_TEXTURE(inst->sampler),
+	      SURF_INDEX_VS_TEXTURE(inst->sampler),
 	      inst->sampler,
 	      WRITEMASK_XYZW,
 	      msg_type,
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index ca205cd..bd703c7 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -247,6 +247,11 @@ do_vs_prog(struct brw_context *brw,
       brw_old_vs_emit(&c);
    }
 
+   if (c.prog_data.nr_pull_params)
+      c.prog_data.num_surfaces = 1;
+   if (c.vp->program.Base.SamplersUsed)
+      c.prog_data.num_surfaces = BRW_MAX_VS_SURFACES;
+
    /* Scratch space is used for register spilling */
    if (c.last_scratch) {
       c.prog_data.total_scratch = brw_get_scratch_size(c.last_scratch);
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 2f7b211..b29e414 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -65,7 +65,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
       if (brw->vs.const_bo) {
 	 drm_intel_bo_unreference(brw->vs.const_bo);
 	 brw->vs.const_bo = NULL;
-	 brw->bind.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
+	 brw->vs.surf_offset[SURF_INDEX_VERT_CONST_BUFFER] = 0;
 	 brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
       }
       return;
@@ -97,7 +97,7 @@ brw_upload_vs_pull_constants(struct brw_context *brw)
    const int surf = SURF_INDEX_VERT_CONST_BUFFER;
    intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
 				       params->NumParameters,
-				       &brw->bind.surf_offset[surf]);
+				       &brw->vs.surf_offset[surf]);
 
    brw->state.dirty.brw |= BRW_NEW_VS_CONSTBUF;
 }
@@ -110,3 +110,50 @@ const struct brw_tracked_state brw_vs_pull_constants = {
    },
    .emit = brw_upload_vs_pull_constants,
 };
+
+/**
+ * Constructs the binding table for the VS, which maps VS surface indices
+ * (pull constant buffer first, then textures) to surface state offsets.
+ */
+static void
+brw_vs_upload_binding_table(struct brw_context *brw)
+{
+   uint32_t *bind;
+   int i;
+
+   /* CACHE_NEW_VS_PROG: Skip making a binding table if we don't use textures or
+    * pull constants.
+    */
+   if (brw->vs.prog_data->num_surfaces == 0) {
+      if (brw->vs.bind_bo_offset != 0) {
+	 brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
+	 brw->vs.bind_bo_offset = 0;
+      }
+      return;
+   }
+
+   /* Sized for the VS table only, not the larger shared WM/SOL table.  Might
+    * want to use prog_data->num_surfaces instead, to take up even less space.
+    */
+   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
+			  sizeof(uint32_t) * BRW_MAX_VS_SURFACES,
+			  32, &brw->vs.bind_bo_offset);
+
+   /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
+   for (i = 0; i < BRW_MAX_VS_SURFACES; i++) {
+      bind[i] = brw->vs.surf_offset[i];
+   }
+
+   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
+}
+
+const struct brw_tracked_state brw_vs_binding_table = { /* VS-only table atom */
+   .dirty = {
+      .mesa = 0,
+      .brw = (BRW_NEW_BATCH | /* presumably: table lives in batch space */
+	      BRW_NEW_VS_CONSTBUF | /* surf_offset[] pull constant slot */
+	      BRW_NEW_SURFACES), /* surf_offset[] texture slots */
+      .cache = CACHE_NEW_VS_PROG /* emit reads vs.prog_data->num_surfaces */
+   },
+   .emit = brw_vs_upload_binding_table,
+};
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 51d3a46..6dbafab 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1061,6 +1061,10 @@ brw_update_texture_surfaces(struct brw_context *brw)
       } else {
          brw->bind.surf_offset[surf] = 0;
       }
+
+      /* For now, just mirror the texture setup to the VS slots. */
+      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] =
+	 brw->bind.surf_offset[surf];
    }
 
    brw->state.dirty.brw |= BRW_NEW_SURFACES;
@@ -1092,12 +1096,11 @@ brw_upload_binding_table(struct brw_context *brw)
 			  sizeof(uint32_t) * BRW_MAX_SURFACES,
 			  32, &brw->bind.bo_offset);
 
-   /* BRW_NEW_SURFACES and BRW_NEW_VS_CONSTBUF */
+   /* BRW_NEW_SURFACES */
    for (i = 0; i < BRW_MAX_SURFACES; i++) {
       bind[i] = brw->bind.surf_offset[i];
    }
 
-   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
    brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
 }
 
@@ -1105,7 +1108,6 @@ const struct brw_tracked_state brw_binding_table = {
    .dirty = {
       .mesa = 0,
       .brw = (BRW_NEW_BATCH |
-	      BRW_NEW_VS_CONSTBUF |
 	      BRW_NEW_SURFACES),
       .cache = 0
    },
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 0746e6c..8d500ca 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -35,9 +35,10 @@ upload_vs_state(struct brw_context *brw)
    struct intel_context *intel = &brw->intel;
    uint32_t floating_point_mode = 0;
 
+   /* BRW_NEW_VS_BINDING_TABLE */
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
-   OUT_BATCH(brw->bind.bo_offset);
+   OUT_BATCH(brw->vs.bind_bo_offset);
    ADVANCE_BATCH();
 
    /* CACHE_NEW_SAMPLER */
-- 
1.7.9



More information about the mesa-dev mailing list