[Mesa-dev] [PATCH 09/13] i965: Use a single binding table for all pipeline stages.

Kenneth Graunke kenneth at whitecape.org
Tue Nov 8 14:32:08 PST 2011


Although the hardware supports separate binding tables for each pipeline
stage, separate tables offer little advantage over a single shared table.

Consider the contents of the binding table:
- Textures (16)
- Draw buffers (8)
- Pull constant buffers (1 for VS, 1 for WM)

OpenGL's texture bindings are global: the same set of textures is
available to all shader targets.  So our binding table entries for
textures would be exactly the same in every table.

There are only two pull constant buffers (not many), and although draw
buffers aren't interesting to the VS, it shouldn't hurt to have them in
the table.  The hardware supports up to 254 binding table entries, and
we currently only use 26.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_context.h           |   74 +++++++++++++--------
 src/mesa/drivers/dri/i965/brw_misc_state.c        |    8 +-
 src/mesa/drivers/dri/i965/brw_state.h             |    2 +-
 src/mesa/drivers/dri/i965/brw_state_upload.c      |    6 +-
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c  |   27 +-------
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c  |   34 +++++-----
 src/mesa/drivers/dri/i965/gen7_vs_state.c         |    2 +-
 src/mesa/drivers/dri/i965/gen7_wm_state.c         |    2 +-
 src/mesa/drivers/dri/i965/gen7_wm_surface_state.c |   10 ++--
 9 files changed, 80 insertions(+), 85 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index c889e54..5d70345 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -404,31 +404,48 @@ struct brw_vs_ouput_sizes {
 #define BRW_MAX_DRAW_BUFFERS 8
 
 /**
- * Size of our surface binding table for the WM.
- * This contains pointers to the drawing surfaces and current texture
- * objects and shader constant buffers (+2).
- */
-#define BRW_WM_MAX_SURF (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 1)
-
-/**
- * Helpers to convert drawing buffers, textures and constant buffers
- * to surface binding table indexes, for WM.
+ * Helpers to create Surface Binding Table indexes for draw buffers,
+ * textures, and constant buffers.
+ *
+ * Shader threads access surfaces via numeric handles, rather than directly
+ * using pointers.  The binding table maps these numeric handles to the
+ * address of the actual buffer.
+ *
+ * For example, a shader might ask to sample from "surface 7."  In this case,
+ * bind[7] would contain a pointer to a texture.
+ *
+ * Although the hardware supports separate binding tables per pipeline stage
+ * (VS, HS, DS, GS, PS), we currently share a single binding table for all of
+ * them.  This is purely for convenience.
+ *
+ * Currently our binding tables are (arbitrarily) programmed as follows:
+ *
+ *    +-------------------------------+
+ *    |   0 | Draw buffer 0           | .
+ *    |   . |     .                   |  \
+ *    |   : |     :                   |   > Only relevant to the WM.
+ *    |   7 | Draw buffer 7           |  /
+ *    |-----|-------------------------| `
+ *    |   8 | VS Pull Constant Buffer |
+ *    |   9 | WM Pull Constant Buffer |
+ *    |-----|-------------------------|
+ *    |  10 | Texture 0               |
+ *    |   . |     .                   |
+ *    |   : |     :                   |
+ *    |  25 | Texture 15              |
+ *    +-------------------------------+
+ *
+ * Note that callers index draw buffers directly rather than through the
+ * SURF_INDEX_DRAW macro, so it must remain the identity function.  We also
+ * want draw buffers first so RT 0 can use headerless render target writes.
  */
 #define SURF_INDEX_DRAW(d)           (d)
-#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS) 
-#define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 1 + (t))
-
-/**
- * Size of surface binding table for the VS.
- * Only one constant buffer for now.
- */
-#define BRW_VS_MAX_SURF 1
-
-/**
- * Only a VS constant buffer
- */
-#define SURF_INDEX_VERT_CONST_BUFFER 0
+#define SURF_INDEX_VERT_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 0)
+#define SURF_INDEX_FRAG_CONST_BUFFER (BRW_MAX_DRAW_BUFFERS + 1)
+#define SURF_INDEX_TEXTURE(t)        (BRW_MAX_DRAW_BUFFERS + 2 + (t))
 
+/** Maximum size of the binding table. */
+#define BRW_MAX_SURFACES (BRW_MAX_DRAW_BUFFERS + BRW_MAX_TEX_UNIT + 2)
 
 enum brw_cache_id {
    BRW_BLEND_STATE,
@@ -723,6 +740,12 @@ struct brw_context
    } curbe;
 
    struct {
+      /** Binding table of pointers to surf_bo entries */
+      uint32_t bo_offset;
+      uint32_t surf_offset[BRW_MAX_SURFACES];
+   } bind;
+
+   struct {
       struct brw_vs_prog_data *prog_data;
       int8_t *constant_map; /* variable array following prog_data */
 
@@ -732,10 +755,6 @@ struct brw_context
       uint32_t prog_offset;
       uint32_t state_offset;
 
-      /** Binding table of pointers to surf_bo entries */
-      uint32_t bind_bo_offset;
-      uint32_t surf_offset[BRW_VS_MAX_SURF];
-
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
 
@@ -814,9 +833,6 @@ struct brw_context
       /** Offset in the program cache to the WM program */
       uint32_t prog_offset;
 
-      /** Binding table of pointers to surf_bo entries */
-      uint32_t bind_bo_offset;
-      uint32_t surf_offset[BRW_WM_MAX_SURF];
       uint32_t state_offset; /* offset in batchbuffer to pre-gen6 WM state */
 
       drm_intel_bo *const_bo; /* pull constant buffer. */
diff --git a/src/mesa/drivers/dri/i965/brw_misc_state.c b/src/mesa/drivers/dri/i965/brw_misc_state.c
index fb1d3e1..514c990 100644
--- a/src/mesa/drivers/dri/i965/brw_misc_state.c
+++ b/src/mesa/drivers/dri/i965/brw_misc_state.c
@@ -76,11 +76,11 @@ static void upload_binding_table_pointers(struct brw_context *brw)
 
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
-   OUT_BATCH(brw->vs.bind_bo_offset);
+   OUT_BATCH(brw->bind.bo_offset);
    OUT_BATCH(0); /* gs */
    OUT_BATCH(0); /* clip */
    OUT_BATCH(0); /* sf */
-   OUT_BATCH(brw->wm.bind_bo_offset);
+   OUT_BATCH(brw->bind.bo_offset);
    ADVANCE_BATCH();
 }
 
@@ -114,9 +114,9 @@ static void upload_gen6_binding_table_pointers(struct brw_context *brw)
 	     GEN6_BINDING_TABLE_MODIFY_GS |
 	     GEN6_BINDING_TABLE_MODIFY_PS |
 	     (4 - 2));
-   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
+   OUT_BATCH(brw->bind.bo_offset); /* vs */
    OUT_BATCH(0); /* gs */
-   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
+   OUT_BATCH(brw->bind.bo_offset); /* wm/ps */
    ADVANCE_BATCH();
 }
 
diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
index 9b11c6f..c273996 100644
--- a/src/mesa/drivers/dri/i965/brw_state.h
+++ b/src/mesa/drivers/dri/i965/brw_state.h
@@ -71,7 +71,7 @@ extern const struct brw_tracked_state brw_wm_prog;
 extern const struct brw_tracked_state brw_wm_samplers;
 extern const struct brw_tracked_state brw_renderbuffer_surfaces;
 extern const struct brw_tracked_state brw_texture_surfaces;
-extern const struct brw_tracked_state brw_wm_binding_table;
+extern const struct brw_tracked_state brw_binding_table;
 extern const struct brw_tracked_state brw_wm_unit;
 
 extern const struct brw_tracked_state brw_psp_urb_cbs;
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 7f32c20..c94b0eb 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -69,7 +69,7 @@ static const struct brw_tracked_state *gen4_atoms[] =
    &brw_vs_surfaces,		/* must do before unit */
    &brw_renderbuffer_surfaces,  /* must do before unit */
    &brw_texture_surfaces,       /* must do before unit */
-   &brw_wm_binding_table,
+   &brw_binding_table,
    &brw_wm_samplers,
 
    /* These set up state for brw_psp_urb_cbs */
@@ -141,7 +141,7 @@ static const struct brw_tracked_state *gen6_atoms[] =
    &brw_vs_surfaces,		/* must do before unit */
    &brw_renderbuffer_surfaces,  /* must do before unit */
    &brw_texture_surfaces,       /* must do before unit */
-   &brw_wm_binding_table,
+   &brw_binding_table,
 
    &brw_wm_samplers,
    &gen6_sampler_state,
@@ -206,7 +206,7 @@ const struct brw_tracked_state *gen7_atoms[] =
    &brw_vs_surfaces,		/* must do before unit */
    &brw_renderbuffer_surfaces,  /* must do before unit */
    &brw_texture_surfaces,       /* must do before unit */
-   &brw_wm_binding_table,
+   &brw_binding_table,
 
    &gen7_samplers,
 
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 4c99185..66d5545 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -119,19 +119,17 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
       (struct brw_vertex_program *) brw->vertex_program;
    const struct gl_program_parameter_list *params = vp->program.Base.Parameters;
 
-   assert(surf == 0);
-
    /* If there's no constant buffer, then no surface BO is needed to point at
     * it.
     */
    if (brw->vs.const_bo == NULL) {
-      brw->vs.surf_offset[surf] = 0;
+      brw->bind.surf_offset[surf] = 0;
       return;
    }
 
    intel->vtbl.create_constant_surface(brw, brw->vs.const_bo,
 				       params->NumParameters,
-				       &brw->vs.surf_offset[surf]);
+				       &brw->bind.surf_offset[surf]);
 }
 
 /**
@@ -141,32 +139,11 @@ static void
 brw_upload_vs_surfaces(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->intel.ctx;
-   uint32_t *bind;
-   int i;
-   int nr_surfaces = 0;
 
    /* BRW_NEW_VS_CONSTBUF */
    if (brw->vs.const_bo) {
-      nr_surfaces = 1;
       brw_update_vs_constant_surface(ctx, SURF_INDEX_VERT_CONST_BUFFER);
    }
-
-   if (nr_surfaces != 0) {
-      bind = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			     sizeof(uint32_t) * nr_surfaces,
-			     32, &brw->vs.bind_bo_offset);
-
-      for (i = 0; i < nr_surfaces; i++) {
-	 /* BRW_NEW_VS_CONSTBUF */
-	 bind[i] = brw->vs.surf_offset[i];
-      }
-      brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
-   } else {
-      if (brw->vs.bind_bo_offset) {
-	 brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
-	 brw->vs.bind_bo_offset = 0;
-      }
-   }
 }
 
 const struct brw_tracked_state brw_vs_surfaces = {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index b8dff89..2da2869 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -237,7 +237,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
    intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth);
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			  6 * 4, 32, &brw->wm.surf_offset[surf_index]);
+			  6 * 4, 32, &brw->bind.surf_offset[surf_index]);
 
    surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
 	      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
@@ -264,7 +264,7 @@ brw_update_texture_surface( struct gl_context *ctx, GLuint unit )
 
    /* Emit relocation to surface contents */
    drm_intel_bo_emit_reloc(brw->intel.batch.bo,
-			   brw->wm.surf_offset[surf_index] + 4,
+			   brw->bind.surf_offset[surf_index] + 4,
 			   intelObj->mt->region->bo, 0,
 			   I915_GEM_DOMAIN_SAMPLER, 0);
 }
@@ -341,7 +341,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
       if (brw->wm.const_bo) {
 	 drm_intel_bo_unreference(brw->wm.const_bo);
 	 brw->wm.const_bo = NULL;
-	 brw->wm.surf_offset[surf_index] = 0;
+	 brw->bind.surf_offset[surf_index] = 0;
 	 brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
       }
       return;
@@ -362,7 +362,7 @@ brw_upload_wm_pull_constants(struct brw_context *brw)
 
    intel->vtbl.create_constant_surface(brw, brw->wm.const_bo,
 				       params->NumParameters,
-				       &brw->wm.surf_offset[surf_index]);
+				       &brw->bind.surf_offset[surf_index]);
 
    brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
 }
@@ -383,7 +383,7 @@ brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
    uint32_t *surf;
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			  6 * 4, 32, &brw->wm.surf_offset[unit]);
+			  6 * 4, 32, &brw->bind.surf_offset[unit]);
 
    surf[0] = (BRW_SURFACE_NULL << BRW_SURFACE_TYPE_SHIFT |
 	      BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
@@ -419,7 +419,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
    uint32_t format = 0;
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			  6 * 4, 32, &brw->wm.surf_offset[unit]);
+			  6 * 4, 32, &brw->bind.surf_offset[unit]);
 
    switch (irb->Base.Format) {
    case MESA_FORMAT_XRGB8888:
@@ -500,7 +500,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw,
    }
 
    drm_intel_bo_emit_reloc(brw->intel.batch.bo,
-			   brw->wm.surf_offset[unit] + 4,
+			   brw->bind.surf_offset[unit] + 4,
 			   region->bo,
 			   surf[1] - region->bo->offset,
 			   I915_GEM_DOMAIN_RENDER,
@@ -559,7 +559,7 @@ brw_update_texture_surfaces(struct brw_context *brw)
       if (texUnit->_ReallyEnabled) {
 	 brw->intel.vtbl.update_texture_surface(ctx, i);
       } else {
-         brw->wm.surf_offset[surf] = 0;
+         brw->bind.surf_offset[surf] = 0;
       }
    }
 
@@ -580,7 +580,7 @@ const struct brw_tracked_state brw_texture_surfaces = {
  * numbers to surface state objects.
  */
 static void
-brw_wm_upload_binding_table(struct brw_context *brw)
+brw_upload_binding_table(struct brw_context *brw)
 {
    uint32_t *bind;
    int i;
@@ -589,25 +589,27 @@ brw_wm_upload_binding_table(struct brw_context *brw)
     * space for the binding table.
     */
    bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
-			  sizeof(uint32_t) * BRW_WM_MAX_SURF,
-			  32, &brw->wm.bind_bo_offset);
+			  sizeof(uint32_t) * BRW_MAX_SURFACES,
+			  32, &brw->bind.bo_offset);
 
-   for (i = 0; i < BRW_WM_MAX_SURF; i++) {
-      /* BRW_NEW_WM_SURFACES */
-      bind[i] = brw->wm.surf_offset[i];
+   /* BRW_NEW_WM_SURFACES and BRW_NEW_VS_CONSTBUF */
+   for (i = 0; i < BRW_MAX_SURFACES; i++) {
+      bind[i] = brw->bind.surf_offset[i];
    }
 
+   brw->state.dirty.brw |= BRW_NEW_VS_BINDING_TABLE;
    brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
 }
 
-const struct brw_tracked_state brw_wm_binding_table = {
+const struct brw_tracked_state brw_binding_table = {
    .dirty = {
       .mesa = 0,
       .brw = (BRW_NEW_BATCH |
+	      BRW_NEW_VS_CONSTBUF |
 	      BRW_NEW_WM_SURFACES),
       .cache = 0
    },
-   .emit = brw_wm_upload_binding_table,
+   .emit = brw_upload_binding_table,
 };
 
 void
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 462db5b..e3234b5 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -37,7 +37,7 @@ upload_vs_state(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_VS << 16 | (2 - 2));
-   OUT_BATCH(brw->vs.bind_bo_offset);
+   OUT_BATCH(brw->bind.bo_offset);
    ADVANCE_BATCH();
 
    if (brw->vs.push_const_size == 0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 8b79663..97c079a 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -104,7 +104,7 @@ upload_ps_state(struct brw_context *brw)
 
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS_PS << 16 | (2 - 2));
-   OUT_BATCH(brw->wm.bind_bo_offset);
+   OUT_BATCH(brw->bind.bo_offset);
    ADVANCE_BATCH();
 
    /* CACHE_NEW_SAMPLER */
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
index b4730c4..f74198b 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c
@@ -68,7 +68,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
    intel_miptree_get_dimensions_for_image(firstImage, &width, &height, &depth);
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			  sizeof(*surf), 32, &brw->wm.surf_offset[surf_index]);
+			  sizeof(*surf), 32, &brw->bind.surf_offset[surf_index]);
    memset(surf, 0, sizeof(*surf));
 
    surf->ss0.surface_type = translate_tex_target(tObj->Target);
@@ -118,7 +118,7 @@ gen7_update_texture_surface(struct gl_context *ctx, GLuint unit)
 
    /* Emit relocation to surface contents */
    drm_intel_bo_emit_reloc(brw->intel.batch.bo,
-			   brw->wm.surf_offset[surf_index] +
+			   brw->bind.surf_offset[surf_index] +
 			   offsetof(struct gen7_surface_state, ss1),
 			   intelObj->mt->region->bo, 0,
 			   I915_GEM_DOMAIN_SAMPLER, 0);
@@ -172,7 +172,7 @@ gen7_update_null_renderbuffer_surface(struct brw_context *brw, unsigned unit)
    struct gen7_surface_state *surf;
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			  sizeof(*surf), 32, &brw->wm.surf_offset[unit]);
+			  sizeof(*surf), 32, &brw->bind.surf_offset[unit]);
    memset(surf, 0, sizeof(*surf));
 
    surf->ss0.surface_type = BRW_SURFACE_NULL;
@@ -197,7 +197,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
    uint32_t tile_x, tile_y;
 
    surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
-			  sizeof(*surf), 32, &brw->wm.surf_offset[unit]);
+			  sizeof(*surf), 32, &brw->bind.surf_offset[unit]);
    memset(surf, 0, sizeof(*surf));
 
    switch (irb->Base.Format) {
@@ -252,7 +252,7 @@ gen7_update_renderbuffer_surface(struct brw_context *brw,
    surf->ss3.pitch = (region->pitch * region->cpp) - 1;
 
    drm_intel_bo_emit_reloc(brw->intel.batch.bo,
-			   brw->wm.surf_offset[unit] +
+			   brw->bind.surf_offset[unit] +
 			   offsetof(struct gen7_surface_state, ss1),
 			   region->bo,
 			   surf->ss1.base_addr - region->bo->offset,
-- 
1.7.7.1



More information about the mesa-dev mailing list