[Mesa-dev] [PATCH 09/10] i965: Use linker-assigned sampler IDs in instruction encoding.

Kenneth Graunke kenneth at whitecape.org
Fri Aug 24 03:06:02 PDT 2012


When assigning uniform locations, the linker assigns each sampler
uniform a sequential numerical ID.  gl_shader_program::SamplerUnits maps
these sampler variable IDs to the actual texture units they reference
(specified via glUniform1i).

Previously, we encoded this mapping in the SEND instruction encoding:
the "sampler" was the texture unit number, and the binding table index
was SURF_INDEX_TEXTURE(the texture unit number).  This unfortunately
meant that whenever the application changed the value of a sampler
uniform, we had to recompile the shader to change the SEND instructions.

This was horrible for the game Cogs, which repeatedly switches between
using texture unit 0 and 1.  It also made fragment shader precompiles
useless: we'd do the precompile at glLinkShader() time, before the
application called glUniform1i to set the sampler values.  As soon as
it did that, we'd have to recompile, wasting time and space in the
program cache.

This patch encodes the SamplerUnits indirection in the binding table,
sampler state, and sampler default color tables.  Instead of baking the
texture unit number into the shader, we bake in the sampler variable ID
assigned by the linker.  Since those never change, we don't need to
recompile programs on uniform changes.

This does mean that the tables now depend on the linked shader program
being used for rendering, rather than simply representing all available
texture units.  This could cause an increase in state emission.

Another plus is that the sampler state and sampler default color tables
are now compact: we only emit as many entries as there are sampler
uniforms, with no holes in the table since the new sampler IDs are
sequential.  Previously we had to emit a full 16 entries every time,
since the tables tracked the state of all active texture units.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_fs_visitor.cpp     |  2 +-
 src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp   |  2 +-
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 27 ++++++++-----
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 51 ++++++++++++++++++------
 src/mesa/drivers/dri/i965/gen7_sampler_state.c   | 27 ++++++++-----
 5 files changed, 74 insertions(+), 35 deletions(-)

The big patch that actually changes how we do things.

diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
index 601f83c..e3d3a98 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp
@@ -1345,7 +1345,7 @@ fs_visitor::visit(ir_texture *ir)
    if (ir->offset != NULL && ir->op != ir_txf)
       inst->texture_offset = brw_texture_offset(ir->offset->as_constant());
 
-   inst->sampler = texunit;
+   inst->sampler = sampler;
 
    if (ir->shadow_comparitor)
       inst->shadow_compare = true;
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
index c22ba60..629ecb0 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -1897,7 +1897,7 @@ vec4_visitor::visit(ir_texture *ir)
    inst->header_present = ir->offset || intel->gen < 5;
    inst->base_mrf = 2;
    inst->mlen = inst->header_present + 1; /* always at least one */
-   inst->sampler = texunit;
+   inst->sampler = sampler;
    inst->dst = dst_reg(this, ir->type);
    inst->shadow_compare = ir->shadow_comparitor != NULL;
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index 10deb1d..610ef34 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -334,13 +334,14 @@ brw_upload_samplers(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->intel.ctx;
    struct brw_sampler_state *samplers;
-   int i;
 
-   brw->sampler.count = 0;
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled)
-	 brw->sampler.count = i + 1;
-   }
+   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */
+   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+   struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+
+   GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
+
+   brw->sampler.count = _mesa_bitcount(SamplersUsed);
 
    if (brw->sampler.count == 0)
       return;
@@ -350,9 +351,13 @@ brw_upload_samplers(struct brw_context *brw)
 			      32, &brw->sampler.offset);
    memset(samplers, 0, brw->sampler.count * sizeof(*samplers));
 
-   for (i = 0; i < brw->sampler.count; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled)
-	 brw_update_sampler_state(brw, i, i, &samplers[i]);
+   for (unsigned s = 0; s < brw->sampler.count; s++) {
+      if (SamplersUsed & (1 << s)) {
+         const unsigned unit = (fs->SamplersUsed & (1 << s)) ?
+            fs->SamplerUnits[s] : vs->SamplerUnits[s];
+         if (ctx->Texture.Unit[unit]._ReallyEnabled)
+            brw_update_sampler_state(brw, unit, s, &samplers[s]);
+      }
    }
 
    brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
@@ -361,7 +366,9 @@ brw_upload_samplers(struct brw_context *brw)
 const struct brw_tracked_state brw_samplers = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
-      .brw = BRW_NEW_BATCH,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_VERTEX_PROGRAM |
+             BRW_NEW_FRAGMENT_PROGRAM,
       .cache = 0
    },
    .emit = brw_upload_samplers,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 0c87b84..eefa427 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -1238,22 +1238,45 @@ const struct brw_tracked_state gen6_renderbuffer_surfaces = {
 static void
 brw_update_texture_surfaces(struct brw_context *brw)
 {
-   struct gl_context *ctx = &brw->intel.ctx;
+   struct intel_context *intel = &brw->intel;
+   struct gl_context *ctx = &intel->ctx;
+
+   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
+    * Unfortunately, we're stuck using the gl_program structs until the
+    * ARB_fragment_program front-end gets converted to GLSL IR.  These
+    * have the downside that SamplerUnits is split and only contains the
+    * mappings for samplers active in that stage.
+    */
+   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+   struct gl_program *fs = (struct gl_program *) brw->fragment_program;
 
-   for (unsigned i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
-      const GLuint surf = SURF_INDEX_TEXTURE(i);
+   unsigned num_samplers = _mesa_bitcount(vs->SamplersUsed | fs->SamplersUsed);
 
-      /* _NEW_TEXTURE */
-      if (texUnit->_ReallyEnabled) {
-	 brw->intel.vtbl.update_texture_surface(ctx, i, brw->wm.surf_offset, surf);
-      } else {
-         brw->wm.surf_offset[surf] = 0;
+   for (unsigned s = 0; s < num_samplers; s++) {
+      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
+      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;
+
+      if (vs->SamplersUsed & (1 << s)) {
+         const unsigned unit = vs->SamplerUnits[s];
+
+         /* _NEW_TEXTURE */
+         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+            intel->vtbl.update_texture_surface(ctx, unit,
+                                               brw->vs.surf_offset,
+                                               SURF_INDEX_VS_TEXTURE(s));
+         }
       }
 
-      /* For now, just mirror the texture setup to the VS slots. */
-      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(i)] =
-	 brw->wm.surf_offset[surf];
+      if (fs->SamplersUsed & (1 << s)) {
+         const unsigned unit = fs->SamplerUnits[s];
+
+         /* _NEW_TEXTURE */
+         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
+            intel->vtbl.update_texture_surface(ctx, unit,
+                                               brw->wm.surf_offset,
+                                               SURF_INDEX_TEXTURE(s));
+         }
+      }
    }
 
    brw->state.dirty.brw |= BRW_NEW_SURFACES;
@@ -1262,7 +1285,9 @@ brw_update_texture_surfaces(struct brw_context *brw)
 const struct brw_tracked_state brw_texture_surfaces = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
-      .brw = BRW_NEW_BATCH,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_VERTEX_PROGRAM |
+             BRW_NEW_FRAGMENT_PROGRAM,
       .cache = 0
    },
    .emit = brw_update_texture_surfaces,
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index 8969119..379a9c5 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -188,13 +188,14 @@ gen7_upload_samplers(struct brw_context *brw)
 {
    struct gl_context *ctx = &brw->intel.ctx;
    struct gen7_sampler_state *samplers;
-   int i;
 
-   brw->sampler.count = 0;
-   for (i = 0; i < BRW_MAX_TEX_UNIT; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled)
-	 brw->sampler.count = i + 1;
-   }
+   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM */
+   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+   struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+
+   GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
+
+   brw->sampler.count = _mesa_bitcount(SamplersUsed);
 
    if (brw->sampler.count == 0)
       return;
@@ -204,9 +205,13 @@ gen7_upload_samplers(struct brw_context *brw)
 			      32, &brw->sampler.offset);
    memset(samplers, 0, brw->sampler.count * sizeof(*samplers));
 
-   for (i = 0; i < brw->sampler.count; i++) {
-      if (ctx->Texture.Unit[i]._ReallyEnabled)
-	 gen7_update_sampler_state(brw, i, i, &samplers[i]);
+   for (unsigned s = 0; s < brw->sampler.count; s++) {
+      if (SamplersUsed & (1 << s)) {
+         const unsigned unit = (fs->SamplersUsed & (1 << s)) ?
+            fs->SamplerUnits[s] : vs->SamplerUnits[s];
+         if (ctx->Texture.Unit[unit]._ReallyEnabled)
+            gen7_update_sampler_state(brw, unit, s, &samplers[s]);
+      }
    }
 
    brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
@@ -215,7 +220,9 @@ gen7_upload_samplers(struct brw_context *brw)
 const struct brw_tracked_state gen7_samplers = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
-      .brw = BRW_NEW_BATCH,
+      .brw = BRW_NEW_BATCH |
+             BRW_NEW_VERTEX_PROGRAM |
+             BRW_NEW_FRAGMENT_PROGRAM,
       .cache = 0
    },
    .emit = gen7_upload_samplers,
-- 
1.7.11.4



More information about the mesa-dev mailing list