[Mesa-dev] [PATCH 04/10] i965: Upload separate per-stage sampler state tables.

Kenneth Graunke kenneth at whitecape.org
Wed Aug 14 18:55:10 PDT 2013


Also upload separate sampler default/texture border color entries.

At the moment, this is purely redundant: both tables are built from the
same combined VS/FS SamplersUsed mask, so they contain exactly the same
contents and we're simply wasting batch space and CPU time.

However, soon we'll only upload data for textures actually /used/ in
a particular stage, which will usually make the VS table empty and
very likely eliminate all redundancy.  This is just a stepping stone.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_context.h          | 21 ++++++++------
 src/mesa/drivers/dri/i965/brw_vs_state.c         |  4 +--
 src/mesa/drivers/dri/i965/brw_wm_sampler_state.c | 36 +++++++++++++++++-------
 src/mesa/drivers/dri/i965/brw_wm_state.c         |  8 +++---
 src/mesa/drivers/dri/i965/gen6_sampler_state.c   |  4 +--
 src/mesa/drivers/dri/i965/gen7_sampler_state.c   | 33 ++++++++++++++++------
 src/mesa/drivers/dri/i965/gen7_vs_state.c        |  2 +-
 src/mesa/drivers/dri/i965/gen7_wm_state.c        |  2 +-
 8 files changed, 72 insertions(+), 38 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 63136b1..18a43cb 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1063,11 +1063,6 @@ struct brw_context
       GLuint last_bufsz;
    } curbe;
 
-   /** SAMPLER_STATE count and offset */
-   struct {
-      uint32_t offset;
-   } sampler;
-
    /**
     * Layout of vertex data exiting the geometry portion of the pipleine.
     * This comes from the geometry shader if one exists, otherwise from the
@@ -1109,7 +1104,13 @@ struct brw_context
       uint32_t bind_bo_offset;
       uint32_t surf_offset[BRW_MAX_VS_SURFACES];
 
+      /** SAMPLER_STATE count and table offset */
       uint32_t sampler_count;
+      uint32_t sampler_offset;
+
+      /** Offsets in the batch to sampler default colors (texture border color)
+       */
+      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
    } vs;
 
    struct {
@@ -1152,10 +1153,6 @@ struct brw_context
    struct {
       struct brw_wm_prog_data *prog_data;
 
-      /** offsets in the batch to sampler default colors (texture border color)
-       */
-      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
-
       GLuint render_surf;
 
       drm_intel_bo *scratch_bo;
@@ -1183,7 +1180,13 @@ struct brw_context
       uint32_t bind_bo_offset;
       uint32_t surf_offset[BRW_MAX_WM_SURFACES];
 
+      /** SAMPLER_STATE count and table offset */
       uint32_t sampler_count;
+      uint32_t sampler_offset;
+
+      /** Offsets in the batch to sampler default colors (texture border color)
+       */
+      uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
 
       struct {
          struct ra_regs *regs;
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 13aabac..a8729df 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -157,12 +157,12 @@ brw_upload_vs_unit(struct brw_context *brw)
     */
    if (brw->vs.sampler_count) {
       vs->vs5.sampler_state_pointer =
-         (brw->batch.bo->offset + brw->sampler.offset) >> 5;
+         (brw->batch.bo->offset + brw->vs.sampler_offset) >> 5;
       drm_intel_bo_emit_reloc(brw->batch.bo,
                               brw->vs.state_offset +
                               offsetof(struct brw_vs_unit_state, vs5),
                               brw->batch.bo,
-                              brw->sampler.offset | vs->vs5.sampler_count,
+                              brw->vs.sampler_offset | vs->vs5.sampler_count,
                               I915_GEM_DOMAIN_INSTRUCTION, 0);
    }
 
diff --git a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
index ad78864..e2b4b8d 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_sampler_state.c
@@ -195,6 +195,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
 				     int unit,
                                      int ss_index,
                                      struct brw_sampler_state *sampler,
+                                     uint32_t sampler_state_table_offset,
                                      uint32_t *sdc_offset)
 {
    struct gl_context *ctx = &brw->ctx;
@@ -347,7 +348,7 @@ static void brw_update_sampler_state(struct brw_context *brw,
 					    *sdc_offset) >> 5;
 
       drm_intel_bo_emit_reloc(brw->batch.bo,
-			      brw->sampler.offset +
+			      sampler_state_table_offset +
 			      ss_index * sizeof(struct brw_sampler_state) +
 			      offsetof(struct brw_sampler_state, ss2),
 			      brw->batch.bo, *sdc_offset,
@@ -366,7 +367,10 @@ static void brw_update_sampler_state(struct brw_context *brw,
 
 
 static void
-brw_upload_samplers(struct brw_context *brw)
+brw_upload_sampler_state_table(struct brw_context *brw,
+                               uint32_t *sampler_count,
+                               uint32_t *sst_offset,
+                               uint32_t *sdc_offset)
 {
    struct gl_context *ctx = &brw->ctx;
    struct brw_sampler_state *samplers;
@@ -380,17 +384,15 @@ brw_upload_samplers(struct brw_context *brw)
    /* ARB programs use the texture unit number as the sampler index, so we
     * need to find the highest unit used.  A bit-count will not work.
     */
-   brw->wm.sampler_count = _mesa_fls(SamplersUsed);
-   /* Currently we only use one sampler state table.  Mirror the count. */
-   brw->vs.sampler_count = brw->wm.sampler_count;
+   *sampler_count = _mesa_fls(SamplersUsed);
 
-   if (brw->wm.sampler_count == 0)
+   if (*sampler_count == 0)
       return;
 
    samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
-			      brw->wm.sampler_count * sizeof(*samplers),
-			      32, &brw->sampler.offset);
-   memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
+			      *sampler_count * sizeof(*samplers),
+			      32, sst_offset);
+   memset(samplers, 0, *sampler_count * sizeof(*samplers));
 
    for (unsigned s = 0; s < brw->wm.sampler_count; s++) {
       if (SamplersUsed & (1 << s)) {
@@ -398,13 +400,27 @@ brw_upload_samplers(struct brw_context *brw)
             fs->SamplerUnits[s] : vs->SamplerUnits[s];
          if (ctx->Texture.Unit[unit]._ReallyEnabled)
             brw_update_sampler_state(brw, unit, s, &samplers[s],
-                                     &brw->wm.sdc_offset[s]);
+                                     *sst_offset, &sdc_offset[s]);
       }
    }
 
    brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
 }
 
+static void
+brw_upload_samplers(struct brw_context *brw)
+{
+   brw_upload_sampler_state_table(brw,
+                                  &brw->wm.sampler_count,
+                                  &brw->wm.sampler_offset,
+                                  brw->wm.sdc_offset);
+
+   brw_upload_sampler_state_table(brw,
+                                  &brw->vs.sampler_count,
+                                  &brw->vs.sampler_offset,
+                                  brw->vs.sdc_offset);
+}
+
 const struct brw_tracked_state brw_samplers = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 106d628..404fdad 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -150,7 +150,7 @@ brw_upload_wm_unit(struct brw_context *brw)
    if (brw->wm.sampler_count) {
       /* reloc */
       wm->wm4.sampler_state_pointer = (brw->batch.bo->offset +
-				       brw->sampler.offset) >> 5;
+				       brw->wm.sampler_offset) >> 5;
    } else {
       wm->wm4.sampler_state_pointer = 0;
    }
@@ -229,9 +229,9 @@ brw_upload_wm_unit(struct brw_context *brw)
       drm_intel_bo_emit_reloc(brw->batch.bo,
 			      brw->wm.state_offset +
 			      offsetof(struct brw_wm_unit_state, wm4),
-			      brw->batch.bo, (brw->sampler.offset |
-						wm->wm4.stats_enable |
-						(wm->wm4.sampler_count << 2)),
+			      brw->batch.bo, (brw->wm.sampler_offset |
+                                              wm->wm4.stats_enable |
+                                              (wm->wm4.sampler_count << 2)),
 			      I915_GEM_DOMAIN_INSTRUCTION, 0);
    }
 
diff --git a/src/mesa/drivers/dri/i965/gen6_sampler_state.c b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
index 0cc2a45..16be8a7 100644
--- a/src/mesa/drivers/dri/i965/gen6_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_sampler_state.c
@@ -39,9 +39,9 @@ upload_sampler_state_pointers(struct brw_context *brw)
 	     GS_SAMPLER_STATE_CHANGE |
 	     PS_SAMPLER_STATE_CHANGE |
 	     (4 - 2));
-   OUT_BATCH(brw->sampler.offset); /* VS */
+   OUT_BATCH(brw->vs.sampler_offset); /* VS */
    OUT_BATCH(0); /* GS */
-   OUT_BATCH(brw->sampler.offset);
+   OUT_BATCH(brw->wm.sampler_offset);
    ADVANCE_BATCH();
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index dd2493c..45bee78 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -185,7 +185,10 @@ gen7_update_sampler_state(struct brw_context *brw, int unit, int ss_index,
 
 
 static void
-gen7_upload_samplers(struct brw_context *brw)
+gen7_upload_sampler_state_table(struct brw_context *brw,
+                                uint32_t *sampler_count,
+                                uint32_t *sst_offset,
+                                uint32_t *sdc_offset)
 {
    struct gl_context *ctx = &brw->ctx;
    struct gen7_sampler_state *samplers;
@@ -196,17 +199,15 @@ gen7_upload_samplers(struct brw_context *brw)
 
    GLbitfield SamplersUsed = vs->SamplersUsed | fs->SamplersUsed;
 
-   brw->wm.sampler_count = _mesa_fls(SamplersUsed);
-   /* Currently we only use one sampler state table.  Mirror the count. */
-   brw->vs.sampler_count = brw->wm.sampler_count;
+   *sampler_count = _mesa_fls(SamplersUsed);
 
-   if (brw->wm.sampler_count == 0)
+   if (*sampler_count == 0)
       return;
 
    samplers = brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
-			      brw->wm.sampler_count * sizeof(*samplers),
-			      32, &brw->sampler.offset);
-   memset(samplers, 0, brw->wm.sampler_count * sizeof(*samplers));
+			      *sampler_count * sizeof(*samplers),
+			      32, sst_offset);
+   memset(samplers, 0, *sampler_count * sizeof(*samplers));
 
    for (unsigned s = 0; s < brw->wm.sampler_count; s++) {
       if (SamplersUsed & (1 << s)) {
@@ -214,13 +215,27 @@ gen7_upload_samplers(struct brw_context *brw)
             fs->SamplerUnits[s] : vs->SamplerUnits[s];
          if (ctx->Texture.Unit[unit]._ReallyEnabled)
             gen7_update_sampler_state(brw, unit, s, &samplers[s],
-                                      &brw->wm.sdc_offset[s]);
+                                      &sdc_offset[s]);
       }
    }
 
    brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
 }
 
+static void
+gen7_upload_samplers(struct brw_context *brw)
+{
+   gen7_upload_sampler_state_table(brw,
+                                   &brw->wm.sampler_count,
+                                   &brw->wm.sampler_offset,
+                                   brw->wm.sdc_offset);
+
+   gen7_upload_sampler_state_table(brw,
+                                   &brw->vs.sampler_count,
+                                   &brw->vs.sampler_offset,
+                                   brw->vs.sdc_offset);
+}
+
 const struct brw_tracked_state gen7_samplers = {
    .dirty = {
       .mesa = _NEW_TEXTURE,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index 634bd95..7a6ba59 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -48,7 +48,7 @@ upload_vs_state(struct brw_context *brw)
    /* CACHE_NEW_SAMPLER */
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
-   OUT_BATCH(brw->sampler.offset);
+   OUT_BATCH(brw->vs.sampler_offset);
    ADVANCE_BATCH();
 
    if (brw->vs.push_const_size == 0) {
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index d079a52..ba7a53d 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -125,7 +125,7 @@ upload_ps_state(struct brw_context *brw)
    /* CACHE_NEW_SAMPLER */
    BEGIN_BATCH(2);
    OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
-   OUT_BATCH(brw->sampler.offset);
+   OUT_BATCH(brw->wm.sampler_offset);
    ADVANCE_BATCH();
 
    /* CACHE_NEW_WM_PROG */
-- 
1.8.3.4



More information about the mesa-dev mailing list