[Mesa-dev] [PATCH 04/13] i965: Use 0 for the number of binding table entries in 3DSTATE_(VS|WM).

Kenneth Graunke kenneth at whitecape.org
Tue Nov 8 14:32:03 PST 2011


These fields control how many entries the hardware prefetches into the
state cache, so they only impact performance, not correctness.  However,
it's not clear how to use this in a way that's beneficial.

According to the documentation, kernels "using a large number" of
entries may wish to program this to zero to avoid thrashing the cache;
it's unclear how many is too many.  Also, Ironlake's WM was missing this
feature entirely---the count had to be zero.

The dirty bit tracking to handle this complicates the surface state
and binding table setup; removing it should simplify things and make
future refactoring easier.  So just set 0 for the number of entries
rather than trying to compute and track it.

Appears to have no impact on Nexuiz and OpenArena on Sandybridge.

Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
 src/mesa/drivers/dri/i965/brw_context.h          |    4 ----
 src/mesa/drivers/dri/i965/brw_state_upload.c     |    2 --
 src/mesa/drivers/dri/i965/brw_vs_state.c         |    7 +------
 src/mesa/drivers/dri/i965/brw_vs_surface_state.c |    8 --------
 src/mesa/drivers/dri/i965/brw_wm_state.c         |   10 ++--------
 src/mesa/drivers/dri/i965/brw_wm_surface_state.c |    5 -----
 src/mesa/drivers/dri/i965/gen6_vs_state.c        |    7 ++-----
 src/mesa/drivers/dri/i965/gen6_wm_state.c        |    4 ----
 src/mesa/drivers/dri/i965/gen7_vs_state.c        |    5 +----
 src/mesa/drivers/dri/i965/gen7_wm_state.c        |    4 ----
 10 files changed, 6 insertions(+), 50 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 3e05e36..f8e6f81 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -169,8 +169,6 @@ enum brw_state_id {
  */
 #define BRW_NEW_BATCH                  (1 << BRW_STATE_BATCH)
 /** \see brw.state.depth_region */
-#define BRW_NEW_NR_WM_SURFACES         (1 << BRW_STATE_NR_WM_SURFACES)
-#define BRW_NEW_NR_VS_SURFACES         (1 << BRW_STATE_NR_VS_SURFACES)
 #define BRW_NEW_INDEX_BUFFER           (1 << BRW_STATE_INDEX_BUFFER)
 #define BRW_NEW_VS_CONSTBUF            (1 << BRW_STATE_VS_CONSTBUF)
 #define BRW_NEW_WM_CONSTBUF            (1 << BRW_STATE_WM_CONSTBUF)
@@ -739,7 +737,6 @@ struct brw_context
       /** Binding table of pointers to surf_bo entries */
       uint32_t bind_bo_offset;
       uint32_t surf_offset[BRW_VS_MAX_SURF];
-      GLuint nr_surfaces;      
 
       uint32_t push_const_offset; /* Offset in the batchbuffer */
       int push_const_size; /* in 256-bit register increments */
@@ -810,7 +807,6 @@ struct brw_context
       uint32_t sdc_offset[BRW_MAX_TEX_UNIT];
 
       GLuint render_surf;
-      GLuint nr_surfaces;      
 
       drm_intel_bo *scratch_bo;
 
diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c
index 2f16891..862c5a6 100644
--- a/src/mesa/drivers/dri/i965/brw_state_upload.c
+++ b/src/mesa/drivers/dri/i965/brw_state_upload.c
@@ -357,8 +357,6 @@ static struct dirty_bit_map brw_bits[] = {
    DEFINE_BIT(BRW_NEW_INDEX_BUFFER),
    DEFINE_BIT(BRW_NEW_VERTICES),
    DEFINE_BIT(BRW_NEW_BATCH),
-   DEFINE_BIT(BRW_NEW_NR_WM_SURFACES),
-   DEFINE_BIT(BRW_NEW_NR_VS_SURFACES),
    DEFINE_BIT(BRW_NEW_VS_CONSTBUF),
    DEFINE_BIT(BRW_NEW_WM_CONSTBUF),
    DEFINE_BIT(BRW_NEW_VS_BINDING_TABLE),
diff --git a/src/mesa/drivers/dri/i965/brw_vs_state.c b/src/mesa/drivers/dri/i965/brw_vs_state.c
index 5a9032c..631e182 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_state.c
@@ -71,11 +71,7 @@ brw_upload_vs_unit(struct brw_context *brw)
    */
    vs->thread1.single_program_flow = (intel->gen == 5);
 
-   /* BRW_NEW_NR_VS_SURFACES */
-   if (intel->gen == 5)
-      vs->thread1.binding_table_entry_count = 0; /* hardware requirement */
-   else
-      vs->thread1.binding_table_entry_count = brw->vs.nr_surfaces;
+   vs->thread1.binding_table_entry_count = 0;
 
    if (brw->vs.prog_data->total_scratch != 0) {
       vs->thread2.scratch_space_base_pointer =
@@ -176,7 +172,6 @@ const struct brw_tracked_state brw_vs_unit = {
       .brw   = (BRW_NEW_BATCH |
 		BRW_NEW_PROGRAM_CACHE |
 		BRW_NEW_CURBE_OFFSETS |
-                BRW_NEW_NR_VS_SURFACES |
 		BRW_NEW_URB_FENCE |
                 BRW_NEW_VERTEX_PROGRAM),
       .cache = CACHE_NEW_VS_PROG
diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
index 84d3101..4c99185 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
@@ -136,9 +136,6 @@ brw_update_vs_constant_surface( struct gl_context *ctx,
 
 /**
  * Vertex shader surfaces (constant buffer).
- *
- * This consumes the state updates for the constant buffer needing
- * to be updated, and produces BRW_NEW_NR_VS_SURFACES for the VS unit.
  */
 static void
 brw_upload_vs_surfaces(struct brw_context *brw)
@@ -170,11 +167,6 @@ brw_upload_vs_surfaces(struct brw_context *brw)
 	 brw->vs.bind_bo_offset = 0;
       }
    }
-
-   if (brw->vs.nr_surfaces != nr_surfaces) {
-      brw->state.dirty.brw |= BRW_NEW_NR_VS_SURFACES;
-      brw->vs.nr_surfaces = nr_surfaces;
-   }
 }
 
 const struct brw_tracked_state brw_vs_surfaces = {
diff --git a/src/mesa/drivers/dri/i965/brw_wm_state.c b/src/mesa/drivers/dri/i965/brw_wm_state.c
index 51ef745..69d7a76 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_state.c
@@ -113,12 +113,7 @@ brw_upload_wm_unit(struct brw_context *brw)
    wm->thread1.depth_coef_urb_read_offset = 1;
    wm->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754;
 
-   if (intel->gen == 5)
-      wm->thread1.binding_table_entry_count = 0; /* hardware requirement */
-   else {
-      /* BRW_NEW_NR_SURFACES */
-      wm->thread1.binding_table_entry_count = brw->wm.nr_surfaces;
-   }
+   wm->thread1.binding_table_entry_count = 0;
 
    if (brw->wm.prog_data->total_scratch != 0) {
       wm->thread2.scratch_space_base_pointer =
@@ -263,8 +258,7 @@ const struct brw_tracked_state brw_wm_unit = {
       .brw = (BRW_NEW_BATCH |
 	      BRW_NEW_PROGRAM_CACHE |
 	      BRW_NEW_FRAGMENT_PROGRAM |
-	      BRW_NEW_CURBE_OFFSETS |
-	      BRW_NEW_NR_WM_SURFACES),
+	      BRW_NEW_CURBE_OFFSETS),
 
       .cache = (CACHE_NEW_WM_PROG |
 		CACHE_NEW_SAMPLER)
diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
index 06716ba..297424f 100644
--- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
+++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
@@ -590,11 +590,6 @@ brw_upload_wm_surfaces(struct brw_context *brw)
       }
    }
 
-   if (brw->wm.nr_surfaces != nr_surfaces) {
-      brw->wm.nr_surfaces = nr_surfaces;
-      brw->state.dirty.brw |= BRW_NEW_NR_WM_SURFACES;
-   }
-
    brw->state.dirty.brw |= BRW_NEW_WM_SURFACES;
 }
 
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index e22fd39..7ea7e21 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -167,9 +167,7 @@ upload_vs_state(struct brw_context *brw)
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
    OUT_BATCH(brw->vs.prog_offset);
-   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
-             floating_point_mode |
-	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH(floating_point_mode | (0 << GEN6_VS_SAMPLER_COUNT_SHIFT));
 
    if (brw->vs.prog_data->total_scratch) {
       OUT_RELOC(brw->vs.scratch_bo,
@@ -220,8 +218,7 @@ upload_vs_state(struct brw_context *brw)
 const struct brw_tracked_state gen6_vs_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
-      .brw   = (BRW_NEW_NR_VS_SURFACES |
-		BRW_NEW_URB_FENCE |
+      .brw   = (BRW_NEW_URB_FENCE |
 		BRW_NEW_CONTEXT |
 		BRW_NEW_VERTEX_PROGRAM |
 		BRW_NEW_BATCH),
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 714d594..b98516e 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -140,9 +140,6 @@ upload_wm_state(struct brw_context *brw)
    if (ctx->Shader.CurrentFragmentProgram == NULL)
       dw2 |= GEN6_WM_FLOATING_POINT_MODE_ALT;
 
-   /* BRW_NEW_NR_WM_SURFACES */
-   dw2 |= brw->wm.nr_surfaces << GEN6_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-
    /* CACHE_NEW_SAMPLER */
    dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN6_WM_SAMPLER_COUNT_SHIFT;
    dw4 |= (brw->wm.prog_data->first_curbe_grf <<
@@ -217,7 +214,6 @@ const struct brw_tracked_state gen6_wm_state = {
 		_NEW_PROGRAM_CONSTANTS |
 		_NEW_POLYGON),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_NR_WM_SURFACES |
 		BRW_NEW_URB_FENCE |
 		BRW_NEW_BATCH),
       .cache = (CACHE_NEW_SAMPLER |
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index dbf9346..462db5b 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -75,9 +75,7 @@ upload_vs_state(struct brw_context *brw)
    BEGIN_BATCH(6);
    OUT_BATCH(_3DSTATE_VS << 16 | (6 - 2));
    OUT_BATCH(brw->vs.prog_offset);
-   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) |
-             floating_point_mode |
-	     (brw->vs.nr_surfaces << GEN6_VS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
+   OUT_BATCH((0 << GEN6_VS_SAMPLER_COUNT_SHIFT) | floating_point_mode);
 
    if (brw->vs.prog_data->total_scratch) {
       OUT_RELOC(brw->vs.scratch_bo,
@@ -101,7 +99,6 @@ const struct brw_tracked_state gen7_vs_state = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_CURBE_OFFSETS |
-                BRW_NEW_NR_VS_SURFACES |
 		BRW_NEW_URB_FENCE |
 		BRW_NEW_CONTEXT |
 		BRW_NEW_VERTEX_PROGRAM |
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 2dce359..8b79663 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -146,9 +146,6 @@ upload_ps_state(struct brw_context *brw)
 
    dw2 |= (ALIGN(brw->wm.sampler_count, 4) / 4) << GEN7_PS_SAMPLER_COUNT_SHIFT;
 
-   /* BRW_NEW_NR_WM_SURFACES */
-   dw2 |= brw->wm.nr_surfaces << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT;
-
    /* Use ALT floating point mode for ARB fragment programs, because they
     * require 0^0 == 1.
     */
@@ -198,7 +195,6 @@ const struct brw_tracked_state gen7_ps_state = {
       .mesa  = _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_CURBE_OFFSETS |
 		BRW_NEW_FRAGMENT_PROGRAM |
-                BRW_NEW_NR_WM_SURFACES |
 		BRW_NEW_PS_BINDING_TABLE |
 		BRW_NEW_URB_FENCE |
 		BRW_NEW_BATCH),
-- 
1.7.7.1



More information about the mesa-dev mailing list