[Mesa-dev] [PATCH 11/11] i965: Move push constant state packets to push constant update time.

Eric Anholt eric at anholt.net
Thu May 1 14:21:33 PDT 2014


Measured change in cairo-perf-trace runtime on glamor:
-0.553779% +/- 0.423394% (n=612), i.e. slightly faster.
---
 src/mesa/drivers/dri/i965/gen6_vs_state.c | 11 ++++++++++-
 src/mesa/drivers/dri/i965/gen6_wm_state.c |  8 +++++++-
 src/mesa/drivers/dri/i965/gen7_gs_state.c | 29 +++++++++++++++--------------
 src/mesa/drivers/dri/i965/gen7_vs_state.c |  8 ++------
 src/mesa/drivers/dri/i965/gen7_wm_state.c |  9 ++-------
 src/mesa/drivers/dri/i965/gen8_gs_state.c |  7 ++-----
 src/mesa/drivers/dri/i965/gen8_ps_state.c |  8 ++------
 src/mesa/drivers/dri/i965/gen8_vs_state.c |  8 ++------
 8 files changed, 42 insertions(+), 46 deletions(-)

diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 25f55c4..9764645 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -98,13 +98,22 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
 
    gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data,
                                    stage_state, AUB_TRACE_VS_CONSTANTS);
+
+   if (brw->gen >= 7) {
+      if (brw->gen == 7 && !brw->is_haswell)
+         gen7_emit_vs_workaround_flush(brw);
+
+      gen7_upload_constant_state(brw, stage_state, true /* active */,
+                                 _3DSTATE_CONSTANT_VS);
+   }
 }
 
 const struct brw_tracked_state gen6_vs_push_constants = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_BATCH |
-		BRW_NEW_VERTEX_PROGRAM),
+                BRW_NEW_VERTEX_PROGRAM |
+                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
       .cache = CACHE_NEW_VS_PROG,
    },
    .emit = gen6_upload_vs_push_constants,
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 3f5dd29..402d9c3 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -81,13 +81,19 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
 
       brw->wm.base.push_const_size = ALIGN(prog_data->base.nr_params, 8) / 8;
    }
+
+   if (brw->gen >= 7) {
+      gen7_upload_constant_state(brw, &brw->wm.base, true,
+                                 _3DSTATE_CONSTANT_PS);
+   }
 }
 
 const struct brw_tracked_state gen6_wm_push_constants = {
    .dirty = {
       .mesa  = _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_BATCH |
-		BRW_NEW_FRAGMENT_PROGRAM),
+                BRW_NEW_FRAGMENT_PROGRAM |
+                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
       .cache = CACHE_NEW_WM_PROG,
    },
    .emit = gen6_upload_wm_push_constants,
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 599997d..30dfa6b 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -30,25 +30,29 @@
 static void
 gen7_upload_gs_push_constants(struct brw_context *brw)
 {
+   const struct brw_stage_state *stage_state = &brw->gs.base;
    /* BRW_NEW_GEOMETRY_PROGRAM */
-   const struct brw_geometry_program *vp =
+   const struct brw_geometry_program *gp =
       (struct brw_geometry_program *) brw->geometry_program;
-   if (!vp)
-      return;
 
-   /* CACHE_NEW_GS_PROG */
-   const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
-   struct brw_stage_state *stage_state = &brw->gs.base;
+   if (gp) {
+      /* CACHE_NEW_GS_PROG */
+      const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
+      struct brw_stage_state *stage_state = &brw->gs.base;
+
+      gen6_upload_vec4_push_constants(brw, &gp->program.Base, prog_data,
+                                      stage_state, AUB_TRACE_VS_CONSTANTS);
+   }
 
-   gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data,
-                                   stage_state, AUB_TRACE_VS_CONSTANTS);
+   gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS);
 }
 
 const struct brw_tracked_state gen7_gs_push_constants = {
    .dirty = {
       .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
       .brw   = (BRW_NEW_BATCH |
-		BRW_NEW_GEOMETRY_PROGRAM),
+                BRW_NEW_GEOMETRY_PROGRAM |
+                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
       .cache = CACHE_NEW_GS_PROG,
    },
    .emit = gen7_upload_gs_push_constants,
@@ -66,8 +70,6 @@ upload_gs_state(struct brw_context *brw)
    /* CACHE_NEW_GS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
 
-   gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
-
    /**
     * From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
     * Geometry > Geometry Shader > State:
@@ -186,11 +188,10 @@ upload_gs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_gs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CONTEXT |
                 BRW_NEW_GEOMETRY_PROGRAM |
-                BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+                BRW_NEW_BATCH),
       .cache = CACHE_NEW_GS_PROG
    },
    .emit = upload_gs_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index a030310..4d99150 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -75,9 +75,6 @@ upload_vs_state(struct brw_context *brw)
    if (!brw->is_haswell)
       gen7_emit_vs_workaround_flush(brw);
 
-   gen7_upload_constant_state(brw, stage_state, true /* active */,
-                              _3DSTATE_CONSTANT_VS);
-
    /* Use ALT floating point mode for ARB vertex programs, because they
     * require 0^0 == 1.
     */
@@ -114,11 +111,10 @@ upload_vs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_vs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CONTEXT |
 		BRW_NEW_VERTEX_PROGRAM |
-		BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+		BRW_NEW_BATCH),
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_vs_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 972ee00..0782841 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -143,9 +143,6 @@ upload_ps_state(struct brw_context *brw)
    const int max_threads_shift = brw->is_haswell ?
       HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
 
-   /* CACHE_NEW_WM_PROG */
-   gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
-
    dw2 = dw4 = dw5 = 0;
 
    dw2 |=
@@ -267,13 +264,11 @@ upload_ps_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen7_ps_state = {
    .dirty = {
-      .mesa  = (_NEW_PROGRAM_CONSTANTS |
-		_NEW_COLOR |
+      .mesa  = (_NEW_COLOR |
                 _NEW_BUFFERS |
                 _NEW_MULTISAMPLE),
       .brw   = (BRW_NEW_FRAGMENT_PROGRAM |
-		BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+                BRW_NEW_BATCH),
       .cache = (CACHE_NEW_WM_PROG)
    },
    .emit = upload_ps_state,
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index ef25115..a0f933c 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -36,8 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw)
    /* CACHE_NEW_GS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
 
-   gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
-
    if (active) {
       int urb_entry_write_offset = 1;
       uint32_t urb_entry_output_length =
@@ -123,11 +121,10 @@ gen8_upload_gs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_gs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = (BRW_NEW_CONTEXT |
                 BRW_NEW_GEOMETRY_PROGRAM |
-                BRW_NEW_BATCH |
-                BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+                BRW_NEW_BATCH),
       .cache = CACHE_NEW_GS_PROG
    },
    .emit = gen8_upload_gs_state,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index f0362a2..8b8d5ea 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -136,9 +136,6 @@ upload_ps_state(struct brw_context *brw)
    struct gl_context *ctx = &brw->ctx;
    uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
 
-   /* CACHE_NEW_WM_PROG */
-   gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
-
    /* Initialize the execution mask with VMask.  Otherwise, derivatives are
     * incorrect for subspans where some of the pixels are unlit.  We believe
     * the bit just didn't take effect in previous generations.
@@ -243,10 +240,9 @@ upload_ps_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_ps_state = {
    .dirty = {
-      .mesa  = _NEW_PROGRAM_CONSTANTS | _NEW_MULTISAMPLE,
+      .mesa  = _NEW_MULTISAMPLE,
       .brw   = BRW_NEW_FRAGMENT_PROGRAM |
-               BRW_NEW_BATCH |
-               BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+               BRW_NEW_BATCH,
       .cache = CACHE_NEW_WM_PROG
    },
    .emit = upload_ps_state,
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 9ac681f..638e216 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -39,9 +39,6 @@ upload_vs_state(struct brw_context *brw)
    /* CACHE_NEW_VS_PROG */
    const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
 
-   gen7_upload_constant_state(brw, stage_state, true /* active */,
-                              _3DSTATE_CONSTANT_VS);
-
    /* Use ALT floating point mode for ARB vertex programs, because they
     * require 0^0 == 1.
     */
@@ -84,11 +81,10 @@ upload_vs_state(struct brw_context *brw)
 
 const struct brw_tracked_state gen8_vs_state = {
    .dirty = {
-      .mesa  = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+      .mesa  = _NEW_TRANSFORM,
       .brw   = BRW_NEW_CONTEXT |
                BRW_NEW_VERTEX_PROGRAM |
-               BRW_NEW_BATCH |
-               BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+               BRW_NEW_BATCH,
       .cache = CACHE_NEW_VS_PROG
    },
    .emit = upload_vs_state,
-- 
1.9.2



More information about the mesa-dev mailing list