[Mesa-dev] [PATCH 11/11] i965: Move push constant state packets to push constant update time.
Eric Anholt
eric at anholt.net
Thu May 1 14:21:33 PDT 2014
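Emit the 3DSTATE_CONSTANT_{VS,GS,PS} packets from the push constant upload
atoms instead of from the per-stage unit state atoms, so that the unit state
atoms no longer need to be re-emitted on _NEW_PROGRAM_CONSTANTS or
BRW_NEW_PUSH_CONSTANT_ALLOCATION.

Measured effect on cairo-perf-trace runtime on glamor:
-0.553779% +/- 0.423394% (n=612)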
---
src/mesa/drivers/dri/i965/gen6_vs_state.c | 11 ++++++++++-
src/mesa/drivers/dri/i965/gen6_wm_state.c | 8 +++++++-
src/mesa/drivers/dri/i965/gen7_gs_state.c | 29 +++++++++++++++--------------
src/mesa/drivers/dri/i965/gen7_vs_state.c | 8 ++------
src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 ++-------
src/mesa/drivers/dri/i965/gen8_gs_state.c | 7 ++-----
src/mesa/drivers/dri/i965/gen8_ps_state.c | 8 ++------
src/mesa/drivers/dri/i965/gen8_vs_state.c | 8 ++------
8 files changed, 42 insertions(+), 46 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
index 25f55c4..9764645 100644
--- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
@@ -98,13 +98,22 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data,
stage_state, AUB_TRACE_VS_CONSTANTS);
+
+ if (brw->gen >= 7) {
+ if (brw->gen == 7 && !brw->is_haswell)
+ gen7_emit_vs_workaround_flush(brw);
+
+ gen7_upload_constant_state(brw, stage_state, true /* active */,
+ _3DSTATE_CONSTANT_VS);
+ }
}
const struct brw_tracked_state gen6_vs_push_constants = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_BATCH |
- BRW_NEW_VERTEX_PROGRAM),
+ BRW_NEW_VERTEX_PROGRAM |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = CACHE_NEW_VS_PROG,
},
.emit = gen6_upload_vs_push_constants,
diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
index 3f5dd29..402d9c3 100644
--- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
@@ -81,13 +81,19 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
brw->wm.base.push_const_size = ALIGN(prog_data->base.nr_params, 8) / 8;
}
+
+ if (brw->gen >= 7) {
+ gen7_upload_constant_state(brw, &brw->wm.base, true,
+ _3DSTATE_CONSTANT_PS);
+ }
}
const struct brw_tracked_state gen6_wm_push_constants = {
.dirty = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_BATCH |
- BRW_NEW_FRAGMENT_PROGRAM),
+ BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = CACHE_NEW_WM_PROG,
},
.emit = gen6_upload_wm_push_constants,
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index 599997d..30dfa6b 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -30,25 +30,29 @@
static void
gen7_upload_gs_push_constants(struct brw_context *brw)
{
+ const struct brw_stage_state *stage_state = &brw->gs.base;
/* BRW_NEW_GEOMETRY_PROGRAM */
- const struct brw_geometry_program *vp =
+ const struct brw_geometry_program *gp =
(struct brw_geometry_program *) brw->geometry_program;
- if (!vp)
- return;
- /* CACHE_NEW_GS_PROG */
- const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
- struct brw_stage_state *stage_state = &brw->gs.base;
+ if (gp) {
+ /* CACHE_NEW_GS_PROG */
+ const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
+ struct brw_stage_state *stage_state = &brw->gs.base;
+
+ gen6_upload_vec4_push_constants(brw, &gp->program.Base, prog_data,
+ stage_state, AUB_TRACE_VS_CONSTANTS);
+ }
- gen6_upload_vec4_push_constants(brw, &vp->program.Base, prog_data,
- stage_state, AUB_TRACE_VS_CONSTANTS);
+ gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS);
}
const struct brw_tracked_state gen7_gs_push_constants = {
.dirty = {
.mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
.brw = (BRW_NEW_BATCH |
- BRW_NEW_GEOMETRY_PROGRAM),
+ BRW_NEW_GEOMETRY_PROGRAM |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION),
.cache = CACHE_NEW_GS_PROG,
},
.emit = gen7_upload_gs_push_constants,
@@ -66,8 +70,6 @@ upload_gs_state(struct brw_context *brw)
/* CACHE_NEW_GS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
- gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
-
/**
* From Graphics BSpec: 3D-Media-GPGPU Engine > 3D Pipeline Stages >
* Geometry > Geometry Shader > State:
@@ -186,11 +188,10 @@ upload_gs_state(struct brw_context *brw)
const struct brw_tracked_state gen7_gs_state = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_BATCH |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+ BRW_NEW_BATCH),
.cache = CACHE_NEW_GS_PROG
},
.emit = upload_gs_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index a030310..4d99150 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -75,9 +75,6 @@ upload_vs_state(struct brw_context *brw)
if (!brw->is_haswell)
gen7_emit_vs_workaround_flush(brw);
- gen7_upload_constant_state(brw, stage_state, true /* active */,
- _3DSTATE_CONSTANT_VS);
-
/* Use ALT floating point mode for ARB vertex programs, because they
* require 0^0 == 1.
*/
@@ -114,11 +111,10 @@ upload_vs_state(struct brw_context *brw)
const struct brw_tracked_state gen7_vs_state = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_BATCH |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+ BRW_NEW_BATCH),
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index 972ee00..0782841 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -143,9 +143,6 @@ upload_ps_state(struct brw_context *brw)
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
- /* CACHE_NEW_WM_PROG */
- gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
-
dw2 = dw4 = dw5 = 0;
dw2 |=
@@ -267,13 +264,11 @@ upload_ps_state(struct brw_context *brw)
const struct brw_tracked_state gen7_ps_state = {
.dirty = {
- .mesa = (_NEW_PROGRAM_CONSTANTS |
- _NEW_COLOR |
+ .mesa = (_NEW_COLOR |
_NEW_BUFFERS |
_NEW_MULTISAMPLE),
.brw = (BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_BATCH |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+ BRW_NEW_BATCH),
.cache = (CACHE_NEW_WM_PROG)
},
.emit = upload_ps_state,
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index ef25115..a0f933c 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -36,8 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw)
/* CACHE_NEW_GS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
- gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
-
if (active) {
int urb_entry_write_offset = 1;
uint32_t urb_entry_output_length =
@@ -123,11 +121,10 @@ gen8_upload_gs_state(struct brw_context *brw)
const struct brw_tracked_state gen8_gs_state = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .mesa = _NEW_TRANSFORM,
.brw = (BRW_NEW_CONTEXT |
BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_BATCH |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION),
+ BRW_NEW_BATCH),
.cache = CACHE_NEW_GS_PROG
},
.emit = gen8_upload_gs_state,
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index f0362a2..8b8d5ea 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -136,9 +136,6 @@ upload_ps_state(struct brw_context *brw)
struct gl_context *ctx = &brw->ctx;
uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
- /* CACHE_NEW_WM_PROG */
- gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
-
/* Initialize the execution mask with VMask. Otherwise, derivatives are
* incorrect for subspans where some of the pixels are unlit. We believe
* the bit just didn't take effect in previous generations.
@@ -243,10 +240,9 @@ upload_ps_state(struct brw_context *brw)
const struct brw_tracked_state gen8_ps_state = {
.dirty = {
- .mesa = _NEW_PROGRAM_CONSTANTS | _NEW_MULTISAMPLE,
+ .mesa = _NEW_MULTISAMPLE,
.brw = BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_BATCH |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+ BRW_NEW_BATCH,
.cache = CACHE_NEW_WM_PROG
},
.emit = upload_ps_state,
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 9ac681f..638e216 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -39,9 +39,6 @@ upload_vs_state(struct brw_context *brw)
/* CACHE_NEW_VS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
- gen7_upload_constant_state(brw, stage_state, true /* active */,
- _3DSTATE_CONSTANT_VS);
-
/* Use ALT floating point mode for ARB vertex programs, because they
* require 0^0 == 1.
*/
@@ -84,11 +81,10 @@ upload_vs_state(struct brw_context *brw)
const struct brw_tracked_state gen8_vs_state = {
.dirty = {
- .mesa = _NEW_TRANSFORM | _NEW_PROGRAM_CONSTANTS,
+ .mesa = _NEW_TRANSFORM,
.brw = BRW_NEW_CONTEXT |
BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_BATCH |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+ BRW_NEW_BATCH,
.cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
--
1.9.2
More information about the mesa-dev mailing list