[Mesa-dev] [PATCH 07/11] i965/gen7+: Move sampler state packets to the stage sampler state table update.
Eric Anholt
eric at anholt.net
Thu May 1 14:21:29 PDT 2014
Now that we have the stage state coming into our setup of sampler states,
it's easy to drop an identifier into it of which stage the stage_state is,
and then look up which packet to emit in a little table.
No performance difference on cairo on glamor (n=492).
---
src/mesa/drivers/dri/i965/brw_context.c | 3 +++
src/mesa/drivers/dri/i965/brw_context.h | 1 +
src/mesa/drivers/dri/i965/gen7_gs_state.c | 8 +-------
src/mesa/drivers/dri/i965/gen7_sampler_state.c | 10 +++++++++-
src/mesa/drivers/dri/i965/gen7_vs_state.c | 8 +-------
src/mesa/drivers/dri/i965/gen7_wm_state.c | 9 +--------
src/mesa/drivers/dri/i965/gen8_gs_state.c | 8 +-------
src/mesa/drivers/dri/i965/gen8_ps_state.c | 8 +-------
src/mesa/drivers/dri/i965/gen8_vs_state.c | 8 +-------
9 files changed, 19 insertions(+), 44 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c
index cad83e2..478d05a 100644
--- a/src/mesa/drivers/dri/i965/brw_context.c
+++ b/src/mesa/drivers/dri/i965/brw_context.c
@@ -628,6 +628,9 @@ brwCreateContext(gl_api api,
brw->must_use_separate_stencil = screen->hw_must_use_separate_stencil;
brw->has_swizzling = screen->hw_has_swizzling;
+ brw->vs.base.stage = MESA_SHADER_VERTEX;
+ brw->gs.base.stage = MESA_SHADER_GEOMETRY;
+ brw->wm.base.stage = MESA_SHADER_FRAGMENT;
if (brw->gen >= 8) {
gen8_init_vtable_surface_functions(brw);
gen7_init_vtable_sampler_functions(brw);
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index 379af38..92e1592 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -925,6 +925,7 @@ struct brw_transform_feedback_object {
*/
struct brw_stage_state
{
+ gl_shader_stage stage;
struct brw_stage_prog_data *prog_data;
/**
diff --git a/src/mesa/drivers/dri/i965/gen7_gs_state.c b/src/mesa/drivers/dri/i965/gen7_gs_state.c
index d18ae15..06e6cf7 100644
--- a/src/mesa/drivers/dri/i965/gen7_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_gs_state.c
@@ -66,12 +66,6 @@ upload_gs_state(struct brw_context *brw)
/* CACHE_NEW_GS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
- /* CACHE_NEW_SAMPLER */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2));
- OUT_BATCH(stage_state->sampler_offset);
- ADVANCE_BATCH();
-
gen7_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
/**
@@ -198,7 +192,7 @@ const struct brw_tracked_state gen7_gs_state = {
BRW_NEW_GS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
- .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER
+ .cache = CACHE_NEW_GS_PROG
},
.emit = upload_gs_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen7_sampler_state.c b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
index 8eb337d..72acd3c 100644
--- a/src/mesa/drivers/dri/i965/gen7_sampler_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_sampler_state.c
@@ -187,6 +187,11 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
struct gl_context *ctx = &brw->ctx;
struct gen7_sampler_state *samplers;
uint32_t sampler_count = stage_state->sampler_count;
+ static const uint16_t packet_headers[] = {
+ [MESA_SHADER_VERTEX] = _3DSTATE_SAMPLER_STATE_POINTERS_VS,
+ [MESA_SHADER_GEOMETRY] = _3DSTATE_SAMPLER_STATE_POINTERS_GS,
+ [MESA_SHADER_FRAGMENT] = _3DSTATE_SAMPLER_STATE_POINTERS_PS,
+ };
GLbitfield SamplersUsed = prog->SamplersUsed;
@@ -207,7 +212,10 @@ gen7_upload_sampler_state_table(struct brw_context *brw,
}
}
- brw->state.dirty.cache |= CACHE_NEW_SAMPLER;
+ BEGIN_BATCH(2);
+ OUT_BATCH(packet_headers[stage_state->stage] << 16 | (2 - 2));
+ OUT_BATCH(stage_state->sampler_offset);
+ ADVANCE_BATCH();
}
void
diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
index b5fc871..6b1f680 100644
--- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
@@ -75,12 +75,6 @@ upload_vs_state(struct brw_context *brw)
if (!brw->is_haswell)
gen7_emit_vs_workaround_flush(brw);
- /* CACHE_NEW_SAMPLER */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
- OUT_BATCH(stage_state->sampler_offset);
- ADVANCE_BATCH();
-
gen7_upload_constant_state(brw, stage_state, true /* active */,
_3DSTATE_CONSTANT_VS);
@@ -126,7 +120,7 @@ const struct brw_tracked_state gen7_vs_state = {
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
- .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
+ .cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen7_wm_state.c b/src/mesa/drivers/dri/i965/gen7_wm_state.c
index eabadee..2b95ef1 100644
--- a/src/mesa/drivers/dri/i965/gen7_wm_state.c
+++ b/src/mesa/drivers/dri/i965/gen7_wm_state.c
@@ -143,12 +143,6 @@ upload_ps_state(struct brw_context *brw)
const int max_threads_shift = brw->is_haswell ?
HSW_PS_MAX_THREADS_SHIFT : IVB_PS_MAX_THREADS_SHIFT;
- /* CACHE_NEW_SAMPLER */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
- OUT_BATCH(brw->wm.base.sampler_offset);
- ADVANCE_BATCH();
-
/* CACHE_NEW_WM_PROG */
gen7_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
@@ -281,8 +275,7 @@ const struct brw_tracked_state gen7_ps_state = {
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
- .cache = (CACHE_NEW_SAMPLER |
- CACHE_NEW_WM_PROG)
+ .cache = (CACHE_NEW_WM_PROG)
},
.emit = upload_ps_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen8_gs_state.c b/src/mesa/drivers/dri/i965/gen8_gs_state.c
index 97fbf84..6baada3 100644
--- a/src/mesa/drivers/dri/i965/gen8_gs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_gs_state.c
@@ -36,12 +36,6 @@ gen8_upload_gs_state(struct brw_context *brw)
/* CACHE_NEW_GS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->gs.prog_data->base;
- /* CACHE_NEW_SAMPLER */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_GS << 16 | (2 - 2));
- OUT_BATCH(stage_state->sampler_offset);
- ADVANCE_BATCH();
-
gen8_upload_constant_state(brw, stage_state, active, _3DSTATE_CONSTANT_GS);
if (active) {
@@ -135,7 +129,7 @@ const struct brw_tracked_state gen8_gs_state = {
BRW_NEW_GS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION),
- .cache = CACHE_NEW_GS_PROG | CACHE_NEW_SAMPLER
+ .cache = CACHE_NEW_GS_PROG
},
.emit = gen8_upload_gs_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen8_ps_state.c b/src/mesa/drivers/dri/i965/gen8_ps_state.c
index 0856060..aa7183b 100644
--- a/src/mesa/drivers/dri/i965/gen8_ps_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_ps_state.c
@@ -136,12 +136,6 @@ upload_ps_state(struct brw_context *brw)
struct gl_context *ctx = &brw->ctx;
uint32_t dw3 = 0, dw6 = 0, dw7 = 0;
- /* CACHE_NEW_SAMPLER */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_PS << 16 | (2 - 2));
- OUT_BATCH(brw->wm.base.sampler_offset);
- ADVANCE_BATCH();
-
/* CACHE_NEW_WM_PROG */
gen8_upload_constant_state(brw, &brw->wm.base, true, _3DSTATE_CONSTANT_PS);
@@ -254,7 +248,7 @@ const struct brw_tracked_state gen8_ps_state = {
BRW_NEW_PS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION,
- .cache = CACHE_NEW_SAMPLER | CACHE_NEW_WM_PROG
+ .cache = CACHE_NEW_WM_PROG
},
.emit = upload_ps_state,
};
diff --git a/src/mesa/drivers/dri/i965/gen8_vs_state.c b/src/mesa/drivers/dri/i965/gen8_vs_state.c
index 373cfe4..e7634ee 100644
--- a/src/mesa/drivers/dri/i965/gen8_vs_state.c
+++ b/src/mesa/drivers/dri/i965/gen8_vs_state.c
@@ -62,12 +62,6 @@ upload_vs_state(struct brw_context *brw)
/* CACHE_NEW_VS_PROG */
const struct brw_vec4_prog_data *prog_data = &brw->vs.prog_data->base;
- /* CACHE_NEW_SAMPLER */
- BEGIN_BATCH(2);
- OUT_BATCH(_3DSTATE_SAMPLER_STATE_POINTERS_VS << 16 | (2 - 2));
- OUT_BATCH(stage_state->sampler_offset);
- ADVANCE_BATCH();
-
gen8_upload_constant_state(brw, stage_state, true /* active */,
_3DSTATE_CONSTANT_VS);
@@ -119,7 +113,7 @@ const struct brw_tracked_state gen8_vs_state = {
BRW_NEW_VS_BINDING_TABLE |
BRW_NEW_BATCH |
BRW_NEW_PUSH_CONSTANT_ALLOCATION,
- .cache = CACHE_NEW_VS_PROG | CACHE_NEW_SAMPLER
+ .cache = CACHE_NEW_VS_PROG
},
.emit = upload_vs_state,
};
--
1.9.2
More information about the mesa-dev
mailing list