[Mesa-dev] [PATCH] i965: Compute VS attribute WA bits earlier and check if they changed.
Kenneth Graunke
kenneth at whitecape.org
Wed Dec 3 17:42:04 PST 2014
BRW_NEW_VERTICES is flagged every time we draw a primitive. Having
the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to
compute the VS program key and do a program cache lookup for every
single primitive. This is painfully expensive.
The workaround bit computation is almost entirely based on the vertex
attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs.
The only thing it uses the VS program for is to see which VS inputs are
actually read. brw_merge_inputs() happens once per primitive, and can
safely look at the currently bound vertex program, as it doesn't change
in the middle of a draw.
This patch moves the workaround bit computation to brw_merge_inputs(),
right after assigning brw->vb.inputs[i], and stores the previous WA bit
values in the context. If they've actually changed from the last draw
(which is uncommon), we signal that we need a new vertex program,
causing brw_vs_prog to compute a new key.
Logically, it might make sense to create a BRW_NEW_VS_WORKAROUND_BITS
state flag, but I chose to use BRW_NEW_VERTEX_PROGRAM since it should
work just as well, and already exists.
Improves performance in Gl32Batch7 by 15.5728% +/- 1.96978% on my
Haswell GT3e system. I'm told that Baytrail shows similar gains.
Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
---
src/mesa/drivers/dri/i965/brw_context.h | 8 ++++++
src/mesa/drivers/dri/i965/brw_draw.c | 46 +++++++++++++++++++++++++++++++++
src/mesa/drivers/dri/i965/brw_vs.c | 42 +++---------------------------
3 files changed, 58 insertions(+), 38 deletions(-)
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index ec4b3dd..ace29df 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -1129,6 +1129,14 @@ struct brw_context
* the same VB packed over and over again.
*/
unsigned int start_vertex_bias;
+
+ /**
+ * Certain vertex attribute formats aren't natively handled by the
+ * hardware and require special VS code to fix up their values.
+ *
+ * These bitfields indicate which workarounds are needed.
+ */
+ uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
} vb;
struct {
diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
index 4c2802a..d2ea085 100644
--- a/src/mesa/drivers/dri/i965/brw_draw.c
+++ b/src/mesa/drivers/dri/i965/brw_draw.c
@@ -46,6 +46,7 @@
#include "brw_defines.h"
#include "brw_context.h"
#include "brw_state.h"
+#include "brw_vs.h"
#include "intel_batchbuffer.h"
#include "intel_buffers.h"
@@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw,
static void brw_merge_inputs( struct brw_context *brw,
const struct gl_client_array *arrays[])
{
+ const struct gl_context *ctx = &brw->ctx;
GLuint i;
for (i = 0; i < brw->vb.nr_buffers; i++) {
@@ -293,6 +295,50 @@ static void brw_merge_inputs( struct brw_context *brw,
brw->vb.inputs[i].buffer = -1;
brw->vb.inputs[i].glarray = arrays[i];
}
+
+ if (brw->gen < 8 && !brw->is_haswell) {
+ struct gl_program *vp = &ctx->VertexProgram._Current->Base;
+ /* Prior to Haswell, the hardware can't natively support GL_FIXED or
+ * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
+ */
+ for (i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (!(vp->InputsRead & BITFIELD64_BIT(i)))
+ continue;
+
+ uint8_t wa_flags = 0;
+
+ switch (brw->vb.inputs[i].glarray->Type) {
+
+ case GL_FIXED:
+ wa_flags = brw->vb.inputs[i].glarray->Size;
+ break;
+
+ case GL_INT_2_10_10_10_REV:
+ wa_flags |= BRW_ATTRIB_WA_SIGN;
+ /* fallthough */
+
+ case GL_UNSIGNED_INT_2_10_10_10_REV:
+ if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
+ wa_flags |= BRW_ATTRIB_WA_BGRA;
+
+ if (brw->vb.inputs[i].glarray->Normalized)
+ wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
+ else if (!brw->vb.inputs[i].glarray->Integer)
+ wa_flags |= BRW_ATTRIB_WA_SCALE;
+
+ break;
+ }
+
+ if (brw->vb.attrib_wa_flags[i] != wa_flags) {
+ brw->vb.attrib_wa_flags[i] = wa_flags;
+ /* New workaround flags means we need to compute a new program
+ * key and look up the associated vertex shader. Just flag that
+ * we have a new shader program, since it triggers that effect.
+ */
+ brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
+ }
+ }
+ }
}
/**
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index 2f628e5..6dbac1e 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -449,42 +449,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
&key.base.tex);
- /* BRW_NEW_VERTICES */
- if (brw->gen < 8 && !brw->is_haswell) {
- /* Prior to Haswell, the hardware can't natively support GL_FIXED or
- * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
- */
- for (i = 0; i < VERT_ATTRIB_MAX; i++) {
- if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
- continue;
-
- uint8_t wa_flags = 0;
-
- switch (brw->vb.inputs[i].glarray->Type) {
-
- case GL_FIXED:
- wa_flags = brw->vb.inputs[i].glarray->Size;
- break;
-
- case GL_INT_2_10_10_10_REV:
- wa_flags |= BRW_ATTRIB_WA_SIGN;
- /* fallthough */
-
- case GL_UNSIGNED_INT_2_10_10_10_REV:
- if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
- wa_flags |= BRW_ATTRIB_WA_BGRA;
-
- if (brw->vb.inputs[i].glarray->Normalized)
- wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
- else if (!brw->vb.inputs[i].glarray->Integer)
- wa_flags |= BRW_ATTRIB_WA_SCALE;
-
- break;
- }
-
- key.gl_attrib_wa_flags[i] = wa_flags;
- }
- }
+ /* BRW_NEW_VERTEX_PROGRAM */
+ memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
+ sizeof(brw->vb.attrib_wa_flags));
if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
&key, sizeof(key),
@@ -521,8 +488,7 @@ const struct brw_tracked_state brw_vs_prog = {
_NEW_POLYGON |
_NEW_TEXTURE |
_NEW_TRANSFORM,
- .brw = BRW_NEW_VERTEX_PROGRAM |
- BRW_NEW_VERTICES,
+ .brw = BRW_NEW_VERTEX_PROGRAM,
},
.emit = brw_upload_vs_prog
};
--
2.1.3
More information about the mesa-dev
mailing list