[Mesa-dev] [PATCH] i965: Compute VS attribute WA bits earlier and check if they changed.
Chris Forbes
chrisf at ijw.co.nz
Wed Dec 3 18:13:59 PST 2014
What's the perf impact on a platform which actually needs this?
I think there would be some further substantial gains to be had by
giving this its own dirty bit -- piles of other atoms listen to
BRW_NEW_VERTEX_PROGRAM, but don't care about the workaround bits.
On Thu, Dec 4, 2014 at 2:42 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
> BRW_NEW_VERTICES is flagged every time we draw a primitive. Having
> the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to
> compute the VS program key and do a program cache lookup for every
> single primitive. This is painfully expensive.
>
> The workaround bit computation is almost entirely based on the vertex
> attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs.
> The only thing it uses the VS program for is to see which VS inputs are
> actually read. brw_merge_inputs() happens once per primitive, and can
> safely look at the currently bound vertex program, as it doesn't change
> in the middle of a draw.
>
> This patch moves the workaround bit computation to brw_merge_inputs(),
> right after assigning brw->vb.inputs[i], and stores the previous WA bit
> values in the context. If they've actually changed from the last draw
> (which is uncommon), we signal that we need a new vertex program,
> causing brw_vs_prog to compute a new key.
>
> Logically, it might make sense to create a BRW_NEW_VS_WORKAROUND_BITS
> state flag, but I chose to use BRW_NEW_VERTEX_PROGRAM since it should
> work just as well, and already exists.
>
> Improves performance in Gl32Batch7 by 15.5728% +/- 1.96978% on my
> Haswell GT3e system. I'm told that Baytrail shows similar gains.
>
> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
> ---
> src/mesa/drivers/dri/i965/brw_context.h | 8 ++++++
> src/mesa/drivers/dri/i965/brw_draw.c | 46 +++++++++++++++++++++++++++++++++
> src/mesa/drivers/dri/i965/brw_vs.c | 42 +++---------------------------
> 3 files changed, 58 insertions(+), 38 deletions(-)
>
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index ec4b3dd..ace29df 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1129,6 +1129,14 @@ struct brw_context
> * the same VB packed over and over again.
> */
> unsigned int start_vertex_bias;
> +
> + /**
> + * Certain vertex attribute formats aren't natively handled by the
> + * hardware and require special VS code to fix up their values.
> + *
> + * These bitfields indicate which workarounds are needed.
> + */
> + uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
> } vb;
>
> struct {
> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
> index 4c2802a..d2ea085 100644
> --- a/src/mesa/drivers/dri/i965/brw_draw.c
> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
> @@ -46,6 +46,7 @@
> #include "brw_defines.h"
> #include "brw_context.h"
> #include "brw_state.h"
> +#include "brw_vs.h"
>
> #include "intel_batchbuffer.h"
> #include "intel_buffers.h"
> @@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw,
> static void brw_merge_inputs( struct brw_context *brw,
> const struct gl_client_array *arrays[])
> {
> + const struct gl_context *ctx = &brw->ctx;
> GLuint i;
>
> for (i = 0; i < brw->vb.nr_buffers; i++) {
> @@ -293,6 +295,50 @@ static void brw_merge_inputs( struct brw_context *brw,
> brw->vb.inputs[i].buffer = -1;
> brw->vb.inputs[i].glarray = arrays[i];
> }
> +
> + if (brw->gen < 8 && !brw->is_haswell) {
> + struct gl_program *vp = &ctx->VertexProgram._Current->Base;
> + /* Prior to Haswell, the hardware can't natively support GL_FIXED or
> + * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
> + */
> + for (i = 0; i < VERT_ATTRIB_MAX; i++) {
> + if (!(vp->InputsRead & BITFIELD64_BIT(i)))
> + continue;
> +
> + uint8_t wa_flags = 0;
> +
> + switch (brw->vb.inputs[i].glarray->Type) {
> +
> + case GL_FIXED:
> + wa_flags = brw->vb.inputs[i].glarray->Size;
> + break;
> +
> + case GL_INT_2_10_10_10_REV:
> + wa_flags |= BRW_ATTRIB_WA_SIGN;
> + /* fallthough */
> +
> + case GL_UNSIGNED_INT_2_10_10_10_REV:
> + if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
> + wa_flags |= BRW_ATTRIB_WA_BGRA;
> +
> + if (brw->vb.inputs[i].glarray->Normalized)
> + wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
> + else if (!brw->vb.inputs[i].glarray->Integer)
> + wa_flags |= BRW_ATTRIB_WA_SCALE;
> +
> + break;
> + }
> +
> + if (brw->vb.attrib_wa_flags[i] != wa_flags) {
> + brw->vb.attrib_wa_flags[i] = wa_flags;
> + /* New workaround flags means we need to compute a new program
> + * key and look up the associated vertex shader. Just flag that
> + * we have a new shader program, since it triggers that effect.
> + */
> + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
> + }
> + }
> + }
> }
>
> /**
> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
> index 2f628e5..6dbac1e 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
> @@ -449,42 +449,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
> brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
> &key.base.tex);
>
> - /* BRW_NEW_VERTICES */
> - if (brw->gen < 8 && !brw->is_haswell) {
> - /* Prior to Haswell, the hardware can't natively support GL_FIXED or
> - * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
> - */
> - for (i = 0; i < VERT_ATTRIB_MAX; i++) {
> - if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
> - continue;
> -
> - uint8_t wa_flags = 0;
> -
> - switch (brw->vb.inputs[i].glarray->Type) {
> -
> - case GL_FIXED:
> - wa_flags = brw->vb.inputs[i].glarray->Size;
> - break;
> -
> - case GL_INT_2_10_10_10_REV:
> - wa_flags |= BRW_ATTRIB_WA_SIGN;
> - /* fallthough */
> -
> - case GL_UNSIGNED_INT_2_10_10_10_REV:
> - if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
> - wa_flags |= BRW_ATTRIB_WA_BGRA;
> -
> - if (brw->vb.inputs[i].glarray->Normalized)
> - wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
> - else if (!brw->vb.inputs[i].glarray->Integer)
> - wa_flags |= BRW_ATTRIB_WA_SCALE;
> -
> - break;
> - }
> -
> - key.gl_attrib_wa_flags[i] = wa_flags;
> - }
> - }
> + /* BRW_NEW_VERTEX_PROGRAM */
> + memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
> + sizeof(brw->vb.attrib_wa_flags));
>
> if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
> &key, sizeof(key),
> @@ -521,8 +488,7 @@ const struct brw_tracked_state brw_vs_prog = {
> _NEW_POLYGON |
> _NEW_TEXTURE |
> _NEW_TRANSFORM,
> - .brw = BRW_NEW_VERTEX_PROGRAM |
> - BRW_NEW_VERTICES,
> + .brw = BRW_NEW_VERTEX_PROGRAM,
> },
> .emit = brw_upload_vs_prog
> };
> --
> 2.1.3
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list