[Mesa-dev] [PATCH] i965: Compute VS attribute WA bits earlier and check if they changed.
Chris Forbes
chrisf at ijw.co.nz
Wed Dec 3 18:17:20 PST 2014
Half-ignore the first part of that... I thinko'd and assumed Baytrail
worked the same as Haswell here, but obviously doesn't. I am
interested in how it affects earlier gens, though.
On Thu, Dec 4, 2014 at 3:13 PM, Chris Forbes <chrisf at ijw.co.nz> wrote:
> What's the perf impact on a platform which actually needs this?
>
> I think there would be some further substantial gains to be had by
> giving this its own dirty bit -- piles of other atoms listen to
> BRW_NEW_VERTEX_PROGRAM, but don't care about the workaround bits.
>
> On Thu, Dec 4, 2014 at 2:42 PM, Kenneth Graunke <kenneth at whitecape.org> wrote:
>> BRW_NEW_VERTICES is flagged every time we draw a primitive. Having
>> the brw_vs_prog atom depend on BRW_NEW_VERTICES meant that we had to
>> compute the VS program key and do a program cache lookup for every
>> single primitive. This is painfully expensive.
>>
>> The workaround bit computation is almost entirely based on the vertex
>> attribute arrays (brw->vb.inputs[i]), which are set by brw_merge_inputs.
>> The only thing it uses the VS program for is to see which VS inputs are
>> actually read. brw_merge_inputs() happens once per primitive, and can
>> safely look at the currently bound vertex program, as it doesn't change
>> in the middle of a draw.
>>
>> This patch moves the workaround bit computation to brw_merge_inputs(),
>> right after assigning brw->vb.inputs[i], and stores the previous WA bit
>> values in the context. If they've actually changed from the last draw
>> (which is uncommon), we signal that we need a new vertex program,
>> causing brw_vs_prog to compute a new key.
>>
>> Logically, it might make sense to create a BRW_NEW_VS_WORKAROUND_BITS
>> state flag, but I chose to use BRW_NEW_VERTEX_PROGRAM since it should
>> work just as well, and already exists.
>>
>> Improves performance in Gl32Batch7 by 15.5728% +/- 1.96978% on my
>> Haswell GT3e system. I'm told that Baytrail shows similar gains.
>>
>> Signed-off-by: Kenneth Graunke <kenneth at whitecape.org>
>> ---
>> src/mesa/drivers/dri/i965/brw_context.h | 8 ++++++
>> src/mesa/drivers/dri/i965/brw_draw.c | 46 +++++++++++++++++++++++++++++++++
>> src/mesa/drivers/dri/i965/brw_vs.c | 42 +++---------------------------
>> 3 files changed, 58 insertions(+), 38 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
>> index ec4b3dd..ace29df 100644
>> --- a/src/mesa/drivers/dri/i965/brw_context.h
>> +++ b/src/mesa/drivers/dri/i965/brw_context.h
>> @@ -1129,6 +1129,14 @@ struct brw_context
>> * the same VB packed over and over again.
>> */
>> unsigned int start_vertex_bias;
>> +
>> + /**
>> + * Certain vertex attribute formats aren't natively handled by the
>> + * hardware and require special VS code to fix up their values.
>> + *
>> + * These bitfields indicate which workarounds are needed.
>> + */
>> + uint8_t attrib_wa_flags[VERT_ATTRIB_MAX];
>> } vb;
>>
>> struct {
>> diff --git a/src/mesa/drivers/dri/i965/brw_draw.c b/src/mesa/drivers/dri/i965/brw_draw.c
>> index 4c2802a..d2ea085 100644
>> --- a/src/mesa/drivers/dri/i965/brw_draw.c
>> +++ b/src/mesa/drivers/dri/i965/brw_draw.c
>> @@ -46,6 +46,7 @@
>> #include "brw_defines.h"
>> #include "brw_context.h"
>> #include "brw_state.h"
>> +#include "brw_vs.h"
>>
>> #include "intel_batchbuffer.h"
>> #include "intel_buffers.h"
>> @@ -281,6 +282,7 @@ static void brw_emit_prim(struct brw_context *brw,
>> static void brw_merge_inputs( struct brw_context *brw,
>> const struct gl_client_array *arrays[])
>> {
>> + const struct gl_context *ctx = &brw->ctx;
>> GLuint i;
>>
>> for (i = 0; i < brw->vb.nr_buffers; i++) {
>> @@ -293,6 +295,50 @@ static void brw_merge_inputs( struct brw_context *brw,
>> brw->vb.inputs[i].buffer = -1;
>> brw->vb.inputs[i].glarray = arrays[i];
>> }
>> +
>> + if (brw->gen < 8 && !brw->is_haswell) {
>> + struct gl_program *vp = &ctx->VertexProgram._Current->Base;
>> + /* Prior to Haswell, the hardware can't natively support GL_FIXED or
>> + * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
>> + */
>> + for (i = 0; i < VERT_ATTRIB_MAX; i++) {
>> + if (!(vp->InputsRead & BITFIELD64_BIT(i)))
>> + continue;
>> +
>> + uint8_t wa_flags = 0;
>> +
>> + switch (brw->vb.inputs[i].glarray->Type) {
>> +
>> + case GL_FIXED:
>> + wa_flags = brw->vb.inputs[i].glarray->Size;
>> + break;
>> +
>> + case GL_INT_2_10_10_10_REV:
>> + wa_flags |= BRW_ATTRIB_WA_SIGN;
>> + /* fallthough */
>> +
>> + case GL_UNSIGNED_INT_2_10_10_10_REV:
>> + if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
>> + wa_flags |= BRW_ATTRIB_WA_BGRA;
>> +
>> + if (brw->vb.inputs[i].glarray->Normalized)
>> + wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
>> + else if (!brw->vb.inputs[i].glarray->Integer)
>> + wa_flags |= BRW_ATTRIB_WA_SCALE;
>> +
>> + break;
>> + }
>> +
>> + if (brw->vb.attrib_wa_flags[i] != wa_flags) {
>> + brw->vb.attrib_wa_flags[i] = wa_flags;
>> + /* New workaround flags means we need to compute a new program
>> + * key and look up the associated vertex shader. Just flag that
>> + * we have a new shader program, since it triggers that effect.
>> + */
>> + brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM;
>> + }
>> + }
>> + }
>> }
>>
>> /**
>> diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
>> index 2f628e5..6dbac1e 100644
>> --- a/src/mesa/drivers/dri/i965/brw_vs.c
>> +++ b/src/mesa/drivers/dri/i965/brw_vs.c
>> @@ -449,42 +449,9 @@ static void brw_upload_vs_prog(struct brw_context *brw)
>> brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
>> &key.base.tex);
>>
>> - /* BRW_NEW_VERTICES */
>> - if (brw->gen < 8 && !brw->is_haswell) {
>> - /* Prior to Haswell, the hardware can't natively support GL_FIXED or
>> - * 2_10_10_10_REV vertex formats. Set appropriate workaround flags.
>> - */
>> - for (i = 0; i < VERT_ATTRIB_MAX; i++) {
>> - if (!(vp->program.Base.InputsRead & BITFIELD64_BIT(i)))
>> - continue;
>> -
>> - uint8_t wa_flags = 0;
>> -
>> - switch (brw->vb.inputs[i].glarray->Type) {
>> -
>> - case GL_FIXED:
>> - wa_flags = brw->vb.inputs[i].glarray->Size;
>> - break;
>> -
>> - case GL_INT_2_10_10_10_REV:
>> - wa_flags |= BRW_ATTRIB_WA_SIGN;
>> - /* fallthough */
>> -
>> - case GL_UNSIGNED_INT_2_10_10_10_REV:
>> - if (brw->vb.inputs[i].glarray->Format == GL_BGRA)
>> - wa_flags |= BRW_ATTRIB_WA_BGRA;
>> -
>> - if (brw->vb.inputs[i].glarray->Normalized)
>> - wa_flags |= BRW_ATTRIB_WA_NORMALIZE;
>> - else if (!brw->vb.inputs[i].glarray->Integer)
>> - wa_flags |= BRW_ATTRIB_WA_SCALE;
>> -
>> - break;
>> - }
>> -
>> - key.gl_attrib_wa_flags[i] = wa_flags;
>> - }
>> - }
>> + /* BRW_NEW_VERTEX_PROGRAM */
>> + memcpy(key.gl_attrib_wa_flags, brw->vb.attrib_wa_flags,
>> + sizeof(brw->vb.attrib_wa_flags));
>>
>> if (!brw_search_cache(&brw->cache, BRW_CACHE_VS_PROG,
>> &key, sizeof(key),
>> @@ -521,8 +488,7 @@ const struct brw_tracked_state brw_vs_prog = {
>> _NEW_POLYGON |
>> _NEW_TEXTURE |
>> _NEW_TRANSFORM,
>> - .brw = BRW_NEW_VERTEX_PROGRAM |
>> - BRW_NEW_VERTICES,
>> + .brw = BRW_NEW_VERTEX_PROGRAM,
>> },
>> .emit = brw_upload_vs_prog
>> };
>> --
>> 2.1.3
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
More information about the mesa-dev
mailing list