[Mesa-dev] [PATCH 18/20] i965: Program the push constants state using the gather table

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Wed Oct 28 00:09:35 PDT 2015



On 10/27/2015 03:18 PM, Francisco Jerez wrote:
> Francisco Jerez <currojerez at riseup.net> writes:
> 
>> Abdiel Janulgue <abdiel.janulgue at linux.intel.com> writes:
>>
>>> Use the gather table generated from the uniform uploads and
>>> ir_binop_ubo_load to gather and pack the constants to the gather pool.
>>>
>>> Note that the 3DSTATE_CONSTANT_* packet now refers to the gather
>>> pool generated by the resource streamer instead of the constant buffer
>>> pointed to by an offset of the dynamic state base address.
>>>
>>> v2: Support GEN8 + non-trivial rebase.
>>>
>>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
>>> ---
>>>  src/mesa/drivers/dri/i965/brw_state.h     |  2 +-
>>>  src/mesa/drivers/dri/i965/gen6_gs_state.c |  2 +-
>>>  src/mesa/drivers/dri/i965/gen6_vs_state.c |  2 +-
>>>  src/mesa/drivers/dri/i965/gen6_wm_state.c |  2 +-
>>>  src/mesa/drivers/dri/i965/gen7_vs_state.c | 84 +++++++++++++++++++++++++++----
>>>  5 files changed, 79 insertions(+), 13 deletions(-)
>>>
>>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>>> index c7c7e0b..a1e6c73 100644
>>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>>> @@ -361,7 +361,7 @@ brw_upload_pull_constants(struct brw_context *brw,
>>>  void
>>>  gen7_upload_constant_state(struct brw_context *brw,
>>>                             const struct brw_stage_state *stage_state,
>>> -                           bool active, unsigned opcode);
>>> +                           bool active, unsigned opcode, unsigned gather_op);
>>>  
>>>  void gen7_rs_control(struct brw_context *brw, int enable);
>>>  
>>> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
>>> index eb4c586..79a899e 100644
>>> --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
>>> +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
>>> @@ -48,7 +48,7 @@ gen6_upload_gs_push_constants(struct brw_context *brw)
>>>     }
>>>  
>>>     if (brw->gen >= 7)
>>> -      gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS);
>>> +      gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS, _3DSTATE_GATHER_CONSTANT_GS);
>>>  }
>>>  
>>>  const struct brw_tracked_state gen6_gs_push_constants = {
>>> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>>> index d43af5b..bb375b9 100644
>>> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
>>> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>>> @@ -174,7 +174,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
>>>           gen7_emit_vs_workaround_flush(brw);
>>>  
>>>        gen7_upload_constant_state(brw, stage_state, true /* active */,
>>> -                                 _3DSTATE_CONSTANT_VS);
>>> +                                 _3DSTATE_CONSTANT_VS, _3DSTATE_GATHER_CONSTANT_VS);
>>>     }
>>>  }
>>>  
>>> diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>> index d1748ba..c7b37c6 100644
>>> --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>> +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>>> @@ -51,7 +51,7 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
>>>  
>>>     if (brw->gen >= 7) {
>>>        gen7_upload_constant_state(brw, &brw->wm.base, true,
>>> -                                 _3DSTATE_CONSTANT_PS);
>>> +                                 _3DSTATE_CONSTANT_PS, _3DSTATE_GATHER_CONSTANT_PS);
>>>     }
>>>  }
>>>  
>>> diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>>> index b7e4858..cf07658 100644
>>> --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
>>> +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>>> @@ -28,19 +28,70 @@
>>>  #include "program/prog_parameter.h"
>>>  #include "program/prog_statevars.h"
>>>  #include "intel_batchbuffer.h"
>>> +#include "glsl/glsl_parser_extras.h"
>>>  
>>> +static void
>>> +gen7_submit_gather_table(struct brw_context* brw,
>>> +                         const struct brw_stage_state *stage_state,
>>> +                         const struct brw_stage_prog_data *prog_data,
>>> +                         unsigned gather_opcode)
>>> +{
>>> +   uint32_t gather_dwords = 3 + prog_data->nr_gather_table;
>>> +
>>> +   /* Ordinary uniforms are assigned to the first constant buffer slot */
>>> +   unsigned cb_valid = 1;
>>> +   /* Assign subsequent constant buffer slots to UBOs if any */
>>> +   cb_valid |= (prog_data->nr_ubo_params > 0) ?
>>> +      (2 << (BRW_UBO_GATHER_INDEX_APPEND + prog_data->max_ubo_const_block)) - 1 : 0;
>>> +
>>> +   assert(cb_valid < 0xffff);
>>> +
>>> +   BEGIN_BATCH(gather_dwords);
>>> +   OUT_BATCH(gather_opcode << 16 | (gather_dwords - 2));
>>> +   OUT_BATCH(SET_FIELD(cb_valid, BRW_GATHER_BUFFER_VALID) |
>>> +             SET_FIELD(BRW_UNIFORM_GATHER_INDEX_START / 16,
>>> +                       BRW_GATHER_BINDING_TABLE_BLOCK));
>>> +   OUT_BATCH(stage_state->push_const_offset);
>>> +   for (int i = 0; i < prog_data->nr_gather_table; i++) {
>>> +      /* Which bo are we referring to? The uniform constant buffer or
>>> +       * the UBO block?
>>> +       */
>>> +      bool is_uniform = prog_data->gather_table[i].reg == -1;
>>> +      int cb_offset = is_uniform ? i :
>>> +         (prog_data->gather_table[i].const_offset / 16);
>>> +      int bt_offset = is_uniform ? 0 :
>>> +         (prog_data->gather_table[i].const_block + BRW_UBO_GATHER_INDEX_APPEND);
>>> +
>>> +      assert(cb_offset < 256);
>>> +      assert(bt_offset < 16);
>>> +
>>> +      OUT_BATCH(SET_FIELD(cb_offset, BRW_GATHER_CONST_BUFFER_OFFSET) |
>>> +                SET_FIELD(prog_data->gather_table[i].channel_mask,
>>> +                          BRW_GATHER_CHANNEL_MASK) | bt_offset);
>>> +   }
>>> +   ADVANCE_BATCH();
>>
>> This is called in response to the _NEW_PROGRAM_CONSTANTS state flag,
>> e.g. when uniforms from the default block are updated using
>> glUniform*(), but not necessarily if, say, a new buffer object is bound
>> to a UBO binding point -- Or worse, if something (e.g. glBufferSubData)
>> somehow modifies a buffer object while it's already bound to the
>> pipeline as UBO.  It doesn't look like this would notice that a buffer
>> has been modified and resend appropriate _3DSTATE_GATHER_CONSTANT_*S
>> commands so that the new buffer contents are re-gathered.
>>
>> I suspect that my "arb_shader_image_load_store/host-mem-barrier/Uniform
>> buffer/RaW" piglit test would almost reproduce the issue (it binds the
>> same buffer as UBO and shader image simultaneously, then writes to the
>> buffer using imageStore(), calls glMemoryBarrier(GL_UNIFORM_BARRIER_BIT)
>> so that subsequent uniform memory access is coherent with previous image
>> writes, and then reads back the same UBO from the shader) -- If it
>> weren't because it uses non-constant indexing of the uniform array
>> (which you don't promote to gather constants), and because it changes
>> the value of a uniform between the two draw calls so you might actually
>> end up here by luck. ;)
>>
> I've made a non-exhaustive list of things that can potentially write to
> buffer objects and you should definitely consider.  You may have to
> throw away the gather pool if a buffer currently bound as UBO is
> modified using:
> 
>  - Usual buffer upload commands (glBufferSubData and friends).
>  - Buffer object mapping (persistent and coherent mappings are likely
>    to require special care).
>  - Meta paths that render into a buffer object bound as PBO
>    (glReadPixels, glGetTexImage).
>  - Shader image stores and atomics.
>  - Shader storage buffer objects.
>  - Shader atomic counters.
>  - Transform feedback.
>  - Query objects.
> 
> The gl_buffer_object::UsageHistory field may be useful for some of these
> to find out whether you need to invalidate and re-gather your constant
> cache -- E.g. glReadPixels, glGetTexImage and glBufferSubData could just
> check if the target BO has USAGE_UNIFORM_BUFFER set among its usage
> history flags and in that case invalidate the gather constant cache.
> That would be simple but most likely overly pessimistic because the
> UsageHistory bitset won't tell you whether the buffer is still bound as
> UBO -- If it's not you wouldn't necessarily need to throw away the
> contents of the gather constant cache.
> 
> To handle images, SSBOs and persistent non-coherent maps you could
> probably just invalidate the whole gather pool whenever glMemoryBarrier
> is called with UNIFORM_BARRIER_BIT or CLIENT_MAPPED_BUFFER_BARRIER_BIT
> set.
> 
> The rest may be more difficult, and in some cases (e.g. while a buffer
> bound as UBO is persistently *and* coherently mapped) you may have to
> re-gather all constants prior to every draw call -- I'd expect that to
> be even more expensive than using plain pull constants though, so we
> might be better off disabling UBO gather in such cases even if that
> means doing state-dependent recompiles.
> 

Hi Francisco!

Looks exhaustive enough ;) Thanks for this! Wouldn't have noticed these
corner cases at all without your help. I'll try to address the
synchronisation issues in the next version!

-Abdiel


More information about the mesa-dev mailing list