[Mesa-dev] [PATCH 18/20] i965: Program the push constants state using the gather table
Francisco Jerez
currojerez at riseup.net
Tue Oct 27 06:18:15 PDT 2015
Francisco Jerez <currojerez at riseup.net> writes:
> Abdiel Janulgue <abdiel.janulgue at linux.intel.com> writes:
>
>> Use the gather table generated from the uniform uploads and
>> ir_binop_ubo_load to gather and pack the constants to the gather pool.
>>
>> Note that the 3DSTATE_CONSTANT_* packet now refers to the gather
>> pool generated by the resource streamer instead of the constant buffer
>> pointed to by an offset of the dynamic state base address.
>>
>> v2: Support GEN8 + non-trivial rebase.
>>
>> Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
>> ---
>> src/mesa/drivers/dri/i965/brw_state.h | 2 +-
>> src/mesa/drivers/dri/i965/gen6_gs_state.c | 2 +-
>> src/mesa/drivers/dri/i965/gen6_vs_state.c | 2 +-
>> src/mesa/drivers/dri/i965/gen6_wm_state.c | 2 +-
>> src/mesa/drivers/dri/i965/gen7_vs_state.c | 84 +++++++++++++++++++++++++++----
>> 5 files changed, 79 insertions(+), 13 deletions(-)
>>
>> diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h
>> index c7c7e0b..a1e6c73 100644
>> --- a/src/mesa/drivers/dri/i965/brw_state.h
>> +++ b/src/mesa/drivers/dri/i965/brw_state.h
>> @@ -361,7 +361,7 @@ brw_upload_pull_constants(struct brw_context *brw,
>> void
>> gen7_upload_constant_state(struct brw_context *brw,
>> const struct brw_stage_state *stage_state,
>> - bool active, unsigned opcode);
>> + bool active, unsigned opcode, unsigned gather_op);
>>
>> void gen7_rs_control(struct brw_context *brw, int enable);
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen6_gs_state.c b/src/mesa/drivers/dri/i965/gen6_gs_state.c
>> index eb4c586..79a899e 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_gs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_gs_state.c
>> @@ -48,7 +48,7 @@ gen6_upload_gs_push_constants(struct brw_context *brw)
>> }
>>
>> if (brw->gen >= 7)
>> - gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS);
>> + gen7_upload_constant_state(brw, stage_state, gp, _3DSTATE_CONSTANT_GS, _3DSTATE_GATHER_CONSTANT_GS);
>> }
>>
>> const struct brw_tracked_state gen6_gs_push_constants = {
>> diff --git a/src/mesa/drivers/dri/i965/gen6_vs_state.c b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> index d43af5b..bb375b9 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_vs_state.c
>> @@ -174,7 +174,7 @@ gen6_upload_vs_push_constants(struct brw_context *brw)
>> gen7_emit_vs_workaround_flush(brw);
>>
>> gen7_upload_constant_state(brw, stage_state, true /* active */,
>> - _3DSTATE_CONSTANT_VS);
>> + _3DSTATE_CONSTANT_VS, _3DSTATE_GATHER_CONSTANT_VS);
>> }
>> }
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen6_wm_state.c b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>> index d1748ba..c7b37c6 100644
>> --- a/src/mesa/drivers/dri/i965/gen6_wm_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen6_wm_state.c
>> @@ -51,7 +51,7 @@ gen6_upload_wm_push_constants(struct brw_context *brw)
>>
>> if (brw->gen >= 7) {
>> gen7_upload_constant_state(brw, &brw->wm.base, true,
>> - _3DSTATE_CONSTANT_PS);
>> + _3DSTATE_CONSTANT_PS, _3DSTATE_GATHER_CONSTANT_PS);
>> }
>> }
>>
>> diff --git a/src/mesa/drivers/dri/i965/gen7_vs_state.c b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> index b7e4858..cf07658 100644
>> --- a/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> +++ b/src/mesa/drivers/dri/i965/gen7_vs_state.c
>> @@ -28,19 +28,70 @@
>> #include "program/prog_parameter.h"
>> #include "program/prog_statevars.h"
>> #include "intel_batchbuffer.h"
>> +#include "glsl/glsl_parser_extras.h"
>>
>> +static void
>> +gen7_submit_gather_table(struct brw_context* brw,
>> + const struct brw_stage_state *stage_state,
>> + const struct brw_stage_prog_data *prog_data,
>> + unsigned gather_opcode)
>> +{
>> + uint32_t gather_dwords = 3 + prog_data->nr_gather_table;
>> +
>> + /* Ordinary uniforms are assigned to the first constant buffer slot */
>> + unsigned cb_valid = 1;
>> + /* Assign subsequent constant buffer slots to UBOs if any */
>> + cb_valid |= (prog_data->nr_ubo_params > 0) ?
>> + (2 << (BRW_UBO_GATHER_INDEX_APPEND + prog_data->max_ubo_const_block)) - 1 : 0;
>> +
>> + assert(cb_valid < 0xffff);
>> +
>> + BEGIN_BATCH(gather_dwords);
>> + OUT_BATCH(gather_opcode << 16 | (gather_dwords - 2));
>> + OUT_BATCH(SET_FIELD(cb_valid, BRW_GATHER_BUFFER_VALID) |
>> + SET_FIELD(BRW_UNIFORM_GATHER_INDEX_START / 16,
>> + BRW_GATHER_BINDING_TABLE_BLOCK));
>> + OUT_BATCH(stage_state->push_const_offset);
>> + for (int i = 0; i < prog_data->nr_gather_table; i++) {
>> + /* Which bo are we referring to? The uniform constant buffer or
>> + * the UBO block?
>> + */
>> + bool is_uniform = prog_data->gather_table[i].reg == -1;
>> + int cb_offset = is_uniform ? i :
>> + (prog_data->gather_table[i].const_offset / 16);
>> + int bt_offset = is_uniform ? 0 :
>> + (prog_data->gather_table[i].const_block + BRW_UBO_GATHER_INDEX_APPEND);
>> +
>> + assert(cb_offset < 256);
>> + assert(bt_offset < 16);
>> +
>> + OUT_BATCH(SET_FIELD(cb_offset, BRW_GATHER_CONST_BUFFER_OFFSET) |
>> + SET_FIELD(prog_data->gather_table[i].channel_mask,
>> + BRW_GATHER_CHANNEL_MASK) | bt_offset);
>> + }
>> + ADVANCE_BATCH();
>
> This is called in response to the _NEW_PROGRAM_CONSTANTS state flag,
> e.g. when uniforms from the default block are updated using
> glUniform*(), but not necessarily if, say, a new buffer object is bound
> to a UBO binding point -- Or worse, if something (e.g. glBufferSubData)
> somehow modifies a buffer object while it's already bound to the
> pipeline as UBO. It doesn't look like this would notice that a buffer
> has been modified and resend appropriate _3DSTATE_GATHER_CONSTANT_*S
> commands so that the new buffer contents are re-gathered.
>
> I suspect that my "arb_shader_image_load_store/host-mem-barrier/Uniform
> buffer/RaW" piglit test would almost reproduce the issue (it binds the
> same buffer as UBO and shader image simultaneously, then writes to the
> buffer using imageStore(), calls glMemoryBarrier(GL_UNIFORM_BARRIER_BIT)
> so that subsequent uniform memory access is coherent with previous image
> writes, and then reads back the same UBO from the shader) -- If it
> weren't because it uses non-constant indexing of the uniform array
> (which you don't promote to gather constants), and because it changes
> the value of a uniform between the two draw calls so you might actually
> end up here by luck. ;)
>
I've made a non-exhaustive list of things that can potentially write to
buffer objects and you should definitely consider. You may have to
throw away the gather pool if a buffer currently bound as UBO is
modified using:
- Usual buffer upload commands (glBufferSubData and friends).
- Buffer object mapping (persistent and coherent mappings are likely
to require special care).
- Meta paths that render into a buffer object bound as PBO
(glReadPixels, glGetTexImage).
- Shader image stores and atomics.
- Shader storage buffer objects.
- Shader atomic counters.
- Transform feedback.
- Query objects.
The gl_buffer_object::UsageHistory field may be useful for some of these
to find out whether you need to invalidate and re-gather your constant
cache -- E.g. glReadPixels, glGetTexImage and glBufferSubData could just
check if the target BO has USAGE_UNIFORM_BUFFER set among its usage
history flags and in that case invalidate the gather constant cache.
That would be simple but most likely overly pessimistic because the
UsageHistory bitset won't tell you whether the buffer is still bound as
UBO -- If it's not you wouldn't necessarily need to throw away the
contents of the gather constant cache.
To handle images, SSBOs and persistent non-coherent maps you could
probably just invalidate the whole gather pool whenever glMemoryBarrier
is called with UNIFORM_BARRIER_BIT or CLIENT_MAPPED_BUFFER_BARRIER_BIT
set.
The rest may be more difficult, and in some cases (e.g. while a buffer
bound as UBO is persistently *and* coherently mapped) you may have to
re-gather all constants prior to every draw call -- I'd expect that to
be even more expensive than using plain pull constants though, so we
might be better off disabling UBO gather in such cases even if that
means doing state-dependent recompiles.
>> +}
>>
>> void
>> gen7_upload_constant_state(struct brw_context *brw,
>> const struct brw_stage_state *stage_state,
>> - bool active, unsigned opcode)
>> + bool active, unsigned opcode, unsigned gather_opcode)
>> {
>> uint32_t mocs = brw->gen < 8 ? GEN7_MOCS_L3 : 0;
>>
>> /* Disable if the shader stage is inactive or there are no push constants. */
>> active = active && stage_state->push_const_size != 0;
>>
>> + bool use_gather = (brw->gather_pool.bo != NULL);
>> +
>> + int const_loc = use_gather ? 16 : 0;
>> int dwords = brw->gen >= 8 ? 11 : 7;
>> +
>> + struct brw_stage_prog_data *prog_data = stage_state->prog_data;
>> + if (prog_data && use_gather && active) {
>> + gen7_submit_gather_table(brw, stage_state, prog_data, gather_opcode);
>> + }
>> +
>> BEGIN_BATCH(dwords);
>> OUT_BATCH(opcode << 16 | (dwords - 2));
>>
>> @@ -59,10 +110,9 @@ gen7_upload_constant_state(struct brw_context *brw,
>> OUT_BATCH(0);
>> OUT_BATCH(stage_state->push_const_size);
>> } else {
>> - OUT_BATCH(active ? stage_state->push_const_size : 0);
>> + OUT_BATCH(active ? stage_state->push_const_size << const_loc : 0);
>> OUT_BATCH(0);
>> }
>> -
>> /* Pointer to the constant buffer. Covered by the set of state flags
>> * from gen6_prepare_wm_contants
>> */
>> @@ -79,23 +129,39 @@ gen7_upload_constant_state(struct brw_context *brw,
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> } else if (brw->gen>= 8) {
>> - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> - OUT_BATCH(0);
>> + if (use_gather) {
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
>> + OUT_BATCH(0);
>> + } else {
>> + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + OUT_BATCH(0);
>> + }
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> } else {
>> - OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
>> - OUT_BATCH(0);
>> + if (use_gather) {
>> + OUT_BATCH(0);
>> + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
>> + } else {
>> + OUT_BATCH(active ? (stage_state->push_const_offset | mocs) : 0);
>> + OUT_BATCH(0);
>> + }
>> OUT_BATCH(0);
>> OUT_BATCH(0);
>> }
>>
>> ADVANCE_BATCH();
>>
>> + /* Re-enable gather again if required */
>> + if (gather_switched_off)
>> + gen7_toggle_gather_constants(brw, true);
>> +
>> /* On SKL+ the new constants don't take effect until the next corresponding
>> * 3DSTATE_BINDING_TABLE_POINTER_* command is parsed so we need to ensure
>> * that is sent
>> --
>> 1.9.1
>>
>> _______________________________________________
>> mesa-dev mailing list
>> mesa-dev at lists.freedesktop.org
>> http://lists.freedesktop.org/mailman/listinfo/mesa-dev
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151027/077eb5db/attachment-0001.sig>
More information about the mesa-dev
mailing list