[Mesa-dev] [PATCH 2/2] glsl: keep track of intra-stage indices for atomics
Francisco Jerez
currojerez at riseup.net
Mon Oct 26 07:16:58 PDT 2015
Timothy Arceri <t_arceri at yahoo.com.au> writes:
> This is more optimal as it means we no longer have to upload the same set
> of Atomic Buffer Object surfaces to all stages in the program.
>
> This also fixes a bug where since commit c0cd5b var->data.binding was
> being used as a replacement for atomic buffer index, but they don't have
> to be the same value they just happened to end up the same when binding is 0.
>
> Cc: Francisco Jerez <currojerez at riseup.net>
> Cc: Ilia Mirkin <imirkin at alum.mit.edu>
> Cc: Alejandro PiƱeiro <apinheiro at igalia.com>
> Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=90175
> ---
> src/glsl/ir_uniform.h | 8 ++++-
> src/glsl/link_atomics.cpp | 43 ++++++++++++++++++++++--
> src/glsl/nir/glsl_to_nir.cpp | 2 --
> src/glsl/nir/nir.h | 4 +--
> src/glsl/nir/nir_lower_atomics.c | 25 +++++++++++---
> src/mesa/drivers/dri/i965/brw_context.h | 2 +-
> src/mesa/drivers/dri/i965/brw_gs_surface_state.c | 4 +--
> src/mesa/drivers/dri/i965/brw_nir.c | 6 ++--
> src/mesa/drivers/dri/i965/brw_shader.cpp | 4 +--
> src/mesa/drivers/dri/i965/brw_vs_surface_state.c | 4 +--
> src/mesa/drivers/dri/i965/brw_wm_surface_state.c | 37 ++++++++++----------
> src/mesa/main/mtypes.h | 5 ++-
> 12 files changed, 103 insertions(+), 41 deletions(-)
>
> diff --git a/src/glsl/ir_uniform.h b/src/glsl/ir_uniform.h
> index 50fe76b..2265caf 100644
> --- a/src/glsl/ir_uniform.h
> +++ b/src/glsl/ir_uniform.h
> @@ -71,7 +71,9 @@ struct gl_uniform_driver_storage {
>
> struct gl_opaque_uniform_index {
> /**
> - * Base opaque uniform index
> + * == For types other than atomics ==
> + *
> + * This is the base opaque uniform index
> *
> * If \c gl_uniform_storage::base_type is an opaque type, this
> * represents its uniform index. If \c
> @@ -80,6 +82,10 @@ struct gl_opaque_uniform_index {
> * gl_uniform_storage::array_elements - 1, inclusive.
> *
> * Note that the index may be different in each shader stage.
> + *
> + * == For atomics ==
> + *
> + * This is the intra-stage stage index of gl_shader::AtomicBuffers[].
AFAICT this "index" field has the same semantics for atomics and
non-atomics, it's an intra-stage surface index regardless of whether
it's an atomic, sampler or image, the difference is just which array it
indexes into so I doubt it makes sense to split the comment like this.
With that and Samuel's comment taken into account this patch is:
Reviewed-by: Francisco Jerez <currojerez at riseup.net>
> */
> uint8_t index;
>
> diff --git a/src/glsl/link_atomics.cpp b/src/glsl/link_atomics.cpp
> index 100d03c..3073de0 100644
> --- a/src/glsl/link_atomics.cpp
> +++ b/src/glsl/link_atomics.cpp
> @@ -167,6 +167,7 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
> struct gl_shader_program *prog)
> {
> unsigned num_buffers;
> + unsigned num_atomic_buffers[MESA_SHADER_STAGES] = {};
> active_atomic_buffer *abs =
> find_active_atomic_counters(ctx, prog, &num_buffers);
>
> @@ -211,13 +212,49 @@ link_assign_atomic_counter_resources(struct gl_context *ctx,
> }
>
> /* Assign stage-specific fields. */
> - for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j)
> - mab.StageReferences[j] =
> - (ab.stage_references[j] ? GL_TRUE : GL_FALSE);
> + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
> + if (ab.stage_references[j]) {
> + mab.StageReferences[j] = GL_TRUE;
> + num_atomic_buffers[j]++;
> + } else {
> + mab.StageReferences[j] = GL_FALSE;
> + }
> + }
>
> i++;
> }
>
> + /* Store a list pointers to atomic buffers per stage and store the index
> + * to this intra-stage buffer list along with the uniform.
> + */
> + for (unsigned j = 0; j < MESA_SHADER_STAGES; ++j) {
> + if (prog->_LinkedShaders[j] && num_atomic_buffers[j] > 0) {
> + prog->_LinkedShaders[j]->NumAtomicBuffers = num_atomic_buffers[j];
> + prog->_LinkedShaders[j]->AtomicBuffers =
> + rzalloc_array(prog, gl_active_atomic_buffer *,
> + num_atomic_buffers[j]);
> +
> + unsigned intra_stage_idx = 0;
> + for (unsigned i = 0; i < num_buffers; i++) {
> + struct gl_active_atomic_buffer *atomic_buffer =
> + &prog->AtomicBuffers[i];
> + if (atomic_buffer->StageReferences[j]) {
> + prog->_LinkedShaders[j]->AtomicBuffers[intra_stage_idx] =
> + atomic_buffer;
> +
> + for (unsigned u = 0; u < atomic_buffer->NumUniforms; u++) {
> + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].index =
> + intra_stage_idx;
> + prog->UniformStorage[atomic_buffer->Uniforms[u]].opaque[j].active =
> + true;
> + }
> +
> + intra_stage_idx++;
> + }
> + }
> + }
> + }
> +
> delete [] abs;
> assert(i == num_buffers);
> }
> diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp
> index 4dd6287..0e12a5b 100644
> --- a/src/glsl/nir/glsl_to_nir.cpp
> +++ b/src/glsl/nir/glsl_to_nir.cpp
> @@ -357,8 +357,6 @@ nir_visitor::visit(ir_variable *ir)
>
> var->data.index = ir->data.index;
> var->data.binding = ir->data.binding;
> - /* XXX Get rid of buffer_index */
> - var->data.atomic.buffer_index = ir->data.binding;
> var->data.atomic.offset = ir->data.atomic.offset;
> var->data.image.read_only = ir->data.image_read_only;
> var->data.image.write_only = ir->data.image_write_only;
> diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h
> index 268fbc2..aa3224d 100644
> --- a/src/glsl/nir/nir.h
> +++ b/src/glsl/nir/nir.h
> @@ -308,7 +308,6 @@ typedef struct {
> * Location an atomic counter is stored at.
> */
> struct {
> - unsigned buffer_index;
> unsigned offset;
> } atomic;
>
> @@ -1920,7 +1919,8 @@ void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables);
>
> void nir_lower_two_sided_color(nir_shader *shader);
>
> -void nir_lower_atomics(nir_shader *shader);
> +void nir_lower_atomics(nir_shader *shader,
> + const struct gl_shader_program *shader_program);
> void nir_lower_to_source_mods(nir_shader *shader);
>
> bool nir_lower_gs_intrinsics(nir_shader *shader);
> diff --git a/src/glsl/nir/nir_lower_atomics.c b/src/glsl/nir/nir_lower_atomics.c
> index 6f9ecc0..ea237b5 100644
> --- a/src/glsl/nir/nir_lower_atomics.c
> +++ b/src/glsl/nir/nir_lower_atomics.c
> @@ -25,17 +25,24 @@
> *
> */
>
> +#include "ir_uniform.h"
> #include "nir.h"
> #include "main/config.h"
> #include <assert.h>
>
> +typedef struct {
> + const struct gl_shader_program *shader_program;
> + nir_shader *shader;
> +} lower_atomic_state;
> +
> /*
> * replace atomic counter intrinsics that use a variable with intrinsics
> * that directly store the buffer index and byte offset
> */
>
> static void
> -lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
> +lower_instr(nir_intrinsic_instr *instr,
> + lower_atomic_state *state)
> {
> nir_intrinsic_op op;
> switch (instr->intrinsic) {
> @@ -60,10 +67,11 @@ lower_instr(nir_intrinsic_instr *instr, nir_function_impl *impl)
> return; /* atomics passed as function arguments can't be lowered */
>
> void *mem_ctx = ralloc_parent(instr);
> + unsigned uniform_loc = instr->variables[0]->var->data.location;
>
> nir_intrinsic_instr *new_instr = nir_intrinsic_instr_create(mem_ctx, op);
> new_instr->const_index[0] =
> - (int) instr->variables[0]->var->data.atomic.buffer_index;
> + state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index;
>
> nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
> offset_const->value.u[0] = instr->variables[0]->var->data.atomic.offset;
> @@ -130,18 +138,25 @@ lower_block(nir_block *block, void *state)
> {
> nir_foreach_instr_safe(block, instr) {
> if (instr->type == nir_instr_type_intrinsic)
> - lower_instr(nir_instr_as_intrinsic(instr), state);
> + lower_instr(nir_instr_as_intrinsic(instr),
> + (lower_atomic_state *) state);
> }
>
> return true;
> }
>
> void
> -nir_lower_atomics(nir_shader *shader)
> +nir_lower_atomics(nir_shader *shader,
> + const struct gl_shader_program *shader_program)
> {
> + lower_atomic_state state = {
> + .shader = shader,
> + .shader_program = shader_program,
> + };
> +
> nir_foreach_overload(shader, overload) {
> if (overload->impl) {
> - nir_foreach_block(overload->impl, lower_block, overload->impl);
> + nir_foreach_block(overload->impl, lower_block, (void *) &state);
> nir_metadata_preserve(overload->impl, nir_metadata_block_index |
> nir_metadata_dominance);
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
> index 19a5117..404c9f5 100644
> --- a/src/mesa/drivers/dri/i965/brw_context.h
> +++ b/src/mesa/drivers/dri/i965/brw_context.h
> @@ -1797,7 +1797,7 @@ void brw_upload_ubo_surfaces(struct brw_context *brw,
> struct brw_stage_prog_data *prog_data,
> bool dword_pitch);
> void brw_upload_abo_surfaces(struct brw_context *brw,
> - struct gl_shader_program *prog,
> + struct gl_shader *shader,
> struct brw_stage_state *stage_state,
> struct brw_stage_prog_data *prog_data);
> void brw_upload_image_surfaces(struct brw_context *brw,
> diff --git a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
> index 0bb3074..b9d40cb 100644
> --- a/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_gs_surface_state.c
> @@ -105,8 +105,8 @@ brw_upload_gs_abo_surfaces(struct brw_context *brw)
>
> if (prog) {
> /* BRW_NEW_GS_PROG_DATA */
> - brw_upload_abo_surfaces(brw, prog, &brw->gs.base,
> - &brw->gs.prog_data->base.base);
> + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_GEOMETRY],
> + &brw->gs.base, &brw->gs.prog_data->base.base);
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c
> index 12f47ad..060e497 100644
> --- a/src/mesa/drivers/dri/i965/brw_nir.c
> +++ b/src/mesa/drivers/dri/i965/brw_nir.c
> @@ -158,8 +158,10 @@ brw_create_nir(struct brw_context *brw,
> nir_lower_system_values(nir);
> nir_validate_shader(nir);
>
> - nir_lower_atomics(nir);
> - nir_validate_shader(nir);
> + if (shader_prog) {
> + nir_lower_atomics(nir, shader_prog);
> + nir_validate_shader(nir);
> + }
>
> nir_optimize(nir, is_scalar);
>
> diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
> index 3960e86..baa219c 100644
> --- a/src/mesa/drivers/dri/i965/brw_shader.cpp
> +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
> @@ -1389,9 +1389,9 @@ brw_assign_common_binding_table_offsets(gl_shader_stage stage,
> stage_prog_data->binding_table.gather_texture_start = 0xd0d0d0d0;
> }
>
> - if (shader_prog && shader_prog->NumAtomicBuffers) {
> + if (shader && shader->NumAtomicBuffers) {
> stage_prog_data->binding_table.abo_start = next_binding_table_offset;
> - next_binding_table_offset += shader_prog->NumAtomicBuffers;
> + next_binding_table_offset += shader->NumAtomicBuffers;
> } else {
> stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
> }
> diff --git a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
> index 9bb48eb..79547a6 100644
> --- a/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_vs_surface_state.c
> @@ -177,8 +177,8 @@ brw_upload_vs_abo_surfaces(struct brw_context *brw)
>
> if (prog) {
> /* BRW_NEW_VS_PROG_DATA */
> - brw_upload_abo_surfaces(brw, prog, &brw->vs.base,
> - &brw->vs.prog_data->base.base);
> + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_VERTEX],
> + &brw->vs.base, &brw->vs.prog_data->base.base);
> }
> }
>
> diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> index c671e23..89d67b0 100644
> --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c
> @@ -1029,7 +1029,7 @@ const struct brw_tracked_state brw_cs_ubo_surfaces = {
>
> void
> brw_upload_abo_surfaces(struct brw_context *brw,
> - struct gl_shader_program *prog,
> + struct gl_shader *shader,
> struct brw_stage_state *stage_state,
> struct brw_stage_prog_data *prog_data)
> {
> @@ -1037,21 +1037,22 @@ brw_upload_abo_surfaces(struct brw_context *brw,
> uint32_t *surf_offsets =
> &stage_state->surf_offset[prog_data->binding_table.abo_start];
>
> - for (unsigned i = 0; i < prog->NumAtomicBuffers; i++) {
> - struct gl_atomic_buffer_binding *binding =
> - &ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
> - struct intel_buffer_object *intel_bo =
> - intel_buffer_object(binding->BufferObject);
> - drm_intel_bo *bo = intel_bufferobj_buffer(
> - brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
> -
> - brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
> - binding->Offset, BRW_SURFACEFORMAT_RAW,
> - bo->size - binding->Offset, 1, true);
> - }
> + if (shader && shader->NumAtomicBuffers) {
> + for (unsigned i = 0; i < shader->NumAtomicBuffers; i++) {
> + struct gl_atomic_buffer_binding *binding =
> + &ctx->AtomicBufferBindings[(*shader->AtomicBuffers[i]).Binding];
> + struct intel_buffer_object *intel_bo =
> + intel_buffer_object(binding->BufferObject);
> + drm_intel_bo *bo = intel_bufferobj_buffer(
> + brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
> +
> + brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
> + binding->Offset, BRW_SURFACEFORMAT_RAW,
> + bo->size - binding->Offset, 1, true);
> + }
>
> - if (prog->NumAtomicBuffers)
> brw->ctx.NewDriverState |= BRW_NEW_SURFACES;
> + }
> }
>
> static void
> @@ -1063,8 +1064,8 @@ brw_upload_wm_abo_surfaces(struct brw_context *brw)
>
> if (prog) {
> /* BRW_NEW_FS_PROG_DATA */
> - brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
> - &brw->wm.prog_data->base);
> + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
> + &brw->wm.base, &brw->wm.prog_data->base);
> }
> }
>
> @@ -1088,8 +1089,8 @@ brw_upload_cs_abo_surfaces(struct brw_context *brw)
>
> if (prog) {
> /* BRW_NEW_CS_PROG_DATA */
> - brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
> - &brw->cs.prog_data->base);
> + brw_upload_abo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
> + &brw->cs.base, &brw->cs.prog_data->base);
> }
> }
>
> diff --git a/src/mesa/main/mtypes.h b/src/mesa/main/mtypes.h
> index 288d757..6d0cf75 100644
> --- a/src/mesa/main/mtypes.h
> +++ b/src/mesa/main/mtypes.h
> @@ -2392,6 +2392,9 @@ struct gl_shader
> */
> GLuint NumImages;
>
> + struct gl_active_atomic_buffer **AtomicBuffers;
> + unsigned NumAtomicBuffers;
> +
> /**
> * Whether early fragment tests are enabled as defined by
> * ARB_shader_image_load_store.
> @@ -4484,7 +4487,7 @@ static inline bool
> _mesa_active_fragment_shader_has_atomic_ops(const struct gl_context *ctx)
> {
> return ctx->Shader._CurrentFragmentProgram != NULL &&
> - ctx->Shader._CurrentFragmentProgram->NumAtomicBuffers > 0;
> + ctx->Shader._CurrentFragmentProgram->_LinkedShaders[MESA_SHADER_FRAGMENT]->NumAtomicBuffers > 0;
> }
>
> #ifdef __cplusplus
> --
> 2.4.3
-------------- next part --------------
A non-text attachment was scrubbed...
Name: signature.asc
Type: application/pgp-signature
Size: 212 bytes
Desc: not available
URL: <http://lists.freedesktop.org/archives/mesa-dev/attachments/20151026/1e972260/attachment.sig>
More information about the mesa-dev
mailing list