[Mesa-dev] [PATCH v3 29/48] intel/cs: Rework the way thread local ID is handled
Iago Toral
itoral at igalia.com
Mon Oct 30 07:33:52 UTC 2017
On Fri, 2017-10-27 at 12:37 -0700, Jason Ekstrand wrote:
> On Fri, Oct 27, 2017 at 2:11 AM, Iago Toral <itoral at igalia.com>
> wrote:
> > On Wed, 2017-10-25 at 16:26 -0700, Jason Ekstrand wrote:
> >
> > > Previously, brw_nir_lower_intrinsics added the param and then
> > emitted
> >
> > > a
> >
> > > load_uniform intrinsic to load it directly. This commit switches
> >
> > > things
> >
> > > over to use a specific NIR intrinsic for the thread id. The one
> >
> > > thing I
> >
> > > don't like about this approach is that we have to copy
> >
> > > thread_local_id
> >
> > > over to the new visitor in import_uniforms.
> >
> >
> >
> > It is not clear to me why you are doing this... why do you like
> > this
> >
> > better?
> >
>
> For compute shaders, the SPIR-V subgroups stuff has a gl_subgroupId
> system value which subgroup in the dispatch you are. That
> information is basically the same as the thread_local_id only off by
> a factor of the SIMD size. It's fairly arbitrary, but I figured we
> might as well switch over to pushing the value that's defined in
> SPIR-V.
Oh, my question was not about pushing the subgroup id instead of the
thread local id (that is actually done in a later patch, not here) it
is about using a system value and changing the place where we push that
last uniform, which is what you change here. The implementation seems
exactly equivalent to what we had prior to this patch, so I was
wondering if there is any practical advantage in doing it like this.
Iago
> > > ---
> >
> > > src/compiler/nir/nir_intrinsics.h | 3 ++
> >
> > > src/intel/compiler/brw_fs.cpp | 4 +-
> >
> > > src/intel/compiler/brw_fs.h | 1 +
> >
> > > src/intel/compiler/brw_fs_nir.cpp | 14 +++++++
> >
> > > src/intel/compiler/brw_nir.h | 3 +-
> >
> > > src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 53 +++++-----
> > ----
> >
> > > ----------
> >
> > > 6 files changed, 32 insertions(+), 46 deletions(-)
> >
> > >
> >
> > > diff --git a/src/compiler/nir/nir_intrinsics.h
> >
> > > b/src/compiler/nir/nir_intrinsics.h
> >
> > > index cefd18b..47022dd 100644
> >
> > > --- a/src/compiler/nir/nir_intrinsics.h
> >
> > > +++ b/src/compiler/nir/nir_intrinsics.h
> >
> > > @@ -364,6 +364,9 @@ SYSTEM_VALUE(blend_const_color_a_float, 1, 0,
> > xx,
> >
> > > xx, xx)
> >
> > > SYSTEM_VALUE(blend_const_color_rgba8888_unorm, 1, 0, xx, xx, xx)
> >
> > > SYSTEM_VALUE(blend_const_color_aaaa8888_unorm, 1, 0, xx, xx, xx)
> >
> > >
> >
> > > +/* Intel specific system values */
> >
> > > +SYSTEM_VALUE(intel_thread_local_id, 1, 0, xx, xx, xx)
> >
> > > +
> >
> > > /**
> >
> > > * Barycentric coordinate intrinsics.
> >
> > > *
> >
> > > diff --git a/src/intel/compiler/brw_fs.cpp
> >
> > > b/src/intel/compiler/brw_fs.cpp
> >
> > > index 2acd838..c0d4c05 100644
> >
> > > --- a/src/intel/compiler/brw_fs.cpp
> >
> > > +++ b/src/intel/compiler/brw_fs.cpp
> >
> > > @@ -996,6 +996,7 @@ fs_visitor::import_uniforms(fs_visitor *v)
> >
> > > this->push_constant_loc = v->push_constant_loc;
> >
> > > this->pull_constant_loc = v->pull_constant_loc;
> >
> > > this->uniforms = v->uniforms;
> >
> > > + this->thread_local_id = v->thread_local_id;
> >
> > > }
> >
> > >
> >
> > > void
> >
> > > @@ -6781,8 +6782,7 @@ brw_compile_cs(const struct brw_compiler
> >
> > > *compiler, void *log_data,
> >
> > > {
> >
> > > nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> >
> > > shader = brw_nir_apply_sampler_key(shader, compiler, &key-
> > >tex,
> >
> > > true);
> >
> > > -
> >
> > > - brw_nir_lower_cs_intrinsics(shader, prog_data);
> >
> > > + brw_nir_lower_cs_intrinsics(shader);
> >
> > > shader = brw_postprocess_nir(shader, compiler, true);
> >
> > >
> >
> > > prog_data->local_size[0] = shader->info.cs.local_size[0];
> >
> > > diff --git a/src/intel/compiler/brw_fs.h
> >
> > > b/src/intel/compiler/brw_fs.h
> >
> > > index da32593..f51a4d8 100644
> >
> > > --- a/src/intel/compiler/brw_fs.h
> >
> > > +++ b/src/intel/compiler/brw_fs.h
> >
> > > @@ -315,6 +315,7 @@ public:
> >
> > > */
> >
> > > int *push_constant_loc;
> >
> > >
> >
> > > + fs_reg thread_local_id;
> >
> > > fs_reg frag_depth;
> >
> > > fs_reg frag_stencil;
> >
> > > fs_reg sample_mask;
> >
> > > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> >
> > > b/src/intel/compiler/brw_fs_nir.cpp
> >
> > > index 05efee3..fdc6fc6 100644
> >
> > > --- a/src/intel/compiler/brw_fs_nir.cpp
> >
> > > +++ b/src/intel/compiler/brw_fs_nir.cpp
> >
> > > @@ -88,6 +88,16 @@ fs_visitor::nir_setup_uniforms()
> >
> > > }
> >
> > >
> >
> > > uniforms = nir->num_uniforms / 4;
> >
> > > +
> >
> > > + if (stage == MESA_SHADER_COMPUTE) {
> >
> > > + /* Add a uniform for the thread local id. It must be the
> > last
> >
> > > uniform
> >
> > > + * on the list.
> >
> > > + */
> >
> > > + assert(uniforms == prog_data->nr_params);
> >
> > > + uint32_t *param =
> > brw_stage_prog_data_add_params(prog_data,
> >
> > > 1);
> >
> > > + *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
> >
> > > + thread_local_id = fs_reg(UNIFORM, uniforms++,
> >
> > > BRW_REGISTER_TYPE_UD);
> >
> > > + }
> >
> > > }
> >
> > >
> >
> > > static bool
> >
> > > @@ -3409,6 +3419,10 @@ fs_visitor::nir_emit_cs_intrinsic(const
> >
> > > fs_builder &bld,
> >
> > > cs_prog_data->uses_barrier = true;
> >
> > > break;
> >
> > >
> >
> > > + case nir_intrinsic_load_intel_thread_local_id:
> >
> > > + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD),
> > thread_local_id);
> >
> > > + break;
> >
> > > +
> >
> > > case nir_intrinsic_load_local_invocation_id:
> >
> > > case nir_intrinsic_load_work_group_id: {
> >
> > > gl_system_value sv =
> > nir_system_value_from_intrinsic(instr-
> >
> > > >intrinsic);
> >
> > > diff --git a/src/intel/compiler/brw_nir.h
> >
> > > b/src/intel/compiler/brw_nir.h
> >
> > > index 1493b74..3e40712 100644
> >
> > > --- a/src/intel/compiler/brw_nir.h
> >
> > > +++ b/src/intel/compiler/brw_nir.h
> >
> > > @@ -95,8 +95,7 @@ void
> > brw_nir_analyze_boolean_resolves(nir_shader
> >
> > > *nir);
> >
> > > nir_shader *brw_preprocess_nir(const struct brw_compiler
> > *compiler,
> >
> > > nir_shader *nir);
> >
> > >
> >
> > > -bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
> >
> > > - struct brw_cs_prog_data
> >
> > > *prog_data);
> >
> > > +bool brw_nir_lower_cs_intrinsics(nir_shader *nir);
> >
> > > void brw_nir_lower_vs_inputs(nir_shader *nir,
> >
> > > bool use_legacy_snorm_formula,
> >
> > > const uint8_t *vs_attrib_wa_flags);
> >
> > > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> >
> > > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> >
> > > index d277276..07d2dcc 100644
> >
> > > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> >
> > > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> >
> > > @@ -26,47 +26,12 @@
> >
> > >
> >
> > > struct lower_intrinsics_state {
> >
> > > nir_shader *nir;
> >
> > > - struct brw_cs_prog_data *prog_data;
> >
> > > nir_function_impl *impl;
> >
> > > bool progress;
> >
> > > nir_builder builder;
> >
> > > - int thread_local_id_index;
> >
> > > + unsigned local_workgroup_size;
> >
> > > };
> >
> > >
> >
> > > -static nir_ssa_def *
> >
> > > -read_thread_local_id(struct lower_intrinsics_state *state)
> >
> > > -{
> >
> > > - struct brw_cs_prog_data *prog_data = state->prog_data;
> >
> > > - nir_builder *b = &state->builder;
> >
> > > - nir_shader *nir = state->nir;
> >
> > > - const unsigned *sizes = nir->info.cs.local_size;
> >
> > > - const unsigned group_size = sizes[0] * sizes[1] * sizes[2];
> >
> > > -
> >
> > > - /* Some programs have local_size dimensions so small that the
> >
> > > thread local
> >
> > > - * ID will always be 0.
> >
> > > - */
> >
> > > - if (group_size <= 8)
> >
> > > - return nir_imm_int(b, 0);
> >
> > > -
> >
> > > - if (state->thread_local_id_index == -1) {
> >
> > > - state->thread_local_id_index = prog_data->base.nr_params;
> >
> > > - uint32_t *param =
> > brw_stage_prog_data_add_params(&prog_data-
> >
> > > >base, 1);
> >
> > > - *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
> >
> > > - nir->num_uniforms += 4;
> >
> > > - }
> >
> > > - unsigned id_index = state->thread_local_id_index;
> >
> > > -
> >
> > > - nir_intrinsic_instr *load =
> >
> > > - nir_intrinsic_instr_create(nir,
> > nir_intrinsic_load_uniform);
> >
> > > - load->num_components = 1;
> >
> > > - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
> >
> > > - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
> >
> > > - nir_intrinsic_set_base(load, id_index * sizeof(uint32_t));
> >
> > > - nir_intrinsic_set_range(load, sizeof(uint32_t));
> >
> > > - nir_builder_instr_insert(b, &load->instr);
> >
> > > - return &load->dest.ssa;
> >
> > > -}
> >
> > > -
> >
> > > static bool
> >
> > > lower_cs_intrinsics_convert_block(struct lower_intrinsics_state
> >
> > > *state,
> >
> > > nir_block *block)
> >
> > > @@ -91,7 +56,12 @@ lower_cs_intrinsics_convert_block(struct
> >
> > > lower_intrinsics_state *state,
> >
> > > * gl_LocalInvocationIndex =
> >
> > > * cs_thread_local_id + subgroup_invocation;
> >
> > > */
> >
> > > - nir_ssa_def *thread_local_id =
> > read_thread_local_id(state);
> >
> > > + nir_ssa_def *thread_local_id;
> >
> > > + if (state->local_workgroup_size <= 8)
> >
> > > + thread_local_id = nir_imm_int(b, 0);
> >
> > > + else
> >
> > > + thread_local_id = nir_load_intel_thread_local_id(b);
> >
> > > +
> >
> > > nir_ssa_def *channel = nir_load_subgroup_invocation(b);
> >
> > > sysval = nir_iadd(b, channel, thread_local_id);
> >
> > > break;
> >
> > > @@ -157,8 +127,7 @@ lower_cs_intrinsics_convert_impl(struct
> >
> > > lower_intrinsics_state *state)
> >
> > > }
> >
> > >
> >
> > > bool
> >
> > > -brw_nir_lower_cs_intrinsics(nir_shader *nir,
> >
> > > - struct brw_cs_prog_data *prog_data)
> >
> > > +brw_nir_lower_cs_intrinsics(nir_shader *nir)
> >
> > > {
> >
> > > assert(nir->info.stage == MESA_SHADER_COMPUTE);
> >
> > >
> >
> > > @@ -166,9 +135,9 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
> >
> > > struct lower_intrinsics_state state;
> >
> > > memset(&state, 0, sizeof(state));
> >
> > > state.nir = nir;
> >
> > > - state.prog_data = prog_data;
> >
> > > -
> >
> > > - state.thread_local_id_index = -1;
> >
> > > + state.local_workgroup_size = nir->info.cs.local_size[0] *
> >
> > > + nir->info.cs.local_size[1] *
> >
> > > + nir->info.cs.local_size[2];
> >
> > >
> >
> > > do {
> >
> > > state.progress = false;
> >
> >
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171030/61fb5919/attachment-0001.html>
More information about the mesa-dev
mailing list