[Mesa-dev] [PATCH v3 29/48] intel/cs: Rework the way thread local ID is handled
Jason Ekstrand
jason at jlekstrand.net
Mon Oct 30 18:34:08 UTC 2017
On Mon, Oct 30, 2017 at 12:33 AM, Iago Toral <itoral at igalia.com> wrote:
> On Fri, 2017-10-27 at 12:37 -0700, Jason Ekstrand wrote:
>
> On Fri, Oct 27, 2017 at 2:11 AM, Iago Toral <itoral at igalia.com> wrote:
>
> On Wed, 2017-10-25 at 16:26 -0700, Jason Ekstrand wrote:
> > Previously, brw_nir_lower_intrinsics added the param and then emitted
> > a
> > load_uniform intrinsic to load it directly. This commit switches
> > things
> > over to use a specific NIR intrinsic for the thread id. The one
> > thing I
> > don't like about this approach is that we have to copy
> > thread_local_id
> > over to the new visitor in import_uniforms.
>
> It is not clear to me why you are doing this... why do you like this
> better?
>
>
> For compute shaders, the SPIR-V subgroups stuff has a gl_subgroupId system
> value which subgroup in the dispatch you are. That information is
> basically the same as the thread_local_id only off by a factor of the SIMD
> size. It's fairly arbitrary, but I figured we might as well switch over to
> pushing the value that's defined in SPIR-V.
>
>
> Oh, my question was not about pushing the subgroup id instead of the
> thread local id (that is actually done in a later patch, not here) it is
> about using a system value and changing the place where we push that last
> uniform, which is what you change here. The implementation seems exactly
> equivalent to what we had prior to this patch, so I was wondering if there
> is any practical advantage in doing it like this.
>
Not really. It just seemed like, if we have a nir_load_* system value
intrinsic, we may as well treat it as a system value like everything else.
Assuming it doesn't cause too much pain, I think I'd be ok with dropping
this if you really want.
> Iago
>
> > ---
> > src/compiler/nir/nir_intrinsics.h | 3 ++
> > src/intel/compiler/brw_fs.cpp | 4 +-
> > src/intel/compiler/brw_fs.h | 1 +
> > src/intel/compiler/brw_fs_nir.cpp | 14 +++++++
> > src/intel/compiler/brw_nir.h | 3 +-
> > src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 53 +++++---------
> > ----------
> > 6 files changed, 32 insertions(+), 46 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_intrinsics.h
> > b/src/compiler/nir/nir_intrinsics.h
> > index cefd18b..47022dd 100644
> > --- a/src/compiler/nir/nir_intrinsics.h
> > +++ b/src/compiler/nir/nir_intrinsics.h
> > @@ -364,6 +364,9 @@ SYSTEM_VALUE(blend_const_color_a_float, 1, 0, xx,
> > xx, xx)
> > SYSTEM_VALUE(blend_const_color_rgba8888_unorm, 1, 0, xx, xx, xx)
> > SYSTEM_VALUE(blend_const_color_aaaa8888_unorm, 1, 0, xx, xx, xx)
> >
> > +/* Intel specific system values */
> > +SYSTEM_VALUE(intel_thread_local_id, 1, 0, xx, xx, xx)
> > +
> > /**
> > * Barycentric coordinate intrinsics.
> > *
> > diff --git a/src/intel/compiler/brw_fs.cpp
> > b/src/intel/compiler/brw_fs.cpp
> > index 2acd838..c0d4c05 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -996,6 +996,7 @@ fs_visitor::import_uniforms(fs_visitor *v)
> > this->push_constant_loc = v->push_constant_loc;
> > this->pull_constant_loc = v->pull_constant_loc;
> > this->uniforms = v->uniforms;
> > + this->thread_local_id = v->thread_local_id;
> > }
> >
> > void
> > @@ -6781,8 +6782,7 @@ brw_compile_cs(const struct brw_compiler
> > *compiler, void *log_data,
> > {
> > nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> > shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex,
> > true);
> > -
> > - brw_nir_lower_cs_intrinsics(shader, prog_data);
> > + brw_nir_lower_cs_intrinsics(shader);
> > shader = brw_postprocess_nir(shader, compiler, true);
> >
> > prog_data->local_size[0] = shader->info.cs.local_size[0];
> > diff --git a/src/intel/compiler/brw_fs.h
> > b/src/intel/compiler/brw_fs.h
> > index da32593..f51a4d8 100644
> > --- a/src/intel/compiler/brw_fs.h
> > +++ b/src/intel/compiler/brw_fs.h
> > @@ -315,6 +315,7 @@ public:
> > */
> > int *push_constant_loc;
> >
> > + fs_reg thread_local_id;
> > fs_reg frag_depth;
> > fs_reg frag_stencil;
> > fs_reg sample_mask;
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > index 05efee3..fdc6fc6 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -88,6 +88,16 @@ fs_visitor::nir_setup_uniforms()
> > }
> >
> > uniforms = nir->num_uniforms / 4;
> > +
> > + if (stage == MESA_SHADER_COMPUTE) {
> > + /* Add a uniform for the thread local id. It must be the last
> > uniform
> > + * on the list.
> > + */
> > + assert(uniforms == prog_data->nr_params);
> > + uint32_t *param = brw_stage_prog_data_add_params(prog_data,
> > 1);
> > + *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
> > + thread_local_id = fs_reg(UNIFORM, uniforms++,
> > BRW_REGISTER_TYPE_UD);
> > + }
> > }
> >
> > static bool
> > @@ -3409,6 +3419,10 @@ fs_visitor::nir_emit_cs_intrinsic(const
> > fs_builder &bld,
> > cs_prog_data->uses_barrier = true;
> > break;
> >
> > + case nir_intrinsic_load_intel_thread_local_id:
> > + bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), thread_local_id);
> > + break;
> > +
> > case nir_intrinsic_load_local_invocation_id:
> > case nir_intrinsic_load_work_group_id: {
> > gl_system_value sv = nir_system_value_from_intrinsic(instr-
> > >intrinsic);
> > diff --git a/src/intel/compiler/brw_nir.h
> > b/src/intel/compiler/brw_nir.h
> > index 1493b74..3e40712 100644
> > --- a/src/intel/compiler/brw_nir.h
> > +++ b/src/intel/compiler/brw_nir.h
> > @@ -95,8 +95,7 @@ void brw_nir_analyze_boolean_resolves(nir_shader
> > *nir);
> > nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler,
> > nir_shader *nir);
> >
> > -bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
> > - struct brw_cs_prog_data
> > *prog_data);
> > +bool brw_nir_lower_cs_intrinsics(nir_shader *nir);
> > void brw_nir_lower_vs_inputs(nir_shader *nir,
> > bool use_legacy_snorm_formula,
> > const uint8_t *vs_attrib_wa_flags);
> > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > index d277276..07d2dcc 100644
> > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > @@ -26,47 +26,12 @@
> >
> > struct lower_intrinsics_state {
> > nir_shader *nir;
> > - struct brw_cs_prog_data *prog_data;
> > nir_function_impl *impl;
> > bool progress;
> > nir_builder builder;
> > - int thread_local_id_index;
> > + unsigned local_workgroup_size;
> > };
> >
> > -static nir_ssa_def *
> > -read_thread_local_id(struct lower_intrinsics_state *state)
> > -{
> > - struct brw_cs_prog_data *prog_data = state->prog_data;
> > - nir_builder *b = &state->builder;
> > - nir_shader *nir = state->nir;
> > - const unsigned *sizes = nir->info.cs.local_size;
> > - const unsigned group_size = sizes[0] * sizes[1] * sizes[2];
> > -
> > - /* Some programs have local_size dimensions so small that the
> > thread local
> > - * ID will always be 0.
> > - */
> > - if (group_size <= 8)
> > - return nir_imm_int(b, 0);
> > -
> > - if (state->thread_local_id_index == -1) {
> > - state->thread_local_id_index = prog_data->base.nr_params;
> > - uint32_t *param = brw_stage_prog_data_add_params(&prog_data-
> > >base, 1);
> > - *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
> > - nir->num_uniforms += 4;
> > - }
> > - unsigned id_index = state->thread_local_id_index;
> > -
> > - nir_intrinsic_instr *load =
> > - nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
> > - load->num_components = 1;
> > - load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
> > - nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
> > - nir_intrinsic_set_base(load, id_index * sizeof(uint32_t));
> > - nir_intrinsic_set_range(load, sizeof(uint32_t));
> > - nir_builder_instr_insert(b, &load->instr);
> > - return &load->dest.ssa;
> > -}
> > -
> > static bool
> > lower_cs_intrinsics_convert_block(struct lower_intrinsics_state
> > *state,
> > nir_block *block)
> > @@ -91,7 +56,12 @@ lower_cs_intrinsics_convert_block(struct
> > lower_intrinsics_state *state,
> > * gl_LocalInvocationIndex =
> > * cs_thread_local_id + subgroup_invocation;
> > */
> > - nir_ssa_def *thread_local_id = read_thread_local_id(state);
> > + nir_ssa_def *thread_local_id;
> > + if (state->local_workgroup_size <= 8)
> > + thread_local_id = nir_imm_int(b, 0);
> > + else
> > + thread_local_id = nir_load_intel_thread_local_id(b);
> > +
> > nir_ssa_def *channel = nir_load_subgroup_invocation(b);
> > sysval = nir_iadd(b, channel, thread_local_id);
> > break;
> > @@ -157,8 +127,7 @@ lower_cs_intrinsics_convert_impl(struct
> > lower_intrinsics_state *state)
> > }
> >
> > bool
> > -brw_nir_lower_cs_intrinsics(nir_shader *nir,
> > - struct brw_cs_prog_data *prog_data)
> > +brw_nir_lower_cs_intrinsics(nir_shader *nir)
> > {
> > assert(nir->info.stage == MESA_SHADER_COMPUTE);
> >
> > @@ -166,9 +135,9 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
> > struct lower_intrinsics_state state;
> > memset(&state, 0, sizeof(state));
> > state.nir = nir;
> > - state.prog_data = prog_data;
> > -
> > - state.thread_local_id_index = -1;
> > + state.local_workgroup_size = nir->info.cs.local_size[0] *
> > + nir->info.cs.local_size[1] *
> > + nir->info.cs.local_size[2];
> >
> > do {
> > state.progress = false;
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171030/f7f20c46/attachment-0001.html>
More information about the mesa-dev
mailing list