[Mesa-dev] [PATCH v3 29/48] intel/cs: Rework the way thread local ID is handled

Jason Ekstrand jason at jlekstrand.net
Fri Oct 27 19:37:34 UTC 2017


On Fri, Oct 27, 2017 at 2:11 AM, Iago Toral <itoral at igalia.com> wrote:

> On Wed, 2017-10-25 at 16:26 -0700, Jason Ekstrand wrote:
> > Previously, brw_nir_lower_intrinsics added the param and then emitted
> > a
> > load_uniform intrinsic to load it directly.  This commit switches
> > things
> > over to use a specific NIR intrinsic for the thread id.  The one
> > thing I
> > don't like about this approach is that we have to copy
> > thread_local_id
> > over to the new visitor in import_uniforms.
>
> It is not clear to me why you are doing this... why do you like this
> better?
>

For compute shaders, the SPIR-V subgroups stuff has a gl_subgroupId system
value which subgroup in the dispatch you are.  That information is
basically the same as the thread_local_id only off by a factor of the SIMD
size.  It's fairly arbitrary, but I figured we might as well switch over to
pushing the value that's defined in SPIR-V.


> > ---
> >  src/compiler/nir/nir_intrinsics.h                |  3 ++
> >  src/intel/compiler/brw_fs.cpp                    |  4 +-
> >  src/intel/compiler/brw_fs.h                      |  1 +
> >  src/intel/compiler/brw_fs_nir.cpp                | 14 +++++++
> >  src/intel/compiler/brw_nir.h                     |  3 +-
> >  src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 53 +++++---------
> > ----------
> >  6 files changed, 32 insertions(+), 46 deletions(-)
> >
> > diff --git a/src/compiler/nir/nir_intrinsics.h
> > b/src/compiler/nir/nir_intrinsics.h
> > index cefd18b..47022dd 100644
> > --- a/src/compiler/nir/nir_intrinsics.h
> > +++ b/src/compiler/nir/nir_intrinsics.h
> > @@ -364,6 +364,9 @@ SYSTEM_VALUE(blend_const_color_a_float, 1, 0, xx,
> > xx, xx)
> >  SYSTEM_VALUE(blend_const_color_rgba8888_unorm, 1, 0, xx, xx, xx)
> >  SYSTEM_VALUE(blend_const_color_aaaa8888_unorm, 1, 0, xx, xx, xx)
> >
> > +/* Intel specific system values */
> > +SYSTEM_VALUE(intel_thread_local_id, 1, 0, xx, xx, xx)
> > +
> >  /**
> >   * Barycentric coordinate intrinsics.
> >   *
> > diff --git a/src/intel/compiler/brw_fs.cpp
> > b/src/intel/compiler/brw_fs.cpp
> > index 2acd838..c0d4c05 100644
> > --- a/src/intel/compiler/brw_fs.cpp
> > +++ b/src/intel/compiler/brw_fs.cpp
> > @@ -996,6 +996,7 @@ fs_visitor::import_uniforms(fs_visitor *v)
> >     this->push_constant_loc = v->push_constant_loc;
> >     this->pull_constant_loc = v->pull_constant_loc;
> >     this->uniforms = v->uniforms;
> > +   this->thread_local_id = v->thread_local_id;
> >  }
> >
> >  void
> > @@ -6781,8 +6782,7 @@ brw_compile_cs(const struct brw_compiler
> > *compiler, void *log_data,
> >  {
> >     nir_shader *shader = nir_shader_clone(mem_ctx, src_shader);
> >     shader = brw_nir_apply_sampler_key(shader, compiler, &key->tex,
> > true);
> > -
> > -   brw_nir_lower_cs_intrinsics(shader, prog_data);
> > +   brw_nir_lower_cs_intrinsics(shader);
> >     shader = brw_postprocess_nir(shader, compiler, true);
> >
> >     prog_data->local_size[0] = shader->info.cs.local_size[0];
> > diff --git a/src/intel/compiler/brw_fs.h
> > b/src/intel/compiler/brw_fs.h
> > index da32593..f51a4d8 100644
> > --- a/src/intel/compiler/brw_fs.h
> > +++ b/src/intel/compiler/brw_fs.h
> > @@ -315,6 +315,7 @@ public:
> >      */
> >     int *push_constant_loc;
> >
> > +   fs_reg thread_local_id;
> >     fs_reg frag_depth;
> >     fs_reg frag_stencil;
> >     fs_reg sample_mask;
> > diff --git a/src/intel/compiler/brw_fs_nir.cpp
> > b/src/intel/compiler/brw_fs_nir.cpp
> > index 05efee3..fdc6fc6 100644
> > --- a/src/intel/compiler/brw_fs_nir.cpp
> > +++ b/src/intel/compiler/brw_fs_nir.cpp
> > @@ -88,6 +88,16 @@ fs_visitor::nir_setup_uniforms()
> >     }
> >
> >     uniforms = nir->num_uniforms / 4;
> > +
> > +   if (stage == MESA_SHADER_COMPUTE) {
> > +      /* Add a uniform for the thread local id.  It must be the last
> > uniform
> > +       * on the list.
> > +       */
> > +      assert(uniforms == prog_data->nr_params);
> > +      uint32_t *param = brw_stage_prog_data_add_params(prog_data,
> > 1);
> > +      *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
> > +      thread_local_id = fs_reg(UNIFORM, uniforms++,
> > BRW_REGISTER_TYPE_UD);
> > +   }
> >  }
> >
> >  static bool
> > @@ -3409,6 +3419,10 @@ fs_visitor::nir_emit_cs_intrinsic(const
> > fs_builder &bld,
> >        cs_prog_data->uses_barrier = true;
> >        break;
> >
> > +   case nir_intrinsic_load_intel_thread_local_id:
> > +      bld.MOV(retype(dest, BRW_REGISTER_TYPE_UD), thread_local_id);
> > +      break;
> > +
> >     case nir_intrinsic_load_local_invocation_id:
> >     case nir_intrinsic_load_work_group_id: {
> >        gl_system_value sv = nir_system_value_from_intrinsic(instr-
> > >intrinsic);
> > diff --git a/src/intel/compiler/brw_nir.h
> > b/src/intel/compiler/brw_nir.h
> > index 1493b74..3e40712 100644
> > --- a/src/intel/compiler/brw_nir.h
> > +++ b/src/intel/compiler/brw_nir.h
> > @@ -95,8 +95,7 @@ void brw_nir_analyze_boolean_resolves(nir_shader
> > *nir);
> >  nir_shader *brw_preprocess_nir(const struct brw_compiler *compiler,
> >                                 nir_shader *nir);
> >
> > -bool brw_nir_lower_cs_intrinsics(nir_shader *nir,
> > -                                 struct brw_cs_prog_data
> > *prog_data);
> > +bool brw_nir_lower_cs_intrinsics(nir_shader *nir);
> >  void brw_nir_lower_vs_inputs(nir_shader *nir,
> >                               bool use_legacy_snorm_formula,
> >                               const uint8_t *vs_attrib_wa_flags);
> > diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > index d277276..07d2dcc 100644
> > --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> > @@ -26,47 +26,12 @@
> >
> >  struct lower_intrinsics_state {
> >     nir_shader *nir;
> > -   struct brw_cs_prog_data *prog_data;
> >     nir_function_impl *impl;
> >     bool progress;
> >     nir_builder builder;
> > -   int thread_local_id_index;
> > +   unsigned local_workgroup_size;
> >  };
> >
> > -static nir_ssa_def *
> > -read_thread_local_id(struct lower_intrinsics_state *state)
> > -{
> > -   struct brw_cs_prog_data *prog_data = state->prog_data;
> > -   nir_builder *b = &state->builder;
> > -   nir_shader *nir = state->nir;
> > -   const unsigned *sizes = nir->info.cs.local_size;
> > -   const unsigned group_size = sizes[0] * sizes[1] * sizes[2];
> > -
> > -   /* Some programs have local_size dimensions so small that the
> > thread local
> > -    * ID will always be 0.
> > -    */
> > -   if (group_size <= 8)
> > -      return nir_imm_int(b, 0);
> > -
> > -   if (state->thread_local_id_index == -1) {
> > -      state->thread_local_id_index = prog_data->base.nr_params;
> > -      uint32_t *param = brw_stage_prog_data_add_params(&prog_data-
> > >base, 1);
> > -      *param = BRW_PARAM_BUILTIN_THREAD_LOCAL_ID;
> > -      nir->num_uniforms += 4;
> > -   }
> > -   unsigned id_index = state->thread_local_id_index;
> > -
> > -   nir_intrinsic_instr *load =
> > -      nir_intrinsic_instr_create(nir, nir_intrinsic_load_uniform);
> > -   load->num_components = 1;
> > -   load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
> > -   nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
> > -   nir_intrinsic_set_base(load, id_index * sizeof(uint32_t));
> > -   nir_intrinsic_set_range(load, sizeof(uint32_t));
> > -   nir_builder_instr_insert(b, &load->instr);
> > -   return &load->dest.ssa;
> > -}
> > -
> >  static bool
> >  lower_cs_intrinsics_convert_block(struct lower_intrinsics_state
> > *state,
> >                                    nir_block *block)
> > @@ -91,7 +56,12 @@ lower_cs_intrinsics_convert_block(struct
> > lower_intrinsics_state *state,
> >            *    gl_LocalInvocationIndex =
> >            *       cs_thread_local_id + subgroup_invocation;
> >            */
> > -         nir_ssa_def *thread_local_id = read_thread_local_id(state);
> > +         nir_ssa_def *thread_local_id;
> > +         if (state->local_workgroup_size <= 8)
> > +            thread_local_id = nir_imm_int(b, 0);
> > +         else
> > +            thread_local_id = nir_load_intel_thread_local_id(b);
> > +
> >           nir_ssa_def *channel = nir_load_subgroup_invocation(b);
> >           sysval = nir_iadd(b, channel, thread_local_id);
> >           break;
> > @@ -157,8 +127,7 @@ lower_cs_intrinsics_convert_impl(struct
> > lower_intrinsics_state *state)
> >  }
> >
> >  bool
> > -brw_nir_lower_cs_intrinsics(nir_shader *nir,
> > -                            struct brw_cs_prog_data *prog_data)
> > +brw_nir_lower_cs_intrinsics(nir_shader *nir)
> >  {
> >     assert(nir->info.stage == MESA_SHADER_COMPUTE);
> >
> > @@ -166,9 +135,9 @@ brw_nir_lower_cs_intrinsics(nir_shader *nir,
> >     struct lower_intrinsics_state state;
> >     memset(&state, 0, sizeof(state));
> >     state.nir = nir;
> > -   state.prog_data = prog_data;
> > -
> > -   state.thread_local_id_index = -1;
> > +   state.local_workgroup_size = nir->info.cs.local_size[0] *
> > +                                nir->info.cs.local_size[1] *
> > +                                nir->info.cs.local_size[2];
> >
> >     do {
> >        state.progress = false;
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20171027/30e55817/attachment-0001.html>


More information about the mesa-dev mailing list