[Mesa-dev] [PATCH 8/8] nir: specify bit_size when loading system values

Jason Ekstrand jason at jlekstrand.net
Mon Jul 16 15:54:16 UTC 2018


On Mon, Jul 16, 2018 at 7:29 AM Karol Herbst <kherbst at redhat.com> wrote:

> With OpenCL the size of some system value depends on the Physical model
> choosen, so we need a way to load any system value as 32 or 64 bit.
>
> We could probably be a lot smarter and specify which system values might
> be valid as 32 and/or 64 bit, but I get the feeling it isn't really worth
> the effort and we can simply depend on the dest type choosen by the API.
>

I'm not sure what I think about this.  Most system values (other than the
few used by OpenCL) are always 32 bits all the time and back-ends are
likely to depend on this.  At the very least, it might be nice to have some
sort of validation that the bit sizes are correct before we get there.

One solution to this would be to add a dest_bit_size field to
nir_intrinsic_info and use the convention of dest_bit_size == 0 means it
can be anything.  Then nir_builder_opcodes.py can use that to create
functions which either require the bit size or don't.  Also, we could have
the validator properly validate intrinsic destination bit sizes.  It's also
something that we could do somewhat incramentally because defaulting
everything to dest_bit_size = 0 gives the current behavior.

--Jason


> Signed-off-by: Karol Herbst <kherbst at redhat.com>
> ---
>  src/compiler/nir/nir_builder_opcodes_h.py     |  9 ++--
>  src/compiler/nir/nir_lower_alpha_test.c       |  2 +-
>  src/compiler/nir/nir_lower_clip.c             |  3 +-
>  src/compiler/nir/nir_lower_subgroups.c        |  8 +--
>  src/compiler/nir/nir_lower_system_values.c    | 49 +++++++++++--------
>  src/compiler/nir/nir_lower_two_sided_color.c  |  2 +-
>  src/compiler/nir/nir_lower_wpos_center.c      |  2 +-
>  src/compiler/spirv/vtn_subgroup.c             |  2 +-
>  src/gallium/auxiliary/nir/tgsi_to_nir.c       |  3 +-
>  src/intel/blorp/blorp_blit.c                  |  2 +-
>  src/intel/blorp/blorp_clear.c                 |  2 +-
>  .../compiler/brw_nir_lower_cs_intrinsics.c    |  6 +--
>  src/mesa/drivers/dri/i965/brw_tcs.c           |  2 +-
>  13 files changed, 52 insertions(+), 40 deletions(-)
>
> diff --git a/src/compiler/nir/nir_builder_opcodes_h.py
> b/src/compiler/nir/nir_builder_opcodes_h.py
> index 72cf5b4549d..d16dac6b16e 100644
> --- a/src/compiler/nir/nir_builder_opcodes_h.py
> +++ b/src/compiler/nir/nir_builder_opcodes_h.py
> @@ -44,22 +44,23 @@ nir_${name}(nir_builder *build,
> ${src_decl_list(opcode.num_inputs)})
>
>  /* Generic builder for system values. */
>  static inline nir_ssa_def *
> -nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
> +nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
> +                      unsigned bit_size)
>  {
>     nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader,
> op);
>     load->num_components = nir_intrinsic_infos[op].dest_components;
>     load->const_index[0] = index;
>     nir_ssa_dest_init(&load->instr, &load->dest,
> -                     nir_intrinsic_infos[op].dest_components, 32, NULL);
> +                     nir_intrinsic_infos[op].dest_components, bit_size,
> NULL);
>     nir_builder_instr_insert(build, &load->instr);
>     return &load->dest.ssa;
>  }
>
>  % for name, opcode in filter(lambda v: v[1].sysval,
> sorted(INTR_OPCODES.iteritems())):
>  static inline nir_ssa_def *
> -nir_${name}(nir_builder *build)
> +nir_${name}(nir_builder *build, unsigned bit_size)
>  {
> -   return nir_load_system_value(build, nir_intrinsic_${name}, 0);
> +   return nir_load_system_value(build, nir_intrinsic_${name}, 0,
> bit_size);
>  }
>  % endfor
>
> diff --git a/src/compiler/nir/nir_lower_alpha_test.c
> b/src/compiler/nir/nir_lower_alpha_test.c
> index ddd815765bd..8341a0246d2 100644
> --- a/src/compiler/nir/nir_lower_alpha_test.c
> +++ b/src/compiler/nir/nir_lower_alpha_test.c
> @@ -95,7 +95,7 @@ nir_lower_alpha_test(nir_shader *shader, enum
> compare_func func,
>
>                 nir_ssa_def *condition =
>                    nir_compare_func(&b, func,
> -                                   alpha, nir_load_alpha_ref_float(&b));
> +                                   alpha, nir_load_alpha_ref_float(&b,
> 32));
>
>                 nir_intrinsic_instr *discard =
>                    nir_intrinsic_instr_create(b.shader,
> diff --git a/src/compiler/nir/nir_lower_clip.c
> b/src/compiler/nir/nir_lower_clip.c
> index ea12f51a7bb..b9a91f7d40b 100644
> --- a/src/compiler/nir/nir_lower_clip.c
> +++ b/src/compiler/nir/nir_lower_clip.c
> @@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned
> ucp_enables,
>     for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
>        if (ucp_enables & (1 << plane)) {
>           nir_ssa_def *ucp =
> -            nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane,
> plane);
> +            nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane,
> +                                  plane, 32);
>
>           /* calculate clipdist[plane] - dot(ucp, cv): */
>           clipdist[plane] = nir_fdot4(&b, ucp, cv);
> diff --git a/src/compiler/nir/nir_lower_subgroups.c
> b/src/compiler/nir/nir_lower_subgroups.c
> index ee5e8bd644b..c474b9fd27a 100644
> --- a/src/compiler/nir/nir_lower_subgroups.c
> +++ b/src/compiler/nir/nir_lower_subgroups.c
> @@ -226,7 +226,7 @@ static nir_ssa_def *
>  lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
>                bool lower_to_scalar, bool lower_to_32bit)
>  {
> -   nir_ssa_def *index = nir_load_subgroup_invocation(b);
> +   nir_ssa_def *index = nir_load_subgroup_invocation(b, 32);
>     switch (intrin->intrinsic) {
>     case nir_intrinsic_shuffle_xor:
>        assert(intrin->src[1].is_ssa);
> @@ -338,7 +338,7 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>        assert(options->subgroup_size <= 64);
>        uint64_t group_mask = ~0ull >> (64 - options->subgroup_size);
>
> -      nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +      nir_ssa_def *count = nir_load_subgroup_invocation(b, 32);
>        nir_ssa_def *val;
>        switch (intrin->intrinsic) {
>        case nir_intrinsic_load_subgroup_eq_mask:
> @@ -411,7 +411,7 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>
>     case nir_intrinsic_ballot_bit_count_exclusive:
>     case nir_intrinsic_ballot_bit_count_inclusive: {
> -      nir_ssa_def *count = nir_load_subgroup_invocation(b);
> +      nir_ssa_def *count = nir_load_subgroup_invocation(b, 32);
>        nir_ssa_def *mask = nir_imm_intN_t(b, ~0ull,
> options->ballot_bit_size);
>        if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_inclusive) {
>           const unsigned bits = options->ballot_bit_size;
> @@ -434,7 +434,7 @@ lower_subgroups_intrin(nir_builder *b,
> nir_intrinsic_instr *intrin,
>        nir_ssa_dest_init(&first->instr, &first->dest, 1, 32, NULL);
>        nir_builder_instr_insert(b, &first->instr);
>
> -      return nir_ieq(b, nir_load_subgroup_invocation(b),
> &first->dest.ssa);
> +      return nir_ieq(b, nir_load_subgroup_invocation(b, 32),
> &first->dest.ssa);
>     }
>
>     case nir_intrinsic_shuffle:
> diff --git a/src/compiler/nir/nir_lower_system_values.c
> b/src/compiler/nir/nir_lower_system_values.c
> index da04895d66c..41f939dd935 100644
> --- a/src/compiler/nir/nir_lower_system_values.c
> +++ b/src/compiler/nir/nir_lower_system_values.c
> @@ -29,14 +29,22 @@
>  #include "nir_builder.h"
>
>  static nir_ssa_def*
> -build_local_group_size(nir_builder *b)
> +build_local_group_size(nir_builder *b, unsigned bit_size)
>  {
>     nir_const_value local_size;
>     memset(&local_size, 0, sizeof(local_size));
> -   local_size.u32[0] = b->shader->info.cs.local_size[0];
> -   local_size.u32[1] = b->shader->info.cs.local_size[1];
> -   local_size.u32[2] = b->shader->info.cs.local_size[2];
> -   return nir_build_imm(b, 3, 32, local_size);
> +   if (bit_size == 64) {
> +      local_size.u64[0] = b->shader->info.cs.local_size[0];
> +      local_size.u64[1] = b->shader->info.cs.local_size[1];
> +      local_size.u64[2] = b->shader->info.cs.local_size[2];
> +   } else if (bit_size == 32) {
> +      local_size.u32[0] = b->shader->info.cs.local_size[0];
> +      local_size.u32[1] = b->shader->info.cs.local_size[1];
> +      local_size.u32[2] = b->shader->info.cs.local_size[2];
> +   } else {
> +      assert(!"local group size can't be smaller than 32 bits");
> +   }
> +   return nir_build_imm(b, 3, bit_size, local_size);
>  }
>
>  static bool
> @@ -67,6 +75,7 @@ convert_block(nir_block *block, nir_builder *b)
>        }
>        nir_variable *var = deref->var;
>
> +      unsigned bit_size = load_deref->dest.ssa.bit_size;
>        b->cursor = nir_after_instr(&load_deref->instr);
>
>        nir_ssa_def *sysval = NULL;
> @@ -77,9 +86,9 @@ convert_block(nir_block *block, nir_builder *b)
>            *    "The value of gl_GlobalInvocationID is equal to
>            *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
>            */
> -         nir_ssa_def *group_size = build_local_group_size(b);
> -         nir_ssa_def *group_id = nir_load_work_group_id(b);
> -         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
> +         nir_ssa_def *group_size = build_local_group_size(b, bit_size);
> +         nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
> +         nir_ssa_def *local_id = nir_load_local_invocation_id(b,
> bit_size);
>
>           sysval = nir_iadd(b, nir_imul(b, group_id, group_size),
> local_id);
>           break;
> @@ -99,7 +108,7 @@ convert_block(nir_block *block, nir_builder *b)
>            *    gl_WorkGroupSize.y + gl_LocalInvocationID.y *
>            *    gl_WorkGroupSize.x + gl_LocalInvocationID.x"
>            */
> -         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
> +         nir_ssa_def *local_id = nir_load_local_invocation_id(b,
> bit_size);
>
>           nir_ssa_def *size_x =
>              nir_imm_int(b, b->shader->info.cs.local_size[0]);
> @@ -115,17 +124,17 @@ convert_block(nir_block *block, nir_builder *b)
>        }
>
>        case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
> -         sysval = build_local_group_size(b);
> +         sysval = build_local_group_size(b, bit_size);
>           break;
>        }
>
>        case SYSTEM_VALUE_VERTEX_ID:
>           if (b->shader->options->vertex_id_zero_based) {
>              sysval = nir_iadd(b,
> -                              nir_load_vertex_id_zero_base(b),
> -                              nir_load_first_vertex(b));
> +                              nir_load_vertex_id_zero_base(b, bit_size),
> +                              nir_load_first_vertex(b, bit_size));
>           } else {
> -            sysval = nir_load_vertex_id(b);
> +            sysval = nir_load_vertex_id(b, bit_size);
>           }
>           break;
>
> @@ -140,14 +149,14 @@ convert_block(nir_block *block, nir_builder *b)
>            */
>           if (b->shader->options->lower_base_vertex)
>              sysval = nir_iand(b,
> -                              nir_load_is_indexed_draw(b),
> -                              nir_load_first_vertex(b));
> +                              nir_load_is_indexed_draw(b, bit_size),
> +                              nir_load_first_vertex(b, bit_size));
>           break;
>
>        case SYSTEM_VALUE_INSTANCE_INDEX:
>           sysval = nir_iadd(b,
> -                           nir_load_instance_id(b),
> -                           nir_load_base_instance(b));
> +                           nir_load_instance_id(b, bit_size),
> +                           nir_load_base_instance(b, bit_size));
>           break;
>
>        case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
> @@ -172,8 +181,8 @@ convert_block(nir_block *block, nir_builder *b)
>           break;
>
>        case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: {
> -         nir_ssa_def *group_size = build_local_group_size(b);
> -         nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);
> +         nir_ssa_def *group_size = nir_load_local_group_size(b, bit_size);
> +         nir_ssa_def *num_work_groups = nir_load_num_work_groups(b,
> bit_size);
>           sysval = nir_imul(b, group_size, num_work_groups);
>           break;
>        }
> @@ -185,7 +194,7 @@ convert_block(nir_block *block, nir_builder *b)
>        if (sysval == NULL) {
>           nir_intrinsic_op sysval_op =
>              nir_intrinsic_from_system_value(var->data.location);
> -         sysval = nir_load_system_value(b, sysval_op, 0);
> +         sysval = nir_load_system_value(b, sysval_op, 0, bit_size);
>        }
>
>        nir_ssa_def_rewrite_uses(&load_deref->dest.ssa,
> nir_src_for_ssa(sysval));
> diff --git a/src/compiler/nir/nir_lower_two_sided_color.c
> b/src/compiler/nir/nir_lower_two_sided_color.c
> index b6742ab2462..20af88b6aec 100644
> --- a/src/compiler/nir/nir_lower_two_sided_color.c
> +++ b/src/compiler/nir/nir_lower_two_sided_color.c
> @@ -158,7 +158,7 @@ nir_lower_two_sided_color_block(nir_block *block,
>         * bcsel(load_system_value(FACE), load_input(COLn),
> load_input(BFCn))
>         */
>        b->cursor = nir_before_instr(&intr->instr);
> -      nir_ssa_def *face  = nir_load_front_face(b);
> +      nir_ssa_def *face  = nir_load_front_face(b, 32);
>        nir_ssa_def *front = load_input(b, state->colors[idx].front);
>        nir_ssa_def *back  = load_input(b, state->colors[idx].back);
>        nir_ssa_def *color = nir_bcsel(b, face, front, back);
> diff --git a/src/compiler/nir/nir_lower_wpos_center.c
> b/src/compiler/nir/nir_lower_wpos_center.c
> index b6f3529c766..f2151244e17 100644
> --- a/src/compiler/nir/nir_lower_wpos_center.c
> +++ b/src/compiler/nir/nir_lower_wpos_center.c
> @@ -58,7 +58,7 @@ update_fragcoord(nir_builder *b, nir_intrinsic_instr
> *intr,
>        wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f));
>     } else {
>        nir_ssa_def *spos =
> -         nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0);
> +         nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0, 32);
>
>        wpos = nir_fadd(b, wpos,
>                        nir_vec4(b,
> diff --git a/src/compiler/spirv/vtn_subgroup.c
> b/src/compiler/spirv/vtn_subgroup.c
> index ecec3aa62d0..d71ae5284e4 100644
> --- a/src/compiler/spirv/vtn_subgroup.c
> +++ b/src/compiler/spirv/vtn_subgroup.c
> @@ -110,7 +110,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp
> opcode,
>
>  nir_intrinsic_ballot_bitfield_extract);
>
>        intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
> -      intrin->src[1] =
> nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));
> +      intrin->src[1] =
> nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb, 32));
>
>        nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL);
>        nir_builder_instr_insert(&b->nb, &intrin->instr);
> diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> index 1b31b564246..a4e46c54b4e 100644
> --- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
> +++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
> @@ -591,7 +591,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c,
> unsigned file, unsigned index,
>              nir_ssa_def *tgsi_frontface[4] = {
>                 nir_bcsel(&c->build,
>                           nir_load_system_value(&c->build,
> -
>  nir_intrinsic_load_front_face, 0),
> +
>  nir_intrinsic_load_front_face,
> +                                               0, 32),
>                           nir_imm_float(&c->build, 1.0),
>                           nir_imm_float(&c->build, -1.0)),
>                 nir_imm_float(&c->build, 0.0),
> diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
> index f719aac1b86..1dc0f5f25e8 100644
> --- a/src/intel/blorp/blorp_blit.c
> +++ b/src/intel/blorp/blorp_blit.c
> @@ -116,7 +116,7 @@ blorp_blit_get_frag_coords(nir_builder *b,
>
>     if (key->persample_msaa_dispatch) {
>        return nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord,
> 1),
> -                      nir_load_sample_id(b));
> +                      nir_load_sample_id(b, 32));
>     } else {
>        return nir_vec2(b, nir_channel(b, coord, 0), nir_channel(b, coord,
> 1));
>     }
> diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
> index b4c744020d9..a377fb5a212 100644
> --- a/src/intel/blorp/blorp_clear.c
> +++ b/src/intel/blorp/blorp_clear.c
> @@ -967,7 +967,7 @@ blorp_params_get_mcs_partial_resolve_kernel(struct
> blorp_context *blorp,
>     /* Do an MCS fetch and check if it is equal to the magic clear value */
>     nir_ssa_def *mcs =
>        blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)),
> -                               nir_load_layer_id(&b));
> +                               nir_load_layer_id(&b, 32));
>     nir_ssa_def *is_clear =
>        blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples);
>
> diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> index bfbdea0e8fa..846e82ffdf9 100644
> --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
> @@ -61,11 +61,11 @@ lower_cs_intrinsics_convert_block(struct
> lower_intrinsics_state *state,
>           if (state->local_workgroup_size <= state->dispatch_width)
>              subgroup_id = nir_imm_int(b, 0);
>           else
> -            subgroup_id = nir_load_subgroup_id(b);
> +            subgroup_id = nir_load_subgroup_id(b, 32);
>
>           nir_ssa_def *thread_local_id =
>              nir_imul(b, subgroup_id, nir_imm_int(b,
> state->dispatch_width));
> -         nir_ssa_def *channel = nir_load_subgroup_invocation(b);
> +         nir_ssa_def *channel = nir_load_subgroup_invocation(b, 32);
>           sysval = nir_iadd(b, channel, thread_local_id);
>           break;
>        }
> @@ -86,7 +86,7 @@ lower_cs_intrinsics_convert_block(struct
> lower_intrinsics_state *state,
>            */
>           unsigned *size = nir->info.cs.local_size;
>
> -         nir_ssa_def *local_index = nir_load_local_invocation_index(b);
> +         nir_ssa_def *local_index = nir_load_local_invocation_index(b,
> 32);
>
>           nir_const_value uvec3;
>           memset(&uvec3, 0, sizeof(uvec3));
> diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c
> b/src/mesa/drivers/dri/i965/brw_tcs.c
> index 3b4642033fe..84a1d162607 100644
> --- a/src/mesa/drivers/dri/i965/brw_tcs.c
> +++ b/src/mesa/drivers/dri/i965/brw_tcs.c
> @@ -48,7 +48,7 @@ create_passthrough_tcs(void *mem_ctx, const struct
> brw_compiler *compiler,
>     nir_intrinsic_instr *store;
>     nir_ssa_def *zero = nir_imm_int(&b, 0);
>     nir_ssa_def *invoc_id =
> -      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);
> +      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0, 32);
>
>     nir->info.inputs_read = key->outputs_written &
>        ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
> --
> 2.17.1
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20180716/62de5edd/attachment-0001.html>


More information about the mesa-dev mailing list