[Mesa-dev] [PATCH 8/8] nir: specify bit_size when loading system values

Karol Herbst kherbst at redhat.com
Mon Jul 16 14:28:26 UTC 2018


With OpenCL the size of some system value depends on the Physical model
choosen, so we need a way to load any system value as 32 or 64 bit.

We could probably be a lot smarter and specify which system values might
be valid as 32 and/or 64 bit, but I get the feeling it isn't really worth
the effort and we can simply depend on the dest type choosen by the API.

Signed-off-by: Karol Herbst <kherbst at redhat.com>
---
 src/compiler/nir/nir_builder_opcodes_h.py     |  9 ++--
 src/compiler/nir/nir_lower_alpha_test.c       |  2 +-
 src/compiler/nir/nir_lower_clip.c             |  3 +-
 src/compiler/nir/nir_lower_subgroups.c        |  8 +--
 src/compiler/nir/nir_lower_system_values.c    | 49 +++++++++++--------
 src/compiler/nir/nir_lower_two_sided_color.c  |  2 +-
 src/compiler/nir/nir_lower_wpos_center.c      |  2 +-
 src/compiler/spirv/vtn_subgroup.c             |  2 +-
 src/gallium/auxiliary/nir/tgsi_to_nir.c       |  3 +-
 src/intel/blorp/blorp_blit.c                  |  2 +-
 src/intel/blorp/blorp_clear.c                 |  2 +-
 .../compiler/brw_nir_lower_cs_intrinsics.c    |  6 +--
 src/mesa/drivers/dri/i965/brw_tcs.c           |  2 +-
 13 files changed, 52 insertions(+), 40 deletions(-)

diff --git a/src/compiler/nir/nir_builder_opcodes_h.py b/src/compiler/nir/nir_builder_opcodes_h.py
index 72cf5b4549d..d16dac6b16e 100644
--- a/src/compiler/nir/nir_builder_opcodes_h.py
+++ b/src/compiler/nir/nir_builder_opcodes_h.py
@@ -44,22 +44,23 @@ nir_${name}(nir_builder *build, ${src_decl_list(opcode.num_inputs)})
 
 /* Generic builder for system values. */
 static inline nir_ssa_def *
-nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
+nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index,
+                      unsigned bit_size)
 {
    nir_intrinsic_instr *load = nir_intrinsic_instr_create(build->shader, op);
    load->num_components = nir_intrinsic_infos[op].dest_components;
    load->const_index[0] = index;
    nir_ssa_dest_init(&load->instr, &load->dest,
-                     nir_intrinsic_infos[op].dest_components, 32, NULL);
+                     nir_intrinsic_infos[op].dest_components, bit_size, NULL);
    nir_builder_instr_insert(build, &load->instr);
    return &load->dest.ssa;
 }
 
 % for name, opcode in filter(lambda v: v[1].sysval, sorted(INTR_OPCODES.iteritems())):
 static inline nir_ssa_def *
-nir_${name}(nir_builder *build)
+nir_${name}(nir_builder *build, unsigned bit_size)
 {
-   return nir_load_system_value(build, nir_intrinsic_${name}, 0);
+   return nir_load_system_value(build, nir_intrinsic_${name}, 0, bit_size);
 }
 % endfor
 
diff --git a/src/compiler/nir/nir_lower_alpha_test.c b/src/compiler/nir/nir_lower_alpha_test.c
index ddd815765bd..8341a0246d2 100644
--- a/src/compiler/nir/nir_lower_alpha_test.c
+++ b/src/compiler/nir/nir_lower_alpha_test.c
@@ -95,7 +95,7 @@ nir_lower_alpha_test(nir_shader *shader, enum compare_func func,
 
                nir_ssa_def *condition =
                   nir_compare_func(&b, func,
-                                   alpha, nir_load_alpha_ref_float(&b));
+                                   alpha, nir_load_alpha_ref_float(&b, 32));
 
                nir_intrinsic_instr *discard =
                   nir_intrinsic_instr_create(b.shader,
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c
index ea12f51a7bb..b9a91f7d40b 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -174,7 +174,8 @@ lower_clip_vs(nir_function_impl *impl, unsigned ucp_enables,
    for (int plane = 0; plane < MAX_CLIP_PLANES; plane++) {
       if (ucp_enables & (1 << plane)) {
          nir_ssa_def *ucp =
-            nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane, plane);
+            nir_load_system_value(&b, nir_intrinsic_load_user_clip_plane,
+                                  plane, 32);
 
          /* calculate clipdist[plane] - dot(ucp, cv): */
          clipdist[plane] = nir_fdot4(&b, ucp, cv);
diff --git a/src/compiler/nir/nir_lower_subgroups.c b/src/compiler/nir/nir_lower_subgroups.c
index ee5e8bd644b..c474b9fd27a 100644
--- a/src/compiler/nir/nir_lower_subgroups.c
+++ b/src/compiler/nir/nir_lower_subgroups.c
@@ -226,7 +226,7 @@ static nir_ssa_def *
 lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,
               bool lower_to_scalar, bool lower_to_32bit)
 {
-   nir_ssa_def *index = nir_load_subgroup_invocation(b);
+   nir_ssa_def *index = nir_load_subgroup_invocation(b, 32);
    switch (intrin->intrinsic) {
    case nir_intrinsic_shuffle_xor:
       assert(intrin->src[1].is_ssa);
@@ -338,7 +338,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
       assert(options->subgroup_size <= 64);
       uint64_t group_mask = ~0ull >> (64 - options->subgroup_size);
 
-      nir_ssa_def *count = nir_load_subgroup_invocation(b);
+      nir_ssa_def *count = nir_load_subgroup_invocation(b, 32);
       nir_ssa_def *val;
       switch (intrin->intrinsic) {
       case nir_intrinsic_load_subgroup_eq_mask:
@@ -411,7 +411,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
 
    case nir_intrinsic_ballot_bit_count_exclusive:
    case nir_intrinsic_ballot_bit_count_inclusive: {
-      nir_ssa_def *count = nir_load_subgroup_invocation(b);
+      nir_ssa_def *count = nir_load_subgroup_invocation(b, 32);
       nir_ssa_def *mask = nir_imm_intN_t(b, ~0ull, options->ballot_bit_size);
       if (intrin->intrinsic == nir_intrinsic_ballot_bit_count_inclusive) {
          const unsigned bits = options->ballot_bit_size;
@@ -434,7 +434,7 @@ lower_subgroups_intrin(nir_builder *b, nir_intrinsic_instr *intrin,
       nir_ssa_dest_init(&first->instr, &first->dest, 1, 32, NULL);
       nir_builder_instr_insert(b, &first->instr);
 
-      return nir_ieq(b, nir_load_subgroup_invocation(b), &first->dest.ssa);
+      return nir_ieq(b, nir_load_subgroup_invocation(b, 32), &first->dest.ssa);
    }
 
    case nir_intrinsic_shuffle:
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
index da04895d66c..41f939dd935 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -29,14 +29,22 @@
 #include "nir_builder.h"
 
 static nir_ssa_def*
-build_local_group_size(nir_builder *b)
+build_local_group_size(nir_builder *b, unsigned bit_size)
 {
    nir_const_value local_size;
    memset(&local_size, 0, sizeof(local_size));
-   local_size.u32[0] = b->shader->info.cs.local_size[0];
-   local_size.u32[1] = b->shader->info.cs.local_size[1];
-   local_size.u32[2] = b->shader->info.cs.local_size[2];
-   return nir_build_imm(b, 3, 32, local_size);
+   if (bit_size == 64) {
+      local_size.u64[0] = b->shader->info.cs.local_size[0];
+      local_size.u64[1] = b->shader->info.cs.local_size[1];
+      local_size.u64[2] = b->shader->info.cs.local_size[2];
+   } else if (bit_size == 32) {
+      local_size.u32[0] = b->shader->info.cs.local_size[0];
+      local_size.u32[1] = b->shader->info.cs.local_size[1];
+      local_size.u32[2] = b->shader->info.cs.local_size[2];
+   } else {
+      assert(!"local group size can't be smaller than 32 bits");
+   }
+   return nir_build_imm(b, 3, bit_size, local_size);
 }
 
 static bool
@@ -67,6 +75,7 @@ convert_block(nir_block *block, nir_builder *b)
       }
       nir_variable *var = deref->var;
 
+      unsigned bit_size = load_deref->dest.ssa.bit_size;
       b->cursor = nir_after_instr(&load_deref->instr);
 
       nir_ssa_def *sysval = NULL;
@@ -77,9 +86,9 @@ convert_block(nir_block *block, nir_builder *b)
           *    "The value of gl_GlobalInvocationID is equal to
           *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID"
           */
-         nir_ssa_def *group_size = build_local_group_size(b);
-         nir_ssa_def *group_id = nir_load_work_group_id(b);
-         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
+         nir_ssa_def *group_size = build_local_group_size(b, bit_size);
+         nir_ssa_def *group_id = nir_load_work_group_id(b, bit_size);
+         nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);
 
          sysval = nir_iadd(b, nir_imul(b, group_id, group_size), local_id);
          break;
@@ -99,7 +108,7 @@ convert_block(nir_block *block, nir_builder *b)
           *    gl_WorkGroupSize.y + gl_LocalInvocationID.y *
           *    gl_WorkGroupSize.x + gl_LocalInvocationID.x"
           */
-         nir_ssa_def *local_id = nir_load_local_invocation_id(b);
+         nir_ssa_def *local_id = nir_load_local_invocation_id(b, bit_size);
 
          nir_ssa_def *size_x =
             nir_imm_int(b, b->shader->info.cs.local_size[0]);
@@ -115,17 +124,17 @@ convert_block(nir_block *block, nir_builder *b)
       }
 
       case SYSTEM_VALUE_LOCAL_GROUP_SIZE: {
-         sysval = build_local_group_size(b);
+         sysval = build_local_group_size(b, bit_size);
          break;
       }
 
       case SYSTEM_VALUE_VERTEX_ID:
          if (b->shader->options->vertex_id_zero_based) {
             sysval = nir_iadd(b,
-                              nir_load_vertex_id_zero_base(b),
-                              nir_load_first_vertex(b));
+                              nir_load_vertex_id_zero_base(b, bit_size),
+                              nir_load_first_vertex(b, bit_size));
          } else {
-            sysval = nir_load_vertex_id(b);
+            sysval = nir_load_vertex_id(b, bit_size);
          }
          break;
 
@@ -140,14 +149,14 @@ convert_block(nir_block *block, nir_builder *b)
           */
          if (b->shader->options->lower_base_vertex)
             sysval = nir_iand(b,
-                              nir_load_is_indexed_draw(b),
-                              nir_load_first_vertex(b));
+                              nir_load_is_indexed_draw(b, bit_size),
+                              nir_load_first_vertex(b, bit_size));
          break;
 
       case SYSTEM_VALUE_INSTANCE_INDEX:
          sysval = nir_iadd(b,
-                           nir_load_instance_id(b),
-                           nir_load_base_instance(b));
+                           nir_load_instance_id(b, bit_size),
+                           nir_load_base_instance(b, bit_size));
          break;
 
       case SYSTEM_VALUE_SUBGROUP_EQ_MASK:
@@ -172,8 +181,8 @@ convert_block(nir_block *block, nir_builder *b)
          break;
 
       case SYSTEM_VALUE_GLOBAL_GROUP_SIZE: {
-         nir_ssa_def *group_size = build_local_group_size(b);
-         nir_ssa_def *num_work_groups = nir_load_num_work_groups(b);
+         nir_ssa_def *group_size = nir_load_local_group_size(b, bit_size);
+         nir_ssa_def *num_work_groups = nir_load_num_work_groups(b, bit_size);
          sysval = nir_imul(b, group_size, num_work_groups);
          break;
       }
@@ -185,7 +194,7 @@ convert_block(nir_block *block, nir_builder *b)
       if (sysval == NULL) {
          nir_intrinsic_op sysval_op =
             nir_intrinsic_from_system_value(var->data.location);
-         sysval = nir_load_system_value(b, sysval_op, 0);
+         sysval = nir_load_system_value(b, sysval_op, 0, bit_size);
       }
 
       nir_ssa_def_rewrite_uses(&load_deref->dest.ssa, nir_src_for_ssa(sysval));
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c
index b6742ab2462..20af88b6aec 100644
--- a/src/compiler/nir/nir_lower_two_sided_color.c
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -158,7 +158,7 @@ nir_lower_two_sided_color_block(nir_block *block,
        * bcsel(load_system_value(FACE), load_input(COLn), load_input(BFCn))
        */
       b->cursor = nir_before_instr(&intr->instr);
-      nir_ssa_def *face  = nir_load_front_face(b);
+      nir_ssa_def *face  = nir_load_front_face(b, 32);
       nir_ssa_def *front = load_input(b, state->colors[idx].front);
       nir_ssa_def *back  = load_input(b, state->colors[idx].back);
       nir_ssa_def *color = nir_bcsel(b, face, front, back);
diff --git a/src/compiler/nir/nir_lower_wpos_center.c b/src/compiler/nir/nir_lower_wpos_center.c
index b6f3529c766..f2151244e17 100644
--- a/src/compiler/nir/nir_lower_wpos_center.c
+++ b/src/compiler/nir/nir_lower_wpos_center.c
@@ -58,7 +58,7 @@ update_fragcoord(nir_builder *b, nir_intrinsic_instr *intr,
       wpos = nir_fadd(b, wpos, nir_imm_vec4(b, 0.5f, 0.5f, 0.0f, 0.0f));
    } else {
       nir_ssa_def *spos =
-         nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0);
+         nir_load_system_value(b, nir_intrinsic_load_sample_pos, 0, 32);
 
       wpos = nir_fadd(b, wpos,
                       nir_vec4(b,
diff --git a/src/compiler/spirv/vtn_subgroup.c b/src/compiler/spirv/vtn_subgroup.c
index ecec3aa62d0..d71ae5284e4 100644
--- a/src/compiler/spirv/vtn_subgroup.c
+++ b/src/compiler/spirv/vtn_subgroup.c
@@ -110,7 +110,7 @@ vtn_handle_subgroup(struct vtn_builder *b, SpvOp opcode,
                                     nir_intrinsic_ballot_bitfield_extract);
 
       intrin->src[0] = nir_src_for_ssa(vtn_ssa_value(b, w[4])->def);
-      intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb));
+      intrin->src[1] = nir_src_for_ssa(nir_load_subgroup_invocation(&b->nb, 32));
 
       nir_ssa_dest_init(&intrin->instr, &intrin->dest, 1, 32, NULL);
       nir_builder_instr_insert(&b->nb, &intrin->instr);
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 1b31b564246..a4e46c54b4e 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -591,7 +591,8 @@ ttn_src_for_file_and_index(struct ttn_compile *c, unsigned file, unsigned index,
             nir_ssa_def *tgsi_frontface[4] = {
                nir_bcsel(&c->build,
                          nir_load_system_value(&c->build,
-                                               nir_intrinsic_load_front_face, 0),
+                                               nir_intrinsic_load_front_face,
+                                               0, 32),
                          nir_imm_float(&c->build, 1.0),
                          nir_imm_float(&c->build, -1.0)),
                nir_imm_float(&c->build, 0.0),
diff --git a/src/intel/blorp/blorp_blit.c b/src/intel/blorp/blorp_blit.c
index f719aac1b86..1dc0f5f25e8 100644
--- a/src/intel/blorp/blorp_blit.c
+++ b/src/intel/blorp/blorp_blit.c
@@ -116,7 +116,7 @@ blorp_blit_get_frag_coords(nir_builder *b,
 
    if (key->persample_msaa_dispatch) {
       return nir_vec3(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1),
-                      nir_load_sample_id(b));
+                      nir_load_sample_id(b, 32));
    } else {
       return nir_vec2(b, nir_channel(b, coord, 0), nir_channel(b, coord, 1));
    }
diff --git a/src/intel/blorp/blorp_clear.c b/src/intel/blorp/blorp_clear.c
index b4c744020d9..a377fb5a212 100644
--- a/src/intel/blorp/blorp_clear.c
+++ b/src/intel/blorp/blorp_clear.c
@@ -967,7 +967,7 @@ blorp_params_get_mcs_partial_resolve_kernel(struct blorp_context *blorp,
    /* Do an MCS fetch and check if it is equal to the magic clear value */
    nir_ssa_def *mcs =
       blorp_nir_txf_ms_mcs(&b, nir_f2i32(&b, blorp_nir_frag_coord(&b)),
-                               nir_load_layer_id(&b));
+                               nir_load_layer_id(&b, 32));
    nir_ssa_def *is_clear =
       blorp_nir_mcs_is_clear_color(&b, mcs, blorp_key.num_samples);
 
diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
index bfbdea0e8fa..846e82ffdf9 100644
--- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
+++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c
@@ -61,11 +61,11 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
          if (state->local_workgroup_size <= state->dispatch_width)
             subgroup_id = nir_imm_int(b, 0);
          else
-            subgroup_id = nir_load_subgroup_id(b);
+            subgroup_id = nir_load_subgroup_id(b, 32);
 
          nir_ssa_def *thread_local_id =
             nir_imul(b, subgroup_id, nir_imm_int(b, state->dispatch_width));
-         nir_ssa_def *channel = nir_load_subgroup_invocation(b);
+         nir_ssa_def *channel = nir_load_subgroup_invocation(b, 32);
          sysval = nir_iadd(b, channel, thread_local_id);
          break;
       }
@@ -86,7 +86,7 @@ lower_cs_intrinsics_convert_block(struct lower_intrinsics_state *state,
           */
          unsigned *size = nir->info.cs.local_size;
 
-         nir_ssa_def *local_index = nir_load_local_invocation_index(b);
+         nir_ssa_def *local_index = nir_load_local_invocation_index(b, 32);
 
          nir_const_value uvec3;
          memset(&uvec3, 0, sizeof(uvec3));
diff --git a/src/mesa/drivers/dri/i965/brw_tcs.c b/src/mesa/drivers/dri/i965/brw_tcs.c
index 3b4642033fe..84a1d162607 100644
--- a/src/mesa/drivers/dri/i965/brw_tcs.c
+++ b/src/mesa/drivers/dri/i965/brw_tcs.c
@@ -48,7 +48,7 @@ create_passthrough_tcs(void *mem_ctx, const struct brw_compiler *compiler,
    nir_intrinsic_instr *store;
    nir_ssa_def *zero = nir_imm_int(&b, 0);
    nir_ssa_def *invoc_id =
-      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0);
+      nir_load_system_value(&b, nir_intrinsic_load_invocation_id, 0, 32);
 
    nir->info.inputs_read = key->outputs_written &
       ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
-- 
2.17.1



More information about the mesa-dev mailing list