[Mesa-dev] [PATCH 2/3] nir: Add an intrinsic for a bitmask of the whole subgroup

Neil Roberts nroberts at igalia.com
Tue Oct 31 17:55:14 UTC 2017


Similar to nir_intrinsic_load_subgroup_eq_mask and friends, this adds
an intrinsic whose value has a bit set for every member of the
subgroup. It doesn’t have a corresponding GLSL builtin but it will be
used to calculate nir_intrinsic_load_subgroup_g{t,e}_mask. It has its
own NIR compiler option controlling whether it is lowered. The idea is
that this should be much easier to generate than the other masks
because it will likely be a compile-time constant, and if so it will
result in more efficient code for the other masks.
---
 src/compiler/nir/nir.h                |  1 +
 src/compiler/nir/nir_intrinsics.h     |  1 +
 src/compiler/nir/nir_opt_intrinsics.c | 41 +++++++++++++++++++++++++++--------
 src/intel/compiler/brw_compiler.c     |  1 +
 src/intel/compiler/brw_fs_nir.cpp     |  1 +
 5 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index dd833cf..edb02b9 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options {
    bool lower_extract_word;
 
    bool lower_vote_trivial;
+   bool lower_subgroup_all_mask;
    bool lower_subgroup_masks;
 
    /**
diff --git a/src/compiler/nir/nir_intrinsics.h b/src/compiler/nir/nir_intrinsics.h
index cefd18b..de362a8 100644
--- a/src/compiler/nir/nir_intrinsics.h
+++ b/src/compiler/nir/nir_intrinsics.h
@@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_invocation, 1, 0, xx, xx, xx)
+SYSTEM_VALUE(subgroup_all_mask, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)
 SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)
diff --git a/src/compiler/nir/nir_opt_intrinsics.c b/src/compiler/nir/nir_opt_intrinsics.c
index d5fdc51..71d79d7 100644
--- a/src/compiler/nir/nir_opt_intrinsics.c
+++ b/src/compiler/nir/nir_opt_intrinsics.c
@@ -29,23 +29,39 @@
  */
 
 static nir_ssa_def *
-high_subgroup_mask(nir_builder *b,
-                   nir_ssa_def *count,
-                   uint64_t base_mask)
+subgroup_all_mask(nir_builder *b,
+                  nir_ssa_def *count)
 {
    /* group_mask could probably be calculated more efficiently but we want to
     * be sure not to shift by 64 if the subgroup size is 64 because the GLSL
-    * shift operator is undefined in that case. In any case if we were worried
-    * about efficency this should probably be done further down because the
-    * subgroup size is likely to be known at compile time.
+    * shift operator is undefined in that case. In any case if the driver is
+    * worried about efficiency this should probably be done further down
+    * because the subgroup size is likely to be known at compile time.
     */
    nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);
    nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);
    nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);
-   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);
-   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
+   return nir_ushr(b, all_bits, shift);
+}
 
-   return nir_iand(b, higher_bits, group_mask);
+static nir_ssa_def *
+high_subgroup_mask(nir_builder *b,
+                   nir_ssa_def *count,
+                   uint64_t base_mask)
+{
+   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);
+   nir_intrinsic_instr *load_group_mask =
+      nir_intrinsic_instr_create(b->shader,
+                                 nir_intrinsic_load_subgroup_all_mask);
+   load_group_mask->num_components = 1;
+   nir_ssa_dest_init(&load_group_mask->instr,
+                     &load_group_mask->dest,
+                     1 /* num_components */,
+                     64 /* bit_size */,
+                     NULL /* name */);
+   nir_builder_instr_insert(b, &load_group_mask->instr);
+
+   return nir_iand(b, higher_bits, &load_group_mask->dest.ssa);
 }
 
 static bool
@@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_function_impl *impl)
                                                  nir_imm_int(&b, 0));
             break;
          }
+         case nir_intrinsic_load_subgroup_all_mask:
+            if (!b.shader->options->lower_subgroup_all_mask)
+               break;
+            /* fall through */
          case nir_intrinsic_load_subgroup_eq_mask:
          case nir_intrinsic_load_subgroup_ge_mask:
          case nir_intrinsic_load_subgroup_gt_mask:
@@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_function_impl *impl)
             nir_ssa_def *count = nir_load_subgroup_invocation(&b);
 
             switch (intrin->intrinsic) {
+            case nir_intrinsic_load_subgroup_all_mask:
+               replacement = subgroup_all_mask(&b, count);
+               break;
             case nir_intrinsic_load_subgroup_eq_mask:
                replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);
                break;
diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c
index 2f6af7d..8df0d2e 100644
--- a/src/intel/compiler/brw_compiler.c
+++ b/src/intel/compiler/brw_compiler.c
@@ -57,6 +57,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = {
    .lower_unpack_snorm_4x8 = true,
    .lower_unpack_unorm_2x16 = true,
    .lower_unpack_unorm_4x8 = true,
+   .lower_subgroup_all_mask = true,
    .lower_subgroup_masks = true,
    .max_subgroup_size = 32,
    .max_unroll_iterations = 32,
diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp
index bb153ca..9202b0f 100644
--- a/src/intel/compiler/brw_fs_nir.cpp
+++ b/src/intel/compiler/brw_fs_nir.cpp
@@ -4185,6 +4185,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr
       break;
    }
 
+   case nir_intrinsic_load_subgroup_all_mask:
    case nir_intrinsic_load_subgroup_eq_mask:
    case nir_intrinsic_load_subgroup_ge_mask:
    case nir_intrinsic_load_subgroup_gt_mask:
-- 
2.9.5



More information about the mesa-dev mailing list