<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Tue, Oct 31, 2017 at 10:55 AM, Neil Roberts <span dir="ltr"><<a href="mailto:nroberts@igalia.com" target="_blank">nroberts@igalia.com</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Similar to nir_intrinsic_load_subgroup_<wbr>eq_mask and friends, this adds<br>
an intrinsic which contains a bit for every member of the group. This<br>
doesn’t have a corresponding GLSL builtin but it will be used to<br>
calculate nir_intrinsic_load_subgroup_g{<wbr>t,e}_mask. It has its own nir<br>
option on whether to lower it. The idea is that this should be much<br>
easier to generate than the other masks because it will likely be a<br>
compile-time constant and if so it will generate more efficient code<br>
for the other masks.<br></blockquote><div><br></div><div>As I remarked on patch 3, this isn't really going to be needed once my stuff lands.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
---<br>
 src/compiler/nir/nir.h                |  1 +<br>
 src/compiler/nir/nir_<wbr>intrinsics.h     |  1 +<br>
 src/compiler/nir/nir_opt_<wbr>intrinsics.c | 41 +++++++++++++++++++++++++++---<wbr>-----<br>
 src/intel/compiler/brw_<wbr>compiler.c     |  1 +<br>
 src/intel/compiler/brw_fs_nir.<wbr>cpp     |  1 +<br>
 5 files changed, 36 insertions(+), 9 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index dd833cf..edb02b9 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -1836,6 +1836,7 @@ typedef struct nir_shader_compiler_options {<br>
    bool lower_extract_word;<br>
<br>
    bool lower_vote_trivial;<br>
+   bool lower_subgroup_all_mask;<br>
    bool lower_subgroup_masks;<br>
<br>
    /**<br>
diff --git a/src/compiler/nir/nir_<wbr>intrinsics.h b/src/compiler/nir/nir_<wbr>intrinsics.h<br>
index cefd18b..de362a8 100644<br>
--- a/src/compiler/nir/nir_<wbr>intrinsics.h<br>
+++ b/src/compiler/nir/nir_<wbr>intrinsics.h<br>
@@ -350,6 +350,7 @@ SYSTEM_VALUE(layer_id, 1, 0, xx, xx, xx)<br>
 SYSTEM_VALUE(view_index, 1, 0, xx, xx, xx)<br>
 SYSTEM_VALUE(subgroup_size, 1, 0, xx, xx, xx)<br>
 SYSTEM_VALUE(subgroup_<wbr>invocation, 1, 0, xx, xx, xx)<br>
+SYSTEM_VALUE(subgroup_all_<wbr>mask, 1, 0, xx, xx, xx)<br>
 SYSTEM_VALUE(subgroup_eq_mask, 1, 0, xx, xx, xx)<br>
 SYSTEM_VALUE(subgroup_ge_mask, 1, 0, xx, xx, xx)<br>
 SYSTEM_VALUE(subgroup_gt_mask, 1, 0, xx, xx, xx)<br>
diff --git a/src/compiler/nir/nir_opt_<wbr>intrinsics.c b/src/compiler/nir/nir_opt_<wbr>intrinsics.c<br>
index d5fdc51..71d79d7 100644<br>
--- a/src/compiler/nir/nir_opt_<wbr>intrinsics.c<br>
+++ b/src/compiler/nir/nir_opt_<wbr>intrinsics.c<br>
@@ -29,23 +29,39 @@<br>
  */<br>
<br>
 static nir_ssa_def *<br>
-high_subgroup_mask(nir_<wbr>builder *b,<br>
-                   nir_ssa_def *count,<br>
-                   uint64_t base_mask)<br>
+subgroup_all_mask(nir_builder *b,<br>
+                  nir_ssa_def *count)<br>
 {<br>
    /* group_mask could probably be calculated more efficiently but we want to<br>
     * be sure not to shift by 64 if the subgroup size is 64 because the GLSL<br>
-    * shift operator is undefined in that case. In any case if we were worried<br>
-    * about efficency this should probably be done further down because the<br>
-    * subgroup size is likely to be known at compile time.<br>
+    * shift operator is undefined in that case. In any case if the driver is<br>
+    * worried about efficiency this should probably be done further down<br>
+    * because the subgroup size is likely to be known at compile time.<br>
     */<br>
    nir_ssa_def *subgroup_size = nir_load_subgroup_size(b);<br>
    nir_ssa_def *all_bits = nir_imm_int64(b, ~0ull);<br>
    nir_ssa_def *shift = nir_isub(b, nir_imm_int(b, 64), subgroup_size);<br>
-   nir_ssa_def *group_mask = nir_ushr(b, all_bits, shift);<br>
-   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);<br>
+   return nir_ushr(b, all_bits, shift);<br>
+}<br>
<br>
-   return nir_iand(b, higher_bits, group_mask);<br>
+static nir_ssa_def *<br>
+high_subgroup_mask(nir_<wbr>builder *b,<br>
+                   nir_ssa_def *count,<br>
+                   uint64_t base_mask)<br>
+{<br>
+   nir_ssa_def *higher_bits = nir_ishl(b, nir_imm_int64(b, base_mask), count);<br>
+   nir_intrinsic_instr *load_group_mask =<br>
+      nir_intrinsic_instr_create(b-><wbr>shader,<br>
+                                 nir_intrinsic_load_subgroup_<wbr>all_mask);<br>
+   load_group_mask->num_<wbr>components = 1;<br>
+   nir_ssa_dest_init(&load_group_<wbr>mask->instr,<br>
+                     &load_group_mask->dest,<br>
+                     1 /* num_components */,<br>
+                     64 /* bit_size */,<br>
+                     NULL /* name */);<br>
+   nir_builder_instr_insert(b, &load_group_mask->instr);<br>
+<br>
+   return nir_iand(b, higher_bits, &load_group_mask->dest.ssa);<br>
 }<br>
<br>
 static bool<br>
@@ -100,6 +116,10 @@ opt_intrinsics_impl(nir_<wbr>function_impl *impl)<br>
                                                  nir_imm_int(&b, 0));<br>
             break;<br>
          }<br>
+         case nir_intrinsic_load_subgroup_<wbr>all_mask:<br>
+            if (!b.shader->options->lower_<wbr>subgroup_all_mask)<br>
+               break;<br>
+            /* fall through */<br>
          case nir_intrinsic_load_subgroup_<wbr>eq_mask:<br>
          case nir_intrinsic_load_subgroup_<wbr>ge_mask:<br>
          case nir_intrinsic_load_subgroup_<wbr>gt_mask:<br>
@@ -111,6 +131,9 @@ opt_intrinsics_impl(nir_<wbr>function_impl *impl)<br>
             nir_ssa_def *count = nir_load_subgroup_invocation(&<wbr>b);<br>
<br>
             switch (intrin->intrinsic) {<br>
+            case nir_intrinsic_load_subgroup_<wbr>all_mask:<br>
+               replacement = subgroup_all_mask(&b, count);<br>
+               break;<br>
             case nir_intrinsic_load_subgroup_<wbr>eq_mask:<br>
                replacement = nir_ishl(&b, nir_imm_int64(&b, 1ull), count);<br>
                break;<br>
diff --git a/src/intel/compiler/brw_<wbr>compiler.c b/src/intel/compiler/brw_<wbr>compiler.c<br>
index 2f6af7d..8df0d2e 100644<br>
--- a/src/intel/compiler/brw_<wbr>compiler.c<br>
+++ b/src/intel/compiler/brw_<wbr>compiler.c<br>
@@ -57,6 +57,7 @@ static const struct nir_shader_compiler_options scalar_nir_options = {<br>
    .lower_unpack_snorm_4x8 = true,<br>
    .lower_unpack_unorm_2x16 = true,<br>
    .lower_unpack_unorm_4x8 = true,<br>
+   .lower_subgroup_all_mask = true,<br>
    .lower_subgroup_masks = true,<br>
    .max_subgroup_size = 32,<br>
    .max_unroll_iterations = 32,<br>
diff --git a/src/intel/compiler/brw_fs_<wbr>nir.cpp b/src/intel/compiler/brw_fs_<wbr>nir.cpp<br>
index bb153ca..9202b0f 100644<br>
--- a/src/intel/compiler/brw_fs_<wbr>nir.cpp<br>
+++ b/src/intel/compiler/brw_fs_<wbr>nir.cpp<br>
@@ -4185,6 +4185,7 @@ fs_visitor::nir_emit_<wbr>intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr<br>
       break;<br>
    }<br>
<br>
+   case nir_intrinsic_load_subgroup_<wbr>all_mask:<br>
    case nir_intrinsic_load_subgroup_<wbr>eq_mask:<br>
    case nir_intrinsic_load_subgroup_<wbr>ge_mask:<br>
    case nir_intrinsic_load_subgroup_<wbr>gt_mask:<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.9.5<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>