<div dir="ltr"><div class="gmail_extra"><div class="gmail_quote">On Fri, Mar 16, 2018 at 2:50 AM, Daniel Schürmann <span dir="ltr"><<a href="mailto:daniel.schuermann@campus.tu-berlin.de" target="_blank">daniel.schuermann@campus.tu-berlin.de</a>></span> wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Signed-off-by: Daniel Schürmann <<a href="mailto:daniel.schuermann@campus.tu-berlin.de">daniel.schuermann@campus.tu-<wbr>berlin.de</a>><br>
---<br>
 src/compiler/nir/nir.h                 |  1 +<br>
 src/compiler/nir/nir_lower_<wbr>subgroups.c | 83 +++++++++++++++++++++++++++---<wbr>----<br>
 2 files changed, 67 insertions(+), 17 deletions(-)<br>
<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index 6a51b7c4ab..0e3c026efa 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -2564,6 +2564,7 @@ typedef struct nir_lower_subgroups_options {<br>
    bool lower_vote_eq_to_ballot:1;<br>
    bool lower_subgroup_masks:1;<br>
    bool lower_shuffle:1;<br>
+   bool lower_shuffle_to_32bit:1;<br>
    bool lower_quad:1;<br>
 } nir_lower_subgroups_options;<br>
<br>
diff --git a/src/compiler/nir/nir_lower_<wbr>subgroups.c b/src/compiler/nir/nir_lower_<wbr>subgroups.c<br>
index 9dc7be7947..669168e830 100644<br>
--- a/src/compiler/nir/nir_lower_<wbr>subgroups.c<br>
+++ b/src/compiler/nir/nir_lower_<wbr>subgroups.c<br>
@@ -28,6 +28,37 @@<br>
  * \file nir_opt_intrinsics.c<br>
  */<br>
<br>
+static nir_intrinsic_instr *ac_lower_subgroups_64bit_<wbr>split_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin, unsigned int component) {<br></blockquote><div><br></div><div>Please put "static nir_intrinsic_instr *" and the "{" each on their own line and wrap things so that we don't go over 80 characters.  Also, please drop the ac_ prefix as this is no longer in radv code.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+   nir_ssa_def *comp;<br>
+   if (component == 0)<br>
+      comp = nir_unpack_64_2x32_split_x(b, intrin->src[0].ssa);<br>
+   else<br>
+      comp = nir_unpack_64_2x32_split_y(b, intrin->src[0].ssa);<br>
+<br>
+   nir_intrinsic_instr *intr = nir_intrinsic_instr_create(b-><wbr>shader, intrin->intrinsic);<br>
+   nir_ssa_dest_init(&intr-><wbr>instr, &intr->dest, 1, 32, NULL);<br>
+   intr->src[0] = nir_src_for_ssa(comp);<br>
+<br>
+   intr->const_index[0] = intrin->const_index[0];<br>
+   intr->const_index[1] = intrin->const_index[1];<br>
+   if (intrin->intrinsic == nir_intrinsic_read_invocation ||<br>
+      intrin->intrinsic == nir_intrinsic_shuffle ||<br>
+      intrin->intrinsic == nir_intrinsic_quad_broadcast) {<br></blockquote><div><br></div><div>You can use nir_intrinsic_infos[intrin->intrinsic].num_srcs to make this a bit more general.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+      nir_src_copy(&intr->src[1], &intrin->src[1], intr);<br>
+   }<br>
+   intr->num_components = 1;<br>
+   nir_builder_instr_insert(b, &intr->instr);<br>
+   return intr;<br>
+}<br>
+<br>
+static nir_ssa_def *<br>
+lower_64bit_to_32bit(nir_<wbr>builder *b, nir_intrinsic_instr *intrin) { </blockquote><div><br></div><div>"{" goes on its own line.  Also, how about "lower_subgroup_op_to_32bit" to match "lower_subgroup_op_to_scalar" below.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+   assert(intrin->src[0].ssa-><wbr>bit_size == 64);<br>
+   nir_intrinsic_instr *intr_x = ac_lower_subgroups_64bit_<wbr>split_intrinsic(b, intrin, 0);<br>
+   nir_intrinsic_instr *intr_y = ac_lower_subgroups_64bit_<wbr>split_intrinsic(b, intrin, 1);<br>
+   return nir_pack_64_2x32_split(b, &intr_x->dest.ssa, &intr_y->dest.ssa);<br>
+}<br>
+<br>
 static nir_ssa_def *<br>
 ballot_type_to_uint(nir_<wbr>builder *b, nir_ssa_def *value, unsigned bit_size)<br>
 {<br>
@@ -80,7 +111,8 @@ uint_to_ballot_type(nir_<wbr>builder *b, nir_ssa_def *value,<br>
 }<br>
<br>
 static nir_ssa_def *<br>
-lower_subgroup_op_to_scalar(<wbr>nir_builder *b, nir_intrinsic_instr *intrin)<br>
+lower_subgroup_op_to_scalar(<wbr>nir_builder *b, nir_intrinsic_instr *intrin,<br>
+                        bool lower_shuffle_to_32bit)<br></blockquote><div><br></div><div>Just call this lower_to_32bit as it doesn't necessarily have anything to do with shuffles.  Also, please align the parameter to the (<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
 {<br>
    /* This is safe to call on scalar things but it would be silly */<br>
    assert(intrin->dest.ssa.num_<wbr>components > 1);<br>
@@ -107,9 +139,12 @@ lower_subgroup_op_to_scalar(<wbr>nir_builder *b, nir_intrinsic_instr *intrin)<br>
       chan_intrin->const_index[0] = intrin->const_index[0];<br>
       chan_intrin->const_index[1] = intrin->const_index[1];<br>
<br>
-      nir_builder_instr_insert(b, &chan_intrin->instr);<br>
-<br>
-      reads[i] = &chan_intrin->dest.ssa;<br>
+      if (lower_shuffle_to_32bit && chan_intrin->src[0].ssa->bit_<wbr>size == 64) {<br>
+         reads[i] = lower_64bit_to_32bit(b, chan_intrin);<br>
+      } else {<br>
+         nir_builder_instr_insert(b, &chan_intrin->instr);<br>
+         reads[i] = &chan_intrin->dest.ssa;<br>
+      }<br>
    }<br>
<br>
    return nir_vec(b, reads, intrin->num_components);<br>
@@ -158,13 +193,19 @@ lower_vote_eq_to_ballot(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
                         1, value->bit_size, NULL);<br>
       rfi->num_components = 1;<br>
       rfi->src[0] = nir_src_for_ssa(nir_channel(b, value, i));<br>
-      nir_builder_instr_insert(b, &rfi->instr);<br>
+      nir_ssa_def *first_lane;<br>
+      if (options->lower_shuffle_to_<wbr>32bit && rfi->src[0].ssa->bit_size == 64) {<br></blockquote><div><br>I don't really see how read_first_invocation is related to shuffles.<br> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         first_lane = lower_64bit_to_32bit(b, rfi);<br>
+      } else {<br>
+         nir_builder_instr_insert(b, &rfi->instr);<br>
+         first_lane = &rfi->dest.ssa;<br>
+      }<br>
<br>
       nir_ssa_def *is_eq;<br>
       if (intrin->intrinsic == nir_intrinsic_vote_feq) {<br>
-         is_eq = nir_feq(b, &rfi->dest.ssa, nir_channel(b, value, i));<br>
+         is_eq = nir_feq(b, first_lane, nir_channel(b, value, i));<br>
       } else {<br>
-         is_eq = nir_ieq(b, &rfi->dest.ssa, nir_channel(b, value, i));<br>
+         is_eq = nir_ieq(b, first_lane, nir_channel(b, value, i));<br>
       }<br>
<br>
       if (all_eq == NULL) {<br>
@@ -188,7 +229,7 @@ lower_vote_eq_to_ballot(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
<br>
 static nir_ssa_def *<br>
 lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,<br>
-              bool lower_to_scalar)<br>
+                        const nir_lower_subgroups_options *options)<br></blockquote><div><br></div><div>Please align to the (<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
 {<br>
    nir_ssa_def *index = nir_load_subgroup_invocation(<wbr>b);<br>
    switch (intrin->intrinsic) {<br>
@@ -240,8 +281,10 @@ lower_shuffle(nir_builder *b, nir_intrinsic_instr *intrin,<br>
                      intrin->dest.ssa.num_<wbr>components,<br>
                      intrin->dest.ssa.bit_size, NULL);<br>
<br>
-   if (lower_to_scalar && shuffle->num_components > 1) {<br>
-      return lower_subgroup_op_to_scalar(b, shuffle);<br>
+   if (options->lower_to_scalar && shuffle->num_components > 1) {<br>
+      return lower_subgroup_op_to_scalar(b, shuffle, options->lower_shuffle_to_<wbr>32bit);<br>
+   } else if (options->lower_shuffle_to_<wbr>32bit && shuffle->src[0].ssa->bit_size == 64) {<br>
+      return lower_64bit_to_32bit(b, shuffle);<br>
    } else {<br>
       nir_builder_instr_insert(b, &shuffle->instr);<br>
       return &shuffle->dest.ssa;<br>
@@ -279,7 +322,9 @@ lower_subgroups_intrin(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
    case nir_intrinsic_read_invocation:<br>
    case nir_intrinsic_read_first_<wbr>invocation:<br>
       if (options->lower_to_scalar && intrin->num_components > 1)<br>
-         return lower_subgroup_op_to_scalar(b, intrin);<br>
+         return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_<wbr>32bit);<br>
+         else if (options->lower_shuffle_to_<wbr>32bit && intrin->src[0].ssa->bit_size == 64)<br>
+         return lower_64bit_to_32bit(b, intrin);<br>
       break;<br>
<br>
    case nir_intrinsic_load_subgroup_<wbr>eq_mask:<br>
@@ -400,16 +445,18 @@ lower_subgroups_intrin(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
<br>
    case nir_intrinsic_shuffle:<br>
       if (options->lower_to_scalar && intrin->num_components > 1)<br>
-         return lower_subgroup_op_to_scalar(b, intrin);<br>
+         return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_<wbr>32bit);<br>
+         else if (options->lower_shuffle_to_<wbr>32bit && intrin->src[0].ssa->bit_size == 64)<br></blockquote><div><br></div><div>This needs to be dedented.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         return lower_64bit_to_32bit(b, intrin);<br>
       break;<br>
<br>
    case nir_intrinsic_shuffle_xor:<br>
    case nir_intrinsic_shuffle_up:<br>
    case nir_intrinsic_shuffle_down:<br>
       if (options->lower_shuffle)<br>
-         return lower_shuffle(b, intrin, options->lower_to_scalar);<br>
+         return lower_shuffle(b, intrin, options);<br>
       else if (options->lower_to_scalar && intrin->num_components > 1)<br>
-         return lower_subgroup_op_to_scalar(b, intrin);<br>
+         return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_<wbr>32bit);<br></blockquote><div><br></div><div>I think you need an "else if (options->lower_shuffle_to_32bit && intrin->src[0].ssa->bit_size == 64)" case here as well.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
       break;<br>
<br>
    case nir_intrinsic_quad_broadcast:<br>
@@ -417,16 +464,18 @@ lower_subgroups_intrin(nir_<wbr>builder *b, nir_intrinsic_instr *intrin,<br>
    case nir_intrinsic_quad_swap_<wbr>vertical:<br>
    case nir_intrinsic_quad_swap_<wbr>diagonal:<br>
       if (options->lower_quad)<br>
-         return lower_shuffle(b, intrin, options->lower_to_scalar);<br>
+         return lower_shuffle(b, intrin, options);<br>
       else if (options->lower_to_scalar && intrin->num_components > 1)<br>
-         return lower_subgroup_op_to_scalar(b, intrin);<br>
+         return lower_subgroup_op_to_scalar(b, intrin, options->lower_shuffle_to_<wbr>32bit);<br>
+         else if (options->lower_shuffle_to_<wbr>32bit && intrin->src[0].ssa->bit_size == 64)<br></blockquote><div><br></div><div>dedent, please.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+         return lower_64bit_to_32bit(b, intrin);<br>
       break;<br>
<br>
    case nir_intrinsic_reduce:<br>
    case nir_intrinsic_inclusive_scan:<br>
    case nir_intrinsic_exclusive_scan:<br>
       if (options->lower_to_scalar && intrin->num_components > 1)<br>
-         return lower_subgroup_op_to_scalar(b, intrin);<br>
+         return lower_subgroup_op_to_scalar(b, intrin, false);<br>
       break;<br>
<br>
    default:<br>
<span class="HOEnZb"><font color="#888888">--<br>
2.14.1<br>
<br>
______________________________<wbr>_________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/<wbr>mailman/listinfo/mesa-dev</a><br>
</font></span></blockquote></div><br></div></div>