[Mesa-dev] [PATCH 2/7] nir/int64: Add some more lowering helpers

Matt Turner mattst88 at gmail.com
Sun Oct 14 22:11:32 UTC 2018


From: Jason Ekstrand <jason.ekstrand at intel.com>

[mattst88]: Found in an old branch of Jason's.

Jason implemented: inot, iand, ior, iadd, isub, ineg, iabs, compare,
                   imin, imax, umin, umax
Matt implemented:  ixor, imov, bcsel
---
 src/compiler/nir/nir_lower_int64.c | 186 +++++++++++++++++++++++++++++++++++++
 1 file changed, 186 insertions(+)

diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
index 0d7f165b406..6b269830801 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -24,6 +24,192 @@
 #include "nir.h"
 #include "nir_builder.h"
 
+static nir_ssa_def *
+lower_imov64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   return nir_pack_64_2x32_split(b, nir_imov(b, x_lo), nir_imov(b, x_hi));
+}
+
+static nir_ssa_def *
+lower_bcsel64(nir_builder *b, nir_ssa_def *cond, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_bcsel(b, cond, x_lo, y_lo),
+                                    nir_bcsel(b, cond, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_inot64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   return nir_pack_64_2x32_split(b, nir_inot(b, x_lo), nir_inot(b, x_hi));
+}
+
+static nir_ssa_def *
+lower_iand64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_iand(b, x_lo, y_lo),
+                                    nir_iand(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_ior64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_ior(b, x_lo, y_lo),
+                                    nir_ior(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   return nir_pack_64_2x32_split(b, nir_ixor(b, x_lo, y_lo),
+                                    nir_ixor(b, x_hi, y_hi));
+}
+
+static nir_ssa_def *
+lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   nir_ssa_def *res_lo = nir_iadd(b, x_lo, y_lo);
+   nir_ssa_def *carry = nir_b2i(b, nir_ult(b, res_lo, x_lo));
+   nir_ssa_def *res_hi = nir_iadd(b, carry, nir_iadd(b, x_hi, y_hi));
+
+   return nir_pack_64_2x32_split(b, res_lo, res_hi);
+}
+
+static nir_ssa_def *
+lower_isub64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   nir_ssa_def *res_lo = nir_isub(b, x_lo, y_lo);
+   /* In NIR, true is represented by ~0 which is -1 */
+   nir_ssa_def *borrow = nir_ult(b, x_lo, y_lo);
+   nir_ssa_def *res_hi = nir_iadd(b, nir_isub(b, x_hi, y_hi), borrow);
+
+   return nir_pack_64_2x32_split(b, res_lo, res_hi);
+}
+
+static nir_ssa_def *
+lower_ineg64(nir_builder *b, nir_ssa_def *x)
+{
+   /* Since isub is the same number of instructions (with better dependencies)
+    * as iadd, subtraction is actually more efficient for ineg than the usual
+    * 2's complement "flip the bits and add one".
+    */
+   return lower_isub64(b, nir_imm_int64(b, 0), x);
+}
+
+static nir_ssa_def *
+lower_iabs64(nir_builder *b, nir_ssa_def *x)
+{
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
+   return nir_bcsel(b, x_is_neg, lower_ineg64(b, x), x);
+}
+
+static nir_ssa_def *
+lower_int64_compare(nir_builder *b, nir_op op, nir_ssa_def *x, nir_ssa_def *y)
+{
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+   nir_ssa_def *y_lo = nir_unpack_64_2x32_split_x(b, y);
+   nir_ssa_def *y_hi = nir_unpack_64_2x32_split_y(b, y);
+
+   switch (op) {
+   case nir_op_ieq:
+      return nir_iand(b, nir_ieq(b, x_hi, y_hi), nir_ieq(b, x_lo, y_lo));
+   case nir_op_ine:
+      return nir_ior(b, nir_ine(b, x_hi, y_hi), nir_ine(b, x_lo, y_lo));
+   case nir_op_ult:
+      return nir_ior(b, nir_ult(b, x_hi, y_hi),
+                        nir_iand(b, nir_ieq(b, x_hi, y_hi),
+                                    nir_ult(b, x_lo, y_lo)));
+   case nir_op_ilt:
+      /* This one is tricky.  Handled below */
+      break;
+   case nir_op_uge:
+      /* Lower as !(x < y) in the hopes of better CSE */
+      return nir_inot(b, lower_int64_compare(b, nir_op_ult, x, y));
+   case nir_op_ige:
+      /* Lower as !(x < y) in the hopes of better CSE */
+      return nir_inot(b, lower_int64_compare(b, nir_op_ilt, x, y));
+   default:
+      unreachable("Invalid comparison");
+   }
+
+   assert(op == nir_op_ilt);
+
+   nir_ssa_def *neg_x = lower_ineg64(b, x);
+   nir_ssa_def *neg_y = lower_ineg64(b, y);
+   nir_ssa_def *x_is_neg = nir_ilt(b, x_hi, nir_imm_int(b, 0));
+   nir_ssa_def *y_is_neg = nir_ilt(b, y_hi, nir_imm_int(b, 0));
+
+   nir_ssa_def *imm_true = nir_imm_int(b, NIR_TRUE);
+   nir_ssa_def *imm_false = nir_imm_int(b, NIR_FALSE);
+   nir_ssa_def *ult = lower_int64_compare(b, nir_op_ult, x, y);
+   nir_ssa_def *not_ult_neg =
+      nir_inot(b, lower_int64_compare(b, nir_op_ult, neg_x, neg_y));
+
+   return nir_bcsel(b, x_is_neg, nir_bcsel(b, y_is_neg, not_ult_neg, imm_true),
+                                 nir_bcsel(b, y_is_neg, imm_false, ult));
+}
+
+static nir_ssa_def *
+lower_umax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), y, x);
+}
+
+static nir_ssa_def *
+lower_imax64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), y, x);
+}
+
+static nir_ssa_def *
+lower_umin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   return nir_bcsel(b, lower_int64_compare(b, nir_op_ult, x, y), x, y);
+}
+
+static nir_ssa_def *
+lower_imin64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   return nir_bcsel(b, lower_int64_compare(b, nir_op_ilt, x, y), x, y);
+}
+
 static nir_ssa_def *
 lower_imul64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
 {
-- 
2.16.4



More information about the mesa-dev mailing list