[Mesa-dev] [PATCH 4/7] nir/int64: Implement lowering of shift operations

Matt Turner mattst88 at gmail.com
Sun Oct 14 22:11:34 UTC 2018


---
 src/compiler/nir/nir.h             |   1 +
 src/compiler/nir/nir_lower_int64.c | 142 +++++++++++++++++++++++++++++++++++++
 2 files changed, 143 insertions(+)

diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index 12cbd030e21..2c477126acc 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -3001,6 +3001,7 @@ typedef enum {
    nir_lower_ineg64    = (1 << 7),
    nir_lower_logic64   = (1 << 8),
    nir_lower_minmax64  = (1 << 9),
+   nir_lower_shift64   = (1 << 10),
 } nir_lower_int64_options;
 
 bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c
index 9cdc8a9d592..25882d3a858 100644
--- a/src/compiler/nir/nir_lower_int64.c
+++ b/src/compiler/nir/nir_lower_int64.c
@@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
                                     nir_ixor(b, x_hi, y_hi));
 }
 
+static nir_ssa_def *
+lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   /* Implemented as
+    *
+    * uint64_t lshift(uint64_t x, int c)
+    * {
+    *    if (c == 0) return x;
+    *
+    *    uint32_t lo = LO(x), hi = HI(x);
+    *
+    *    if (c < 32) {
+    *       uint32_t lo_shifted = lo << (c & 0x1f);
+    *       uint32_t hi_shifted = hi << (c & 0x1f);
+    *       uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);
+    *       return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
+    *    } else {
+    *       uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);
+    *       return pack_64(0, lo_shifted_hi);
+    *    }
+    * }
+    */
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
+   nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
+   nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
+   nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
+
+   nir_ssa_def *res_if_lt_32 =
+      nir_pack_64_2x32_split(b, lo_shifted,
+                                nir_ior(b, hi_shifted, lo_shifted_hi));
+   nir_ssa_def *res_if_ge_32 =
+      nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
+                                nir_ishl(b, x_lo, reverse_count));
+
+   return nir_bcsel(b,
+                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
+                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
+                                 res_if_ge_32, res_if_lt_32));
+}
+
+static nir_ssa_def *
+lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   /* Implemented as
+    *
+    * uint64_t arshift(uint64_t x, int c)
+    * {
+    *    if (c == 0) return x;
+    *
+    *    uint32_t lo = LO(x);
+    *    int32_t  hi = HI(x);
+    *
+    *    if (c < 32) {
+    *       uint32_t lo_shifted = lo >> (c & 0x1f);
+    *       uint32_t hi_shifted = hi >> (c & 0x1f);
+    *       uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
+    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
+    *    } else {
+    *       uint32_t hi_shifted = hi >> 31;
+    *       uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
+    *       return pack_64(hi_shifted, hi_shifted_lo);
+    *    }
+    * }
+    */
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
+   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
+   nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
+   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
+
+   nir_ssa_def *res_if_lt_32 =
+      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
+                                hi_shifted);
+   nir_ssa_def *res_if_ge_32 =
+      nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
+                                nir_ishr(b, x_hi, nir_imm_int(b, 31)));
+
+   return nir_bcsel(b,
+                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
+                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
+                                 res_if_ge_32, res_if_lt_32));
+}
+
+static nir_ssa_def *
+lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
+{
+   /* Implemented as
+    *
+    * uint64_t rshift(uint64_t x, int c)
+    * {
+    *    if (c == 0) return x;
+    *
+    *    uint32_t lo = LO(x), hi = HI(x);
+    *
+    *    if (c < 32) {
+    *       uint32_t lo_shifted = lo >> (c & 0x1f);
+    *       uint32_t hi_shifted = hi >> (c & 0x1f);
+    *       uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
+    *       return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
+    *    } else {
+    *       uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
+    *       return pack_64(0, hi_shifted_lo);
+    *    }
+    * }
+    */
+
+   nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
+   nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
+
+   nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));
+   nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
+   nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);
+   nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
+
+   nir_ssa_def *res_if_lt_32 =
+      nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
+                                hi_shifted);
+   nir_ssa_def *res_if_ge_32 =
+      nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),
+                                nir_imm_int(b, 0));
+
+   return nir_bcsel(b,
+                    nir_ieq(b, y, nir_imm_int(b, 0)), x,
+                    nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
+                                 res_if_ge_32, res_if_lt_32));
+}
+
 static nir_ssa_def *
 lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
 {
@@ -430,6 +562,10 @@ opcode_to_options_mask(nir_op opcode)
    case nir_op_ixor:
    case nir_op_inot:
       return nir_lower_logic64;
+   case nir_op_ishl:
+   case nir_op_ishr:
+   case nir_op_ushr:
+      return nir_lower_shift64;
    default:
       return 0;
    }
@@ -492,6 +628,12 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)
       return lower_ixor64(b, src[0], src[1]);
    case nir_op_inot:
       return lower_inot64(b, src[0]);
+   case nir_op_ishl:
+      return lower_ishl64(b, src[0], src[1]);
+   case nir_op_ishr:
+      return lower_ishr64(b, src[0], src[1]);
+   case nir_op_ushr:
+      return lower_ushr64(b, src[0], src[1]);
    default:
       unreachable("Invalid ALU opcode to lower");
    }
-- 
2.16.4



More information about the mesa-dev mailing list