<div dir="ltr"><div class="gmail_quote"><div dir="ltr">On Sun, Oct 14, 2018 at 5:12 PM Matt Turner <<a href="mailto:mattst88@gmail.com">mattst88@gmail.com</a>> wrote:<br></div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">---<br>
src/compiler/nir/nir.h | 1 +<br>
src/compiler/nir/nir_lower_int64.c | 142 +++++++++++++++++++++++++++++++++++++<br>
2 files changed, 143 insertions(+)<br>
<br>
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h<br>
index 12cbd030e21..2c477126acc 100644<br>
--- a/src/compiler/nir/nir.h<br>
+++ b/src/compiler/nir/nir.h<br>
@@ -3001,6 +3001,7 @@ typedef enum {<br>
nir_lower_ineg64 = (1 << 7),<br>
nir_lower_logic64 = (1 << 8),<br>
nir_lower_minmax64 = (1 << 9),<br>
+ nir_lower_shift64 = (1 << 10),<br>
} nir_lower_int64_options;<br>
<br>
bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);<br>
diff --git a/src/compiler/nir/nir_lower_int64.c b/src/compiler/nir/nir_lower_int64.c<br>
index 9cdc8a9d592..25882d3a858 100644<br>
--- a/src/compiler/nir/nir_lower_int64.c<br>
+++ b/src/compiler/nir/nir_lower_int64.c<br>
@@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)<br>
nir_ixor(b, x_hi, y_hi));<br>
}<br>
<br>
+static nir_ssa_def *<br>
+lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)<br>
+{<br>
+ /* Implemented as<br>
+ *<br>
+ * uint64_t lshift(uint64_t x, int c)<br>
+ * {<br>
+ * if (c == 0) return x;<br>
+ *<br>
+ * uint32_t lo = LO(x), hi = HI(x);<br>
+ *<br>
+ * if (c < 32) {<br>
+ * uint32_t lo_shifted = lo << (c & 0x1f);<br>
+ * uint32_t hi_shifted = hi << (c & 0x1f);<br>
+ * uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);<br></blockquote><div><br></div><div>Why the abs and the & 0x1f? The shift is already predicated on c < 32, and negative or out-of-bounds shift counts already have undefined results.<br></div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ * return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);<br>
+ * } else {<br>
+ * uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);<br>
+ * return pack_64(0, lo_shifted_hi);<br>
+ * }<br>
+ * }<br>
+ */<br>
+ nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);<br>
+ nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);<br>
+<br>
+ nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));<br></blockquote><div><br></div><div>This is iabs(c - 32), which yields the same result as the abs(32 - c) in the comment above but isn't the same expression, and it doesn't have the & 0x1f.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);<br>
+ nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);<br></blockquote><div><br></div><div>In general, all of the & 0x1f masks are missing. While not having them works on
i965, there's no guarantee that it works in general. Maybe we should add
them in and have an i965-specific optimization to delete them again?
Or maybe it's OK to just not have them. In any case, the code down here
should match the pseudocode in the comment above, or there should be a very good comment
saying why it doesn't.</div><div> </div><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">
+ nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);<br>
+<br>
+ nir_ssa_def *res_if_lt_32 =<br>
+ nir_pack_64_2x32_split(b, lo_shifted,<br>
+ nir_ior(b, hi_shifted, lo_shifted_hi));<br>
+ nir_ssa_def *res_if_ge_32 =<br>
+ nir_pack_64_2x32_split(b, nir_imm_int(b, 0),<br>
+ nir_ishl(b, x_lo, reverse_count));<br>
+<br>
+ return nir_bcsel(b,<br>
+ nir_ieq(b, y, nir_imm_int(b, 0)), x,<br>
+ nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),<br>
+ res_if_ge_32, res_if_lt_32));<br>
+}<br>
+<br>
+static nir_ssa_def *<br>
+lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)<br>
+{<br>
+ /* Implemented as<br>
+ *<br>
+ * uint64_t arshift(uint64_t x, int c)<br>
+ * {<br>
+ * if (c == 0) return x;<br>
+ *<br>
+ * uint32_t lo = LO(x);<br>
+ * int32_t hi = HI(x);<br>
+ *<br>
+ * if (c < 32) {<br>
+ * uint32_t lo_shifted = lo >> (c & 0x1f);<br>
+ * uint32_t hi_shifted = hi >> (c & 0x1f);<br>
+ * uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);<br>
+ * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);<br>
+ * } else {<br>
+ * uint32_t hi_shifted = hi >> 31;<br>
+ * uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);<br>
+ * return pack_64(hi_shifted, hi_shifted_lo);<br>
+ * }<br>
+ * }<br>
+ */<br>
+ nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);<br>
+ nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);<br>
+<br>
+ nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));<br>
+ nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);<br>
+ nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);<br>
+ nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);<br>
+<br>
+ nir_ssa_def *res_if_lt_32 =<br>
+ nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),<br>
+ hi_shifted);<br>
+ nir_ssa_def *res_if_ge_32 =<br>
+ nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),<br>
+ nir_ishr(b, x_hi, nir_imm_int(b, 31)));<br>
+<br>
+ return nir_bcsel(b,<br>
+ nir_ieq(b, y, nir_imm_int(b, 0)), x,<br>
+ nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),<br>
+ res_if_ge_32, res_if_lt_32));<br>
+}<br>
+<br>
+static nir_ssa_def *<br>
+lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)<br>
+{<br>
+ /* Implemented as<br>
+ *<br>
+ * uint64_t rshift(uint64_t x, int c)<br>
+ * {<br>
+ * if (c == 0) return x;<br>
+ *<br>
+ * uint32_t lo = LO(x), hi = HI(x);<br>
+ *<br>
+ * if (c < 32) {<br>
+ * uint32_t lo_shifted = lo >> (c & 0x1f);<br>
+ * uint32_t hi_shifted = hi >> (c & 0x1f);<br>
+ * uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);<br>
+ * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);<br>
+ * } else {<br>
+ * uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);<br>
+ * return pack_64(0, hi_shifted_lo);<br>
+ * }<br>
+ * }<br>
+ */<br>
+<br>
+ nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);<br>
+ nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);<br>
+<br>
+ nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b, -32)));<br>
+ nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);<br>
+ nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);<br>
+ nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);<br>
+<br>
+ nir_ssa_def *res_if_lt_32 =<br>
+ nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),<br>
+ hi_shifted);<br>
+ nir_ssa_def *res_if_ge_32 =<br>
+ nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),<br>
+ nir_imm_int(b, 0));<br>
+<br>
+ return nir_bcsel(b,<br>
+ nir_ieq(b, y, nir_imm_int(b, 0)), x,<br>
+ nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),<br>
+ res_if_ge_32, res_if_lt_32));<br>
+}<br>
+<br>
static nir_ssa_def *<br>
lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)<br>
{<br>
@@ -430,6 +562,10 @@ opcode_to_options_mask(nir_op opcode)<br>
case nir_op_ixor:<br>
case nir_op_inot:<br>
return nir_lower_logic64;<br>
+ case nir_op_ishl:<br>
+ case nir_op_ishr:<br>
+ case nir_op_ushr:<br>
+ return nir_lower_shift64;<br>
default:<br>
return 0;<br>
}<br>
@@ -492,6 +628,12 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr *alu)<br>
return lower_ixor64(b, src[0], src[1]);<br>
case nir_op_inot:<br>
return lower_inot64(b, src[0]);<br>
+ case nir_op_ishl:<br>
+ return lower_ishl64(b, src[0], src[1]);<br>
+ case nir_op_ishr:<br>
+ return lower_ishr64(b, src[0], src[1]);<br>
+ case nir_op_ushr:<br>
+ return lower_ushr64(b, src[0], src[1]);<br>
default:<br>
unreachable("Invalid ALU opcode to lower");<br>
}<br>
-- <br>
2.16.4<br>
<br>
_______________________________________________<br>
mesa-dev mailing list<br>
<a href="mailto:mesa-dev@lists.freedesktop.org" target="_blank">mesa-dev@lists.freedesktop.org</a><br>
<a href="https://lists.freedesktop.org/mailman/listinfo/mesa-dev" rel="noreferrer" target="_blank">https://lists.freedesktop.org/mailman/listinfo/mesa-dev</a><br>
</blockquote></div></div>