[Mesa-dev] [PATCH 4/7] nir/int64: Implement lowering of shift operations
Jason Ekstrand
jason at jlekstrand.net
Mon Oct 15 02:16:24 UTC 2018
On Sun, Oct 14, 2018 at 5:12 PM Matt Turner <mattst88 at gmail.com> wrote:
> ---
> src/compiler/nir/nir.h | 1 +
> src/compiler/nir/nir_lower_int64.c | 142
> +++++++++++++++++++++++++++++++++++++
> 2 files changed, 143 insertions(+)
>
> diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
> index 12cbd030e21..2c477126acc 100644
> --- a/src/compiler/nir/nir.h
> +++ b/src/compiler/nir/nir.h
> @@ -3001,6 +3001,7 @@ typedef enum {
> nir_lower_ineg64 = (1 << 7),
> nir_lower_logic64 = (1 << 8),
> nir_lower_minmax64 = (1 << 9),
> + nir_lower_shift64 = (1 << 10),
> } nir_lower_int64_options;
>
> bool nir_lower_int64(nir_shader *shader, nir_lower_int64_options options);
> diff --git a/src/compiler/nir/nir_lower_int64.c
> b/src/compiler/nir/nir_lower_int64.c
> index 9cdc8a9d592..25882d3a858 100644
> --- a/src/compiler/nir/nir_lower_int64.c
> +++ b/src/compiler/nir/nir_lower_int64.c
> @@ -90,6 +90,138 @@ lower_ixor64(nir_builder *b, nir_ssa_def *x,
> nir_ssa_def *y)
> nir_ixor(b, x_hi, y_hi));
> }
>
> +static nir_ssa_def *
> +lower_ishl64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> + /* Implemented as
> + *
> + * uint64_t lshift(uint64_t x, int c)
> + * {
> + * if (c == 0) return x;
> + *
> + * uint32_t lo = LO(x), hi = HI(x);
> + *
> + * if (c < 32) {
> + * uint32_t lo_shifted = lo << (c & 0x1f);
> + * uint32_t hi_shifted = hi << (c & 0x1f);
> + * uint32_t lo_shifted_hi = lo >> (abs(32 - c) & 0x1f);
>
Why the abs and the &? it's already predicated on c < 32 and negative or
OOB shifts already have undefined results.
> + * return pack_64(lo_shifted, hi_shifted | lo_shifted_hi);
> + * } else {
> + * uint32_t lo_shifted_hi = lo << (abs(32 - c) & 0x1f);
> + * return pack_64(0, lo_shifted_hi);
> + * }
> + * }
> + */
> + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> + nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b,
> -32)));
>
This is iabs(c - 32) (which yields the same result but isn't the same
expression) and doesn't have the & 0x1f.
> + nir_ssa_def *lo_shifted = nir_ishl(b, x_lo, y);
> + nir_ssa_def *hi_shifted = nir_ishl(b, x_hi, y);
>
In general, all of the 0x1f are missing. While not having them works on
i965, there's no guarantee it works in general. Maybe we should add them
in and have an i965-specific optimization to delete them again? Maybe it's
ok to just not have them. In any case, the code down here should match the
code above or there should be a very good comment saying why it doesn't.
> + nir_ssa_def *lo_shifted_hi = nir_ushr(b, x_lo, reverse_count);
> +
> + nir_ssa_def *res_if_lt_32 =
> + nir_pack_64_2x32_split(b, lo_shifted,
> + nir_ior(b, hi_shifted, lo_shifted_hi));
> + nir_ssa_def *res_if_ge_32 =
> + nir_pack_64_2x32_split(b, nir_imm_int(b, 0),
> + nir_ishl(b, x_lo, reverse_count));
> +
> + return nir_bcsel(b,
> + nir_ieq(b, y, nir_imm_int(b, 0)), x,
> + nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
> + res_if_ge_32, res_if_lt_32));
> +}
> +
> +static nir_ssa_def *
> +lower_ishr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> + /* Implemented as
> + *
> + * uint64_t arshift(uint64_t x, int c)
> + * {
> + * if (c == 0) return x;
> + *
> + * uint32_t lo = LO(x);
> + * int32_t hi = HI(x);
> + *
> + * if (c < 32) {
> + * uint32_t lo_shifted = lo >> (c & 0x1f);
> + * uint32_t hi_shifted = hi >> (c & 0x1f);
> + * uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
> + * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
> + * } else {
> + * uint32_t hi_shifted = hi >> 31;
> + * uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
> + * return pack_64(hi_shifted, hi_shifted_lo);
> + * }
> + * }
> + */
> + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> + nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b,
> -32)));
> + nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
> + nir_ssa_def *hi_shifted = nir_ishr(b, x_hi, y);
> + nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
> +
> + nir_ssa_def *res_if_lt_32 =
> + nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
> + hi_shifted);
> + nir_ssa_def *res_if_ge_32 =
> + nir_pack_64_2x32_split(b, nir_ishr(b, x_hi, reverse_count),
> + nir_ishr(b, x_hi, nir_imm_int(b, 31)));
> +
> + return nir_bcsel(b,
> + nir_ieq(b, y, nir_imm_int(b, 0)), x,
> + nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
> + res_if_ge_32, res_if_lt_32));
> +}
> +
> +static nir_ssa_def *
> +lower_ushr64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> +{
> + /* Implemented as
> + *
> + * uint64_t rshift(uint64_t x, int c)
> + * {
> + * if (c == 0) return x;
> + *
> + * uint32_t lo = LO(x), hi = HI(x);
> + *
> + * if (c < 32) {
> + * uint32_t lo_shifted = lo >> (c & 0x1f);
> + * uint32_t hi_shifted = hi >> (c & 0x1f);
> + * uint32_t hi_shifted_lo = hi << (abs(32 - c) & 0x1f);
> + * return pack_64(hi_shifted, hi_shifted_lo | lo_shifted);
> + * } else {
> + * uint32_t hi_shifted_lo = hi >> (abs(32 - c) & 0x1f);
> + * return pack_64(0, hi_shifted_lo);
> + * }
> + * }
> + */
> +
> + nir_ssa_def *x_lo = nir_unpack_64_2x32_split_x(b, x);
> + nir_ssa_def *x_hi = nir_unpack_64_2x32_split_y(b, x);
> +
> + nir_ssa_def *reverse_count = nir_iabs(b, nir_iadd(b, y, nir_imm_int(b,
> -32)));
> + nir_ssa_def *lo_shifted = nir_ushr(b, x_lo, y);
> + nir_ssa_def *hi_shifted = nir_ushr(b, x_hi, y);
> + nir_ssa_def *hi_shifted_lo = nir_ishl(b, x_hi, reverse_count);
> +
> + nir_ssa_def *res_if_lt_32 =
> + nir_pack_64_2x32_split(b, nir_ior(b, lo_shifted, hi_shifted_lo),
> + hi_shifted);
> + nir_ssa_def *res_if_ge_32 =
> + nir_pack_64_2x32_split(b, nir_ushr(b, x_hi, reverse_count),
> + nir_imm_int(b, 0));
> +
> + return nir_bcsel(b,
> + nir_ieq(b, y, nir_imm_int(b, 0)), x,
> + nir_bcsel(b, nir_uge(b, y, nir_imm_int(b, 32)),
> + res_if_ge_32, res_if_lt_32));
> +}
> +
> static nir_ssa_def *
> lower_iadd64(nir_builder *b, nir_ssa_def *x, nir_ssa_def *y)
> {
> @@ -430,6 +562,10 @@ opcode_to_options_mask(nir_op opcode)
> case nir_op_ixor:
> case nir_op_inot:
> return nir_lower_logic64;
> + case nir_op_ishl:
> + case nir_op_ishr:
> + case nir_op_ushr:
> + return nir_lower_shift64;
> default:
> return 0;
> }
> @@ -492,6 +628,12 @@ lower_int64_alu_instr(nir_builder *b, nir_alu_instr
> *alu)
> return lower_ixor64(b, src[0], src[1]);
> case nir_op_inot:
> return lower_inot64(b, src[0]);
> + case nir_op_ishl:
> + return lower_ishl64(b, src[0], src[1]);
> + case nir_op_ishr:
> + return lower_ishr64(b, src[0], src[1]);
> + case nir_op_ushr:
> + return lower_ushr64(b, src[0], src[1]);
> default:
> unreachable("Invalid ALU opcode to lower");
> }
> --
> 2.16.4
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <https://lists.freedesktop.org/archives/mesa-dev/attachments/20181014/7c81dcb8/attachment-0001.html>
More information about the mesa-dev
mailing list