[Mesa-dev] [PATCH 46/59] nir: Add 64-bit integer support for conversions and bitcasts

Wed Oct 26 22:23:19 UTC 2016

On Tue, Oct 25, 2016 at 8:59 PM, Ian Romanick <idr at freedesktop.org> wrote:
> From: Ian Romanick <ian.d.romanick at intel.com>
>
> Signed-off-by: Ian Romanick <ian.d.romanick at intel.com>
> ---
>  src/compiler/glsl/glsl_to_nir.cpp          | 30 +++++++++++++++++++++++++++
>  src/compiler/nir/nir_lower_alu_to_scalar.c |  1 +
>  src/compiler/nir/nir_opcodes.py            | 33 ++++++++++++++++++++++++++++++
>  3 files changed, 64 insertions(+)
>
> diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
> index 09fd5aa..b1c8ec6 100644
> --- a/src/compiler/glsl/glsl_to_nir.cpp
> +++ b/src/compiler/glsl/glsl_to_nir.cpp
> @@ -1484,12 +1484,34 @@ nir_visitor::visit(ir_expression *ir)
>        assert(supports_ints);
>        result = nir_u2d(&b, srcs[0]);
>        break;
> +   case ir_unop_i642i: result = nir_i642i(&b, srcs[0]);   break;
> +   case ir_unop_u642i: result = nir_u642i(&b, srcs[0]);   break;
> +   case ir_unop_i642u: result = nir_i642u(&b, srcs[0]);   break;
> +   case ir_unop_u642u: result = nir_u642u(&b, srcs[0]);   break;
> +   case ir_unop_i642f: result = nir_i642f(&b, srcs[0]);   break;
> +   case ir_unop_u642f: result = nir_u642f(&b, srcs[0]);   break;
> +   case ir_unop_i642d: result = nir_i642d(&b, srcs[0]);   break;
> +   case ir_unop_u642d: result = nir_u642d(&b, srcs[0]);   break;
> +   case ir_unop_i2i64: result = nir_i2i64(&b, srcs[0]);   break;
> +   case ir_unop_u2i64: result = nir_u2i64(&b, srcs[0]);   break;
> +   case ir_unop_f2i64: result = nir_f2i64(&b, srcs[0]);   break;
> +   case ir_unop_d2i64: result = nir_d2i64(&b, srcs[0]);   break;
> +   case ir_unop_i2u64: result = nir_i2u64(&b, srcs[0]);   break;
> +   case ir_unop_u2u64: result = nir_u2u64(&b, srcs[0]);   break;
> +   case ir_unop_f2u64: result = nir_f2u64(&b, srcs[0]);   break;
> +   case ir_unop_d2u64: result = nir_d2u64(&b, srcs[0]);   break;
>     case ir_unop_i2u:
>     case ir_unop_u2i:
> +   case ir_unop_i642u64:
> +   case ir_unop_u642i64:
>     case ir_unop_bitcast_i2f:
>     case ir_unop_bitcast_f2i:
>     case ir_unop_bitcast_u2f:
>     case ir_unop_bitcast_f2u:
> +   case ir_unop_bitcast_i642d:
> +   case ir_unop_bitcast_d2i64:
> +   case ir_unop_bitcast_u642d:
> +   case ir_unop_bitcast_d2u64:
>     case ir_unop_subroutine_to_int:
>        /* no-op */
>        result = nir_imov(&b, srcs[0]);
> @@ -1543,6 +1565,14 @@ nir_visitor::visit(ir_expression *ir)
>     case ir_unop_unpack_double_2x32:
>        result = nir_unpack_double_2x32(&b, srcs[0]);
>        break;
> +   case ir_unop_pack_int_2x32:
> +   case ir_unop_pack_uint_2x32:
> +      result = nir_pack_int_2x32(&b, srcs[0]);
> +      break;
> +   case ir_unop_unpack_int_2x32:
> +   case ir_unop_unpack_uint_2x32:
> +      result = nir_unpack_int_2x32(&b, srcs[0]);
> +      break;
>     case ir_unop_bitfield_reverse:
>        result = nir_bitfield_reverse(&b, srcs[0]);
>        break;
> diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
> index fa18deb..8a967c5 100644
> --- a/src/compiler/nir/nir_lower_alu_to_scalar.c
> +++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
> @@ -189,6 +189,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
>     }
>
>     case nir_op_unpack_double_2x32:
> +   case nir_op_unpack_int_2x32:
>        return false;
>
>        LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
> diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
> index 7045c95..1825dc3 100644
> --- a/src/compiler/nir/nir_opcodes.py
> +++ b/src/compiler/nir/nir_opcodes.py
> @@ -95,6 +95,7 @@ tuint = "uint"
>  tfloat32 = "float32"
>  tint32 = "int32"
>  tuint32 = "uint32"
> +tint64 = "int64"
>  tuint64 = "uint64"
>  tfloat64 = "float64"
>
> @@ -171,6 +172,23 @@ unop_convert("d2i", tint32, tfloat64, "src0") # Double-to-integer conversion.
>  unop_convert("d2u", tuint32, tfloat64, "src0") # Double-to-unsigned conversion.
>  unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
>  unop_convert("i2d", tfloat64, tint32, "src0") # Integer-to-double conversion.
> +unop_convert("i642i", tint32, tint64, "src0")    # int64_t-to-int conversion.
> +unop_convert("u642i", tint32, tuint64, "src0")   # uint64_t-to-int conversion.
> +unop_convert("i642u", tuint32, tint64, "src0")   # int64_t-to-unsigned conversion.
> +unop_convert("u642u", tuint32, tuint64, "src0")  # uint64_t-to-unsigned conversion.
> +unop_convert("i642f", tfloat32, tint64, "src0")  # int64_t-to-float conversion.
> +unop_convert("u642f", tfloat32, tuint64, "src0") # uint64_t-to-float conversion.
> +unop_convert("i642d", tfloat64, tint64, "src0")  # int64_t-to-double conversion.
> +unop_convert("u642d", tfloat64, tuint64, "src0") # uint64_t-to-double conversion.
> +unop_convert("i2i64", tint64, tint32, "src0")    # Integer-to-int64_t conversion.
> +unop_convert("u2i64", tint64, tuint32, "src0")   # Unsigned-to-int64_t conversion.
> +unop_convert("f2i64", tint64, tfloat32, "src0")  # Float-to-int64_t conversion.
> +unop_convert("d2i64", tint64, tfloat64, "src0")  # Double-to-int64_t conversion.
> +unop_convert("i2u64", tuint64, tint32, "src0")   # Integer-to-uint64_t conversion.
> +unop_convert("u2u64", tuint64, tuint32, "src0")  # Unsigned-to-uint64_t conversion.
> +unop_convert("f2u64", tuint64, tfloat32, "src0") # Float-to-uint64_t conversion.
> +unop_convert("d2u64", tuint64, tfloat64, "src0") # Double-to-uint64_t conversion.
> +

So the plan that Jason and I had was to make each conversion opcode
explicitly sized on only one side -- either the destination or the
source, but not both. That doesn't matter too much now, but the idea is
that it would simplify adding i8/i16 support later if/when we do that
(we would have a linear number of conversion opcodes instead of
quadratic in the number of bitsizes supported). So, for example, we'd
have:

# general integer (int8_t, int64_t, etc.) to int32_t conversion
unop_convert("i2i32", tint32, tint, "src0")

We decided not to make both sides implicitly sized, since it would
make it a lot harder for nir_opt_algebraic to deduce the bitsizes of
various things on the RHS. The way it is now, NIR enforces that any
two unsized inputs/outputs must have the same bitsize, which helps
opt_algebraic propagate bitsizes up/down the tree, but it obviously
excludes having only one nir_i2i opcode.

As to whether to implicitly size the source or destination, I don't
have much of an opinion. The main difference would be in opt_algebraic
rules -- with one, you'd be forced to write down the source type, with
the other you'd be forced to write down the destination type. I'll
leave it up to you.

>  # Float-to-boolean conversion
>  unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
>  unop_convert("d2b", tbool, tfloat64, "src0 != 0.0")
> @@ -179,6 +197,10 @@ unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
>  # Int-to-boolean conversion
>  unop_convert("i2b", tbool, tint32, "src0 != 0")
>  unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
> +unop_convert("i642b", tbool, tint64, "src0")          # int64_t-to-Boolean conversion.
> +unop_convert("u642b", tbool, tuint64, "src0")         # uint64_t-to-Boolean conversion.
> +unop_convert("b2i64", tint64, tbool, "src0 ? 1 : 0")  # Boolean-to-int64_t conversion.
> +unop_convert("b2u64", tuint64, tbool, "src0 ? 1 : 0") # Boolean-to-uint64_t conversion.

Similarly, since tbool is always 32 bits, we could get rid of these if
we changed the definition of i2b to:

unop_convert("i2b", tbool, tint, "src0 != 0")

and similarly for b2i etc. It seems we messed up with doubles by adding
unnecessary extra opcodes :(.

>  unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
>  unop_convert("u2d", tfloat64, tuint32, "src0") # Unsigned-to-double conversion.
>  # double-to-float conversion
> @@ -270,9 +292,15 @@ dst.x = (src0.x <<  0) |
>  unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32,
>             "dst.x = src0.x | ((uint64_t)src0.y << 32);")
>
> +unop_horiz("pack_int_2x32", 1, tint64, 2, tint32,
> +           "dst.x = src0.x | ((int64_t)src0.y << 32);")
> +
>  unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64,
>             "dst.x = src0.x; dst.y = src0.x >> 32;")
>
> +unop_horiz("unpack_int_2x32", 2, tint32, 1, tint64,
> +           "dst.x = src0.x; dst.y = src0.x >> 32;")
> +
>  # Lowered floating point unpacking operations.
>
>
> @@ -283,6 +311,8 @@ unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
>
>  unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, "src0")
>  unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, "src0 >> 32")
> +unop_convert("unpack_int_2x32_split_x", tuint32, tuint64, "src0")
> +unop_convert("unpack_int_2x32_split_y", tuint32, tuint64, "src0 >> 32")
>
>  # Bit operations, part of ARB_gpu_shader5.
>
> @@ -565,6 +595,9 @@ binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
>  binop_convert("pack_double_2x32_split", tuint64, tuint32, "",
>                "src0 | ((uint64_t)src1 << 32)")
>
> +binop_convert("pack_int_2x32_split", tuint64, tuint32, "",
> +              "src0 | ((uint64_t)src1 << 32)")
> +
>  # bfm implements the behavior of the first operation of the SM5 "bfi" assembly
>  # and that of the "bfi1" i965 instruction. That is, it has undefined behavior
>  # if either of its arguments are 32.
> --
> 2.5.5
>
> _______________________________________________
> mesa-dev mailing list
> mesa-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/mesa-dev