[Mesa-dev] [PATCH 2/5] nir: Combine the int and double [un]pack opcodes
Jason Ekstrand
jason at jlekstrand.net
Wed Feb 15 07:29:48 UTC 2017
NIR is a typeless IR, and the int and double variants of each [un]pack opcode,
when considered bitwise, do exactly the same thing. There's no reason to have
two versions of them.
---
src/compiler/glsl/glsl_to_nir.cpp | 10 ++-----
src/compiler/nir/nir_lower_alu_to_scalar.c | 3 +-
src/compiler/nir/nir_lower_double_ops.c | 22 +++++++-------
src/compiler/nir/nir_lower_double_packing.c | 46 ++++++++---------------------
src/compiler/nir/nir_opcodes.py | 21 ++++---------
src/compiler/nir/nir_opt_algebraic.py | 2 +-
src/compiler/spirv/vtn_glsl450.c | 4 +--
src/mesa/drivers/dri/i965/brw_fs_nir.cpp | 31 ++++++-------------
src/mesa/drivers/dri/i965/brw_vec4_nir.cpp | 8 ++---
9 files changed, 48 insertions(+), 99 deletions(-)
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index 96d8164..00f20da 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1581,18 +1581,14 @@ nir_visitor::visit(ir_expression *ir)
result = nir_unpack_half_2x16(&b, srcs[0]);
break;
case ir_unop_pack_double_2x32:
- result = nir_pack_double_2x32(&b, srcs[0]);
- break;
- case ir_unop_unpack_double_2x32:
- result = nir_unpack_double_2x32(&b, srcs[0]);
- break;
case ir_unop_pack_int_2x32:
case ir_unop_pack_uint_2x32:
- result = nir_pack_int_2x32(&b, srcs[0]);
+ result = nir_pack_64_2x32(&b, srcs[0]);
break;
+ case ir_unop_unpack_double_2x32:
case ir_unop_unpack_int_2x32:
case ir_unop_unpack_uint_2x32:
- result = nir_unpack_int_2x32(&b, srcs[0]);
+ result = nir_unpack_64_2x32(&b, srcs[0]);
break;
case ir_unop_bitfield_reverse:
result = nir_bitfield_reverse(&b, srcs[0]);
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 8a967c5..080d980 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -188,8 +188,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
return true;
}
- case nir_op_unpack_double_2x32:
- case nir_op_unpack_int_2x32:
+ case nir_op_unpack_64_2x32:
return false;
LOWER_REDUCTION(nir_op_fdot, nir_op_fmul, nir_op_fadd);
diff --git a/src/compiler/nir/nir_lower_double_ops.c b/src/compiler/nir/nir_lower_double_ops.c
index fdd0f44..ad96313 100644
--- a/src/compiler/nir/nir_lower_double_ops.c
+++ b/src/compiler/nir/nir_lower_double_ops.c
@@ -41,22 +41,22 @@ static nir_ssa_def *
set_exponent(nir_builder *b, nir_ssa_def *src, nir_ssa_def *exp)
{
/* Split into bits 0-31 and 32-63 */
- nir_ssa_def *lo = nir_unpack_double_2x32_split_x(b, src);
- nir_ssa_def *hi = nir_unpack_double_2x32_split_y(b, src);
+ nir_ssa_def *lo = nir_unpack_64_2x32_split_x(b, src);
+ nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src);
/* The exponent is bits 52-62, or 20-30 of the high word, so set the exponent
* to 1023
*/
nir_ssa_def *new_hi = nir_bfi(b, nir_imm_int(b, 0x7ff00000), exp, hi);
/* recombine */
- return nir_pack_double_2x32_split(b, lo, new_hi);
+ return nir_pack_64_2x32_split(b, lo, new_hi);
}
static nir_ssa_def *
get_exponent(nir_builder *b, nir_ssa_def *src)
{
/* get bits 32-63 */
- nir_ssa_def *hi = nir_unpack_double_2x32_split_y(b, src);
+ nir_ssa_def *hi = nir_unpack_64_2x32_split_y(b, src);
/* extract bits 20-30 of the high word */
return nir_ubitfield_extract(b, hi, nir_imm_int(b, 20), nir_imm_int(b, 11));
@@ -67,7 +67,7 @@ get_exponent(nir_builder *b, nir_ssa_def *src)
static nir_ssa_def *
get_signed_inf(nir_builder *b, nir_ssa_def *zero)
{
- nir_ssa_def *zero_hi = nir_unpack_double_2x32_split_y(b, zero);
+ nir_ssa_def *zero_hi = nir_unpack_64_2x32_split_y(b, zero);
/* The bit pattern for infinity is 0x7ff0000000000000, where the sign bit
* is the highest bit. Only the sign bit can be non-zero in the passed in
@@ -76,7 +76,7 @@ get_signed_inf(nir_builder *b, nir_ssa_def *zero)
* bits and then pack it together with zero low 32 bits.
*/
nir_ssa_def *inf_hi = nir_ior(b, nir_imm_int(b, 0x7ff00000), zero_hi);
- return nir_pack_double_2x32_split(b, nir_imm_int(b, 0), inf_hi);
+ return nir_pack_64_2x32_split(b, nir_imm_int(b, 0), inf_hi);
}
/*
@@ -337,8 +337,8 @@ lower_trunc(nir_builder *b, nir_ssa_def *src)
nir_imm_int(b, ~0),
nir_isub(b, frac_bits, nir_imm_int(b, 32))));
- nir_ssa_def *src_lo = nir_unpack_double_2x32_split_x(b, src);
- nir_ssa_def *src_hi = nir_unpack_double_2x32_split_y(b, src);
+ nir_ssa_def *src_lo = nir_unpack_64_2x32_split_x(b, src);
+ nir_ssa_def *src_hi = nir_unpack_64_2x32_split_y(b, src);
return
nir_bcsel(b,
@@ -346,9 +346,9 @@ lower_trunc(nir_builder *b, nir_ssa_def *src)
nir_imm_double(b, 0.0),
nir_bcsel(b, nir_ige(b, unbiased_exp, nir_imm_int(b, 53)),
src,
- nir_pack_double_2x32_split(b,
- nir_iand(b, mask_lo, src_lo),
- nir_iand(b, mask_hi, src_hi))));
+ nir_pack_64_2x32_split(b,
+ nir_iand(b, mask_lo, src_lo),
+ nir_iand(b, mask_hi, src_hi))));
}
static nir_ssa_def *
diff --git a/src/compiler/nir/nir_lower_double_packing.c b/src/compiler/nir/nir_lower_double_packing.c
index 6bb01ff..61c4ea6 100644
--- a/src/compiler/nir/nir_lower_double_packing.c
+++ b/src/compiler/nir/nir_lower_double_packing.c
@@ -35,31 +35,17 @@
*/
static nir_ssa_def *
-lower_pack_double(nir_builder *b, nir_ssa_def *src)
+lower_pack_64(nir_builder *b, nir_ssa_def *src)
{
- return nir_pack_double_2x32_split(b, nir_channel(b, src, 0),
- nir_channel(b, src, 1));
+ return nir_pack_64_2x32_split(b, nir_channel(b, src, 0),
+ nir_channel(b, src, 1));
}
static nir_ssa_def *
-lower_unpack_double(nir_builder *b, nir_ssa_def *src)
+lower_unpack_64(nir_builder *b, nir_ssa_def *src)
{
- return nir_vec2(b, nir_unpack_double_2x32_split_x(b, src),
- nir_unpack_double_2x32_split_y(b, src));
-}
-
-static nir_ssa_def *
-lower_pack_int(nir_builder *b, nir_ssa_def *src)
-{
- return nir_pack_int_2x32_split(b, nir_channel(b, src, 0),
- nir_channel(b, src, 1));
-}
-
-static nir_ssa_def *
-lower_unpack_int(nir_builder *b, nir_ssa_def *src)
-{
- return nir_vec2(b, nir_unpack_int_2x32_split_x(b, src),
- nir_unpack_int_2x32_split_y(b, src));
+ return nir_vec2(b, nir_unpack_64_2x32_split_x(b, src),
+ nir_unpack_64_2x32_split_y(b, src));
}
static void
@@ -75,10 +61,8 @@ lower_double_pack_impl(nir_function_impl *impl)
nir_alu_instr *alu_instr = (nir_alu_instr *) instr;
- if (alu_instr->op != nir_op_pack_double_2x32 &&
- alu_instr->op != nir_op_unpack_double_2x32 &&
- alu_instr->op != nir_op_pack_int_2x32 &&
- alu_instr->op != nir_op_unpack_int_2x32)
+ if (alu_instr->op != nir_op_pack_64_2x32 &&
+ alu_instr->op != nir_op_unpack_64_2x32)
continue;
b.cursor = nir_before_instr(&alu_instr->instr);
@@ -87,17 +71,11 @@ lower_double_pack_impl(nir_function_impl *impl)
nir_ssa_def *dest;
switch (alu_instr->op) {
- case nir_op_pack_double_2x32:
- dest = lower_pack_double(&b, src);
- break;
- case nir_op_unpack_double_2x32:
- dest = lower_unpack_double(&b, src);
- break;
- case nir_op_pack_int_2x32:
- dest = lower_pack_int(&b, src);
+ case nir_op_pack_64_2x32:
+ dest = lower_pack_64(&b, src);
break;
- case nir_op_unpack_int_2x32:
- dest = lower_unpack_int(&b, src);
+ case nir_op_unpack_64_2x32:
+ dest = lower_unpack_64(&b, src);
break;
default:
unreachable("Impossible opcode");
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index ece673c..b116fcf 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -285,16 +285,10 @@ dst.x = (src0.x << 0) |
(src0.w << 24);
""")
-unop_horiz("pack_double_2x32", 1, tuint64, 2, tuint32,
+unop_horiz("pack_64_2x32", 1, tuint64, 2, tuint32,
"dst.x = src0.x | ((uint64_t)src0.y << 32);")
-unop_horiz("pack_int_2x32", 1, tint64, 2, tint32,
- "dst.x = src0.x | ((int64_t)src0.y << 32);")
-
-unop_horiz("unpack_double_2x32", 2, tuint32, 1, tuint64,
- "dst.x = src0.x; dst.y = src0.x >> 32;")
-
-unop_horiz("unpack_int_2x32", 2, tint32, 1, tint64,
+unop_horiz("unpack_64_2x32", 2, tuint32, 1, tuint64,
"dst.x = src0.x; dst.y = src0.x >> 32;")
# Lowered floating point unpacking operations.
@@ -305,10 +299,8 @@ unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
-unop_convert("unpack_double_2x32_split_x", tuint32, tuint64, "src0")
-unop_convert("unpack_double_2x32_split_y", tuint32, tuint64, "src0 >> 32")
-unop_convert("unpack_int_2x32_split_x", tuint32, tuint64, "src0")
-unop_convert("unpack_int_2x32_split_y", tuint32, tuint64, "src0 >> 32")
+unop_convert("unpack_64_2x32_split_x", tuint32, tuint64, "src0")
+unop_convert("unpack_64_2x32_split_y", tuint32, tuint64, "src0 >> 32")
# Bit operations, part of ARB_gpu_shader5.
@@ -588,10 +580,7 @@ binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
-binop_convert("pack_double_2x32_split", tuint64, tuint32, "",
- "src0 | ((uint64_t)src1 << 32)")
-
-binop_convert("pack_int_2x32_split", tuint64, tuint32, "",
+binop_convert("pack_64_2x32_split", tuint64, tuint32, "",
"src0 | ((uint64_t)src1 << 32)")
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index aaad45a..8a6dd07 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -434,7 +434,7 @@ def fexp2i(exp, bits):
if bits == 32:
return ('ishl', ('iadd', exp, 127), 23)
elif bits == 64:
- return ('pack_double_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20))
+ return ('pack_64_2x32_split', 0, ('ishl', ('iadd', exp, 1023), 20))
else:
assert False
diff --git a/src/compiler/spirv/vtn_glsl450.c b/src/compiler/spirv/vtn_glsl450.c
index dd38cc9..5d38431 100644
--- a/src/compiler/spirv/vtn_glsl450.c
+++ b/src/compiler/spirv/vtn_glsl450.c
@@ -452,13 +452,13 @@ vtn_nir_alu_op_for_spirv_glsl_opcode(enum GLSLstd450 opcode)
case GLSLstd450PackSnorm2x16: return nir_op_pack_snorm_2x16;
case GLSLstd450PackUnorm2x16: return nir_op_pack_unorm_2x16;
case GLSLstd450PackHalf2x16: return nir_op_pack_half_2x16;
- case GLSLstd450PackDouble2x32: return nir_op_pack_double_2x32;
+ case GLSLstd450PackDouble2x32: return nir_op_pack_64_2x32;
case GLSLstd450UnpackSnorm4x8: return nir_op_unpack_snorm_4x8;
case GLSLstd450UnpackUnorm4x8: return nir_op_unpack_unorm_4x8;
case GLSLstd450UnpackSnorm2x16: return nir_op_unpack_snorm_2x16;
case GLSLstd450UnpackUnorm2x16: return nir_op_unpack_unorm_2x16;
case GLSLstd450UnpackHalf2x16: return nir_op_unpack_half_2x16;
- case GLSLstd450UnpackDouble2x32: return nir_op_unpack_double_2x32;
+ case GLSLstd450UnpackDouble2x32: return nir_op_unpack_64_2x32;
default:
unreachable("No NIR equivalent");
diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
index 94f2751..91c14eb 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp
@@ -1212,7 +1212,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
inst->saturate = instr->dest.saturate;
break;
- case nir_op_pack_double_2x32_split:
+ case nir_op_pack_64_2x32_split:
/* Optimize the common case where we are re-packing a double with
* the result of a previous double unpack. In this case we can take the
* 32-bit value to use in the re-pack from the original double and bypass
@@ -1227,8 +1227,8 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
continue;
const nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr);
- if (alu_parent->op == nir_op_unpack_double_2x32_split_x ||
- alu_parent->op == nir_op_unpack_double_2x32_split_y)
+ if (alu_parent->op == nir_op_unpack_64_2x32_split_x ||
+ alu_parent->op == nir_op_unpack_64_2x32_split_y)
continue;
if (!alu_parent->src[0].src.is_ssa)
@@ -1237,7 +1237,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
op[i] = get_nir_src(alu_parent->src[0].src);
op[i] = offset(retype(op[i], BRW_REGISTER_TYPE_DF), bld,
alu_parent->src[0].swizzle[channel]);
- if (alu_parent->op == nir_op_unpack_double_2x32_split_y)
+ if (alu_parent->op == nir_op_unpack_64_2x32_split_y)
op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 1);
else
op[i] = subscript(op[i], BRW_REGISTER_TYPE_UD, 0);
@@ -1245,18 +1245,18 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
bld.emit(FS_OPCODE_PACK, result, op[0], op[1]);
break;
- case nir_op_unpack_double_2x32_split_x:
- case nir_op_unpack_double_2x32_split_y: {
+ case nir_op_unpack_64_2x32_split_x:
+ case nir_op_unpack_64_2x32_split_y: {
/* Optimize the common case where we are unpacking from a double we have
* previously packed. In this case we can just bypass the pack operation
* and source directly from its arguments.
*/
- unsigned index = (instr->op == nir_op_unpack_double_2x32_split_x) ? 0 : 1;
+ unsigned index = (instr->op == nir_op_unpack_64_2x32_split_x) ? 0 : 1;
if (instr->src[0].src.is_ssa) {
nir_instr *parent_instr = instr->src[0].src.ssa->parent_instr;
if (parent_instr->type == nir_instr_type_alu) {
nir_alu_instr *alu_parent = nir_instr_as_alu(parent_instr);
- if (alu_parent->op == nir_op_pack_double_2x32_split &&
+ if (alu_parent->op == nir_op_pack_64_2x32_split &&
alu_parent->src[index].src.is_ssa) {
op[0] = retype(get_nir_src(alu_parent->src[index].src),
BRW_REGISTER_TYPE_UD);
@@ -1268,20 +1268,7 @@ fs_visitor::nir_emit_alu(const fs_builder &bld, nir_alu_instr *instr)
}
}
- if (instr->op == nir_op_unpack_double_2x32_split_x)
- bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0));
- else
- bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1));
- break;
- }
-
- case nir_op_pack_int_2x32_split:
- bld.emit(FS_OPCODE_PACK, result, op[0], op[1]);
- break;
-
- case nir_op_unpack_int_2x32_split_x:
- case nir_op_unpack_int_2x32_split_y: {
- if (instr->op == nir_op_unpack_int_2x32_split_x)
+ if (instr->op == nir_op_unpack_64_2x32_split_x)
bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 0));
else
bld.MOV(result, subscript(op[0], BRW_REGISTER_TYPE_UD, 1));
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
index 2127415..a7f048a 100644
--- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
+++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp
@@ -1762,7 +1762,7 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
}
- case nir_op_pack_double_2x32_split: {
+ case nir_op_pack_64_2x32_split: {
dst_reg result = dst_reg(this, glsl_type::dvec4_type);
dst_reg tmp = dst_reg(this, glsl_type::uvec4_type);
emit(MOV(tmp, retype(op[0], BRW_REGISTER_TYPE_UD)));
@@ -1773,9 +1773,9 @@ vec4_visitor::nir_emit_alu(nir_alu_instr *instr)
break;
}
- case nir_op_unpack_double_2x32_split_x:
- case nir_op_unpack_double_2x32_split_y: {
- enum opcode oper = (instr->op == nir_op_unpack_double_2x32_split_x) ?
+ case nir_op_unpack_64_2x32_split_x:
+ case nir_op_unpack_64_2x32_split_y: {
+ enum opcode oper = (instr->op == nir_op_unpack_64_2x32_split_x) ?
VEC4_OPCODE_PICK_LOW_32BIT : VEC4_OPCODE_PICK_HIGH_32BIT;
dst_reg tmp = dst_reg(this, glsl_type::dvec4_type);
emit(MOV(tmp, op[0]));
--
2.5.0.400.gff86faf
More information about the mesa-dev
mailing list