Mesa (master): nir: remove redundant opcode u2ump

Thu Sep 10 23:50:12 UTC 2020

Module: Mesa
Branch: master
Commit: 3d3df8dbffd5d4b38c802ccd38967e4ca117901e
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=3d3df8dbffd5d4b38c802ccd38967e4ca117901e

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Sep  4 01:51:49 2020 -0400

nir: remove redundant opcode u2ump

Reviewed-by: Rob Clark <robdclark at chromium.org>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6283>

---

 src/amd/llvm/ac_nir_to_llvm.c             |  1 -
 src/compiler/glsl/glsl_to_nir.cpp         |  2 +-
 src/compiler/nir/nir_opcodes.py           |  3 ++-
 src/compiler/nir/nir_opt_algebraic.py     | 14 +++++++++-----
 src/panfrost/util/pan_lower_framebuffer.c |  6 +++---
 5 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/src/amd/llvm/ac_nir_to_llvm.c b/src/amd/llvm/ac_nir_to_llvm.c
index 25c71edbc5d..2645cfbffcd 100644
--- a/src/amd/llvm/ac_nir_to_llvm.c
+++ b/src/amd/llvm/ac_nir_to_llvm.c
@@ -971,7 +971,6 @@ static void visit_alu(struct ac_nir_context *ctx, const nir_alu_instr *instr)
       break;
    case nir_op_u2u8:
    case nir_op_u2u16:
-   case nir_op_u2ump:
    case nir_op_u2u32:
    case nir_op_u2u64:
       if (ac_get_elem_bits(&ctx->ac, LLVMTypeOf(src[0])) < ac_get_elem_bits(&ctx->ac, def_type))
diff --git a/src/compiler/glsl/glsl_to_nir.cpp b/src/compiler/glsl/glsl_to_nir.cpp
index 4722a77af47..90a8ed13747 100644
--- a/src/compiler/glsl/glsl_to_nir.cpp
+++ b/src/compiler/glsl/glsl_to_nir.cpp
@@ -1990,7 +1990,7 @@ nir_visitor::visit(ir_expression *ir)
    }
 
    case ir_unop_u2ump: {
-      result = nir_build_alu(&b, nir_op_u2ump, srcs[0], NULL, NULL, NULL);
+      result = nir_build_alu(&b, nir_op_i2imp, srcs[0], NULL, NULL, NULL);
       break;
    }
 
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index e19d7b00a7d..f3ed96ae69a 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -274,7 +274,8 @@ for src_t in [tint, tuint, tfloat, tbool]:
 # precision.
 unop_numeric_convert("f2fmp", tfloat16, tfloat, opcodes["f2f16"].const_expr)
 unop_numeric_convert("i2imp", tint16, tint, opcodes["i2i16"].const_expr)
-unop_numeric_convert("u2ump", tuint16, tuint, opcodes["u2u16"].const_expr)
+# u2ump isn't defined, because the behavior is equal to i2imp if src has more
+# than 16 bits.
 
 # Unary floating-point rounding operations.
 
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 11b0d00de44..3e42f2552e4 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -967,15 +967,15 @@ optimizations.extend([
    # Conversions from 16 bits to 32 bits and back can always be removed
    (('f2fmp', ('f2f32', 'a@16')), a),
    (('i2imp', ('i2i32', 'a@16')), a),
-   (('u2ump', ('u2u32', 'a@16')), a),
+   (('i2imp', ('u2u32', 'a@16')), a),
    (('f2fmp', ('b2f32', 'a@1')), ('b2f16', a)),
    (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
-   (('u2ump', ('b2i32', 'a@1')), ('b2i16', a)),
+   (('i2imp', ('b2i32', 'a@1')), ('b2i16', a)),
    # Conversions to 16 bits would be lossy so they should only be removed if
    # the instruction was generated by the precision lowering pass.
    (('f2f32', ('f2fmp', 'a@32')), a),
    (('i2i32', ('i2imp', 'a@32')), a),
-   (('u2u32', ('u2ump', 'a@32')), a),
+   (('u2u32', ('i2imp', 'a@32')), a),
 
    (('ffloor', 'a(is_integral)'), a),
    (('fceil', 'a(is_integral)'), a),
@@ -1899,8 +1899,12 @@ for i in range(2, 4 + 1):
 
       optimizations  += [
          ((to_16, vec_inst + suffix_in), vec_inst + out_16, '!options->vectorize_vec2_16bit'),
-         ((to_mp, vec_inst + suffix_in), vec_inst + out_mp, '!options->vectorize_vec2_16bit')
       ]
+      # u2ump doesn't exist, because it's equal to i2imp
+      if T in ['f', 'i']:
+          optimizations  += [
+             ((to_mp, vec_inst + suffix_in), vec_inst + out_mp, '!options->vectorize_vec2_16bit')
+          ]
 
 # This section contains "late" optimizations that should be run before
 # creating ffmas and calling regular optimizations for the final time.
@@ -2065,7 +2069,7 @@ late_optimizations = [
    # nir_opt_algebraic so any remaining ones are required.
    (('f2fmp', a), ('f2f16', a)),
    (('i2imp', a), ('i2i16', a)),
-   (('u2ump', a), ('u2u16', a)),
+   (('i2imp', a), ('u2u16', a)),
 
    # Section 8.8 (Integer Functions) of the GLSL 4.60 spec says:
    #
diff --git a/src/panfrost/util/pan_lower_framebuffer.c b/src/panfrost/util/pan_lower_framebuffer.c
index 1b733687b33..1c497351ea2 100644
--- a/src/panfrost/util/pan_lower_framebuffer.c
+++ b/src/panfrost/util/pan_lower_framebuffer.c
@@ -315,7 +315,7 @@ pan_unpack_unorm_small(nir_builder *b, nir_ssa_def *pack,
                 nir_ssa_def *scales, nir_ssa_def *shifts)
 {
         nir_ssa_def *channels = nir_unpack_32_4x8(b, nir_channel(b, pack, 0));
-        nir_ssa_def *raw = nir_ushr(b, nir_u2ump(b, channels), shifts);
+        nir_ssa_def *raw = nir_ushr(b, nir_i2imp(b, channels), shifts);
         return nir_fmul(b, nir_u2f16(b, raw), scales);
 }
 
@@ -402,7 +402,7 @@ pan_unpack_unorm_1010102(nir_builder *b, nir_ssa_def *packed)
 {
         nir_ssa_def *p = nir_channel(b, packed, 0);
         nir_ssa_def *bytes = nir_unpack_32_4x8(b, p);
-        nir_ssa_def *ubytes = nir_u2ump(b, bytes);
+        nir_ssa_def *ubytes = nir_i2imp(b, bytes);
 
         nir_ssa_def *shifts = nir_ushr(b, pan_replicate_4(b, nir_channel(b, ubytes, 3)),
                         nir_imm_ivec4(b, 0, 2, 4, 6));
@@ -449,7 +449,7 @@ pan_unpack_uint_1010102(nir_builder *b, nir_ssa_def *packed)
         nir_ssa_def *mask = nir_iand(b, shift,
                         nir_imm_ivec4(b, 0x3ff, 0x3ff, 0x3ff, 0x3));
 
-        return nir_u2ump(b, mask);
+        return nir_i2imp(b, mask);
 }
 
 /* NIR means we can *finally* catch a break */