Mesa (master): aco: remove Format::{VOP3A,VOP3B}

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Fri Jan 22 14:21:22 UTC 2021


Module: Mesa
Branch: master
Commit: 441ead5fb35f84dcbaf4724a771ff3475257d400
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=441ead5fb35f84dcbaf4724a771ff3475257d400

Author: Rhys Perry <pendingchaos02 at gmail.com>
Date:   Wed Jan 20 13:50:45 2021 +0000

aco: remove Format::{VOP3A,VOP3B}

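Format::VOP3A and Format::VOP3B are really the same as Format::VOP3: all
three enumerators share the value 1 << 11. Use Format::VOP3 everywhere and
rename VOP3A_instruction to VOP3_instruction to match.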

Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595>

---

 src/amd/compiler/aco_assembler.cpp             |  4 +-
 src/amd/compiler/aco_builder_h.py              | 10 ++---
 src/amd/compiler/aco_instruction_selection.cpp | 18 ++++----
 src/amd/compiler/aco_ir.cpp                    |  4 +-
 src/amd/compiler/aco_ir.h                      | 16 +++----
 src/amd/compiler/aco_lower_to_hw_instr.cpp     | 10 ++---
 src/amd/compiler/aco_opcodes.py                |  5 +--
 src/amd/compiler/aco_opt_value_numbering.cpp   |  6 +--
 src/amd/compiler/aco_optimizer.cpp             | 60 +++++++++++++-------------
 src/amd/compiler/aco_print_ir.cpp              |  6 +--
 src/amd/compiler/aco_register_allocation.cpp   |  6 +--
 src/amd/compiler/aco_validate.cpp              |  4 +-
 src/amd/compiler/tests/test_assembler.cpp      |  4 +-
 src/amd/compiler/tests/test_optimizer.cpp      |  4 +-
 14 files changed, 76 insertions(+), 81 deletions(-)

diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 05ec485a2cf..3efdf663e76 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -563,8 +563,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
          unreachable("Pseudo instructions should be lowered before assembly.");
       break;
    default:
-      if ((uint16_t) instr->format & (uint16_t) Format::VOP3A) {
-         VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
+      if ((uint16_t) instr->format & (uint16_t) Format::VOP3) {
+         VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr);
 
          if ((uint16_t) instr->format & (uint16_t) Format::VOP2) {
             opcode = opcode + 0x100;
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index af0eacdb7ac..502ffc7cd34 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -484,7 +484,7 @@ public:
       int num_defs = carry_out ? 2 : 1;
       aco_ptr<Instruction> sub;
       if (vop3)
-        sub.reset(create_instruction<VOP3A_instruction>(op, Format::VOP3B, num_ops, num_defs));
+        sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs));
       else
         sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs));
       sub->operands[0] = a.op;
@@ -534,15 +534,15 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
            ("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])),
            ("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
            ("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])),
-           ("vop3", [Format.VOP3A], 'VOP3A_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
+           ("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
            ("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]),
            ("vintrp", [Format.VINTRP], 'Interp_instruction', [(1, 2), (1, 3)]),
            ("vop1_dpp", [Format.VOP1, Format.DPP], 'DPP_instruction', [(1, 1)]),
            ("vop2_dpp", [Format.VOP2, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2, 3])),
            ("vopc_dpp", [Format.VOPC, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2])),
-           ("vop1_e64", [Format.VOP1, Format.VOP3A], 'VOP3A_instruction', itertools.product([1], [1])),
-           ("vop2_e64", [Format.VOP2, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2, 3])),
-           ("vopc_e64", [Format.VOPC, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2])),
+           ("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])),
+           ("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])),
+           ("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])),
            ("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
            ("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)])]
 formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7e614071bb0..3aa890fe6b2 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
    Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
 
    Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
-   static_cast<VOP3A_instruction*>(add)->neg[1] = true;
+   static_cast<VOP3_instruction*>(add)->neg[1] = true;
 
    return add->definitions[0].getTemp();
 }
@@ -1692,10 +1692,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
                std::swap(src0, src1);
             add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
          }
-         static_cast<VOP3A_instruction*>(add_instr)->clamp = 1;
+         static_cast<VOP3_instruction*>(add_instr)->clamp = 1;
       } else if (dst.regClass() == v1) {
          if (ctx->options->chip_class >= GFX9) {
-            aco_ptr<VOP3A_instruction> add{create_instruction<VOP3A_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
+            aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
             add->operands[0] = Operand(src0);
             add->operands[1] = Operand(src1);
             add->definitions[0] = Definition(dst);
@@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
       } else if (dst.regClass() == v2) {
          Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
                                      as_vgpr(ctx, src0), as_vgpr(ctx, src1));
-         VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
+         VOP3_instruction* sub = static_cast<VOP3_instruction*>(add);
          sub->neg[1] = true;
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
@@ -2115,7 +2115,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          // TODO: confirm that this holds under any circumstances
       } else if (dst.regClass() == v2) {
          Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
-         VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add);
+         VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(add);
          vop3->clamp = true;
       } else {
          isel_err(&instr->instr, "Unimplemented NIR instr bit size");
@@ -2255,12 +2255,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
             Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi));
             Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
             Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
-            static_cast<VOP3A_instruction*>(sub)->neg[1] = true;
+            static_cast<VOP3_instruction*>(sub)->neg[1] = true;
             tmp = sub->definitions[0].getTemp();
 
             Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu));
             Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v);
-            static_cast<VOP3A_instruction*>(vop3)->abs[0] = true;
+            static_cast<VOP3_instruction*>(vop3)->abs[0] = true;
             Temp cond = vop3->definitions[0].getTemp();
 
             Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
@@ -2926,7 +2926,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
          f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
          Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u));
          Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest);
-         static_cast<VOP3A_instruction*>(vop3)->abs[0] = true;
+         static_cast<VOP3_instruction*>(vop3)->abs[0] = true;
          cmp_res = vop3->definitions[0].getTemp();
       }
 
@@ -8847,7 +8847,7 @@ void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx
 
    ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
 
-   aco_ptr<VOP3A_instruction> vop3a{create_instruction<VOP3A_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
+   aco_ptr<VOP3_instruction> vop3a{create_instruction<VOP3_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
    vop3a->operands[0] = Operand(ma);
    vop3a->abs[0] = true;
    Temp invma = bld.tmp(v1);
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 676a047c8b4..5b46e8a67ee 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -170,7 +170,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
       return true;
 
    if (instr->isVOP3()) {
-      VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
+      VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get());
       if (instr->format == Format::VOP3)
          return false;
       if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
@@ -235,7 +235,7 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
    SDWA_instruction *sdwa = static_cast<SDWA_instruction*>(instr.get());
 
    if (tmp->isVOP3()) {
-      VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(tmp.get());
+      VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(tmp.get());
       memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg));
       memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs));
       sdwa->omod = vop3->omod;
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 5beca44ff2b..798d9cc80a6 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -60,7 +60,7 @@ enum {
 /**
  * Representation of the instruction's microcode encoding format
  * Note: Some Vector ALU Formats can be combined, such that:
- * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
+ * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
  * - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
  * - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
  *
@@ -101,8 +101,6 @@ enum class Format : std::uint16_t {
    VOP2 = 1 << 9,
    VOPC = 1 << 10,
    VOP3 = 1 << 11,
-   VOP3A = 1 << 11,
-   VOP3B = 1 << 11,
    /* Vector Parameter Interpolation Format */
    VINTRP = 1 << 12,
    DPP = 1 << 13,
@@ -1001,8 +999,7 @@ struct Instruction {
       return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
           || ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
           || ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
-          || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
-          || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
+          || ((uint16_t) format & (uint16_t) Format::VOP3) == (uint16_t) Format::VOP3
           || format == Format::VOP3P;
    }
 
@@ -1029,8 +1026,7 @@ struct Instruction {
 
    constexpr bool isVOP3() const noexcept
    {
-      return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
-             ((uint16_t) format & (uint16_t) Format::VOP3B);
+      return (uint16_t) format & (uint16_t) Format::VOP3;
    }
 
    constexpr bool isSDWA() const noexcept
@@ -1114,7 +1110,7 @@ struct VOPC_instruction : public Instruction {
 };
 static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
 
-struct VOP3A_instruction : public Instruction {
+struct VOP3_instruction : public Instruction {
    bool abs[3];
    bool neg[3];
    uint8_t opsel : 4;
@@ -1123,7 +1119,7 @@ struct VOP3A_instruction : public Instruction {
    uint8_t padding0 : 1;
    uint8_t padding1;
 };
-static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
+static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
 
 struct VOP3P_instruction : public Instruction {
    bool neg_lo[3];
@@ -1450,7 +1446,7 @@ constexpr bool Instruction::usesModifiers() const noexcept
       }
       return vop3p->opsel_lo || vop3p->clamp;
    } else if (isVOP3()) {
-      const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
+      const VOP3_instruction *vop3 = static_cast<const VOP3_instruction*>(this);
       for (unsigned i = 0; i < operands.size(); i++) {
          if (vop3->abs[i] || vop3->neg[i])
             return true;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 56da58bd333..f9eb9323f11 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
                                          Definition(PhysReg{vtmp+i}, v1),
                                          Operand(PhysReg{tmp+i}, v1),
                                          Operand(0xffffffffu), Operand(0xffffffffu)).instr;
-            static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
+            static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
          }
          bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
 
@@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
                                          Definition(PhysReg{vtmp+i}, v1),
                                          Operand(PhysReg{tmp+i}, v1),
                                          Operand(0xffffffffu), Operand(0xffffffffu)).instr;
-            static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
+            static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
          }
          emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
 
@@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
          if (dst.physReg().byte() == 2) {
             Operand def_lo(dst.physReg().advance(-2), v2b);
             Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
-            static_cast<VOP3A_instruction*>(instr)->opsel = 0;
+            static_cast<VOP3_instruction*>(instr)->opsel = 0;
          } else {
             assert(dst.physReg().byte() == 0);
             Operand def_hi(dst.physReg().advance(2), v2b);
             Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
-            static_cast<VOP3A_instruction*>(instr)->opsel = 2;
+            static_cast<VOP3_instruction*>(instr)->opsel = 2;
          }
       } else {
          uint32_t offset = dst.physReg().byte() * 8u;
@@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
    if (can_use_pack) {
       Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
       /* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
-      static_cast<VOP3A_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
+      static_cast<VOP3_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
       return;
    }
 
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 2a8bc8c4cdc..20f5006c186 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -53,8 +53,7 @@ class Format(Enum):
    VOP1 = 1 << 8
    VOP2 = 1 << 9
    VOPC = 1 << 10
-   VOP3A = 1 << 11
-   VOP3B = 1 << 11
+   VOP3 = 1 << 11
    VINTRP = 1 << 12
    DPP = 1 << 13
    SDWA = 1 << 14
@@ -1082,7 +1081,7 @@ VOP3 = {
    (   -1,    -1,    -1,    -1, 0x140, "v_fma_legacy_f32", True, True), #GFX10.3+
 }
 for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3:
-   opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod)
+   opcode(name, gfx7, gfx9, gfx10, Format.VOP3, in_mod, out_mod)
 
 
 # DS instructions: 3 inputs (1 addr, 2 data), 1 output
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index 2d1a69b1492..a5a3a8c9cca 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -81,7 +81,7 @@ struct InstrHash {
    std::size_t operator()(Instruction* instr) const
    {
       if (instr->isVOP3())
-         return hash_murmur_32<VOP3A_instruction>(instr);
+         return hash_murmur_32<VOP3_instruction>(instr);
 
       if (instr->isDPP())
          return hash_murmur_32<DPP_instruction>(instr);
@@ -178,8 +178,8 @@ struct InstrPred {
          return false;
 
       if (a->isVOP3()) {
-         VOP3A_instruction* a3 = static_cast<VOP3A_instruction*>(a);
-         VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b);
+         VOP3_instruction* a3 = static_cast<VOP3_instruction*>(a);
+         VOP3_instruction* b3 = static_cast<VOP3_instruction*>(b);
          for (unsigned i = 0; i < 3; i++) {
             if (a3->abs[i] != b3->abs[i] ||
                 a3->neg[i] != b3->neg[i])
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index b1e786408a1..7806c37a3a6 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -706,7 +706,7 @@ void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
    aco_ptr<Instruction> tmp = std::move(instr);
    Format format = asVOP3(tmp->format);
-   instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
+   instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
    std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
    for (unsigned i = 0; i < instr->definitions.size(); i++) {
       instr->definitions[i] = tmp->definitions[i];
@@ -953,7 +953,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
             else if (instr->isSDWA())
                static_cast<SDWA_instruction*>(instr.get())->abs[i] = true;
             else
-               static_cast<VOP3A_instruction*>(instr.get())->abs[i] = true;
+               static_cast<VOP3_instruction*>(instr.get())->abs[i] = true;
          }
          if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
             instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
@@ -972,7 +972,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
             else if (instr->isSDWA())
                static_cast<SDWA_instruction*>(instr.get())->neg[i] = true;
             else
-               static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true;
+               static_cast<VOP3_instruction*>(instr.get())->neg[i] = true;
             continue;
          }
          unsigned bits = get_operand_size(instr, i);
@@ -1365,7 +1365,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
    }
    case aco_opcode::v_med3_f16:
    case aco_opcode::v_med3_f32: { /* clamp */
-      VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
+      VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr.get());
       if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
           vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
           vop3->omod != 0 || vop3->opsel != 0)
@@ -1682,7 +1682,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
          return false;
 
       if (op_instr[i]->isVOP3()) {
-         VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
+         VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(op_instr[i]);
          if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
             return false;
          neg[i] = vop3->neg[0];
@@ -1726,7 +1726,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
    }
    Instruction *new_instr;
    if (neg[0] || neg[1] || abs[0] || abs[1] || opsel || num_sgprs > 1) {
-      VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+      VOP3_instruction *vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
       for (unsigned i = 0; i < 2; i++) {
          vop3->neg[i] = neg[i];
          vop3->abs[i] = abs[i];
@@ -1797,8 +1797,8 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
    aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
    Instruction *new_instr;
    if (cmp->isVOP3()) {
-      VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
-      VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
+      VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+      VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
       memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
       memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
       new_vop3->clamp = cmp_vop3->clamp;
@@ -1885,7 +1885,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
       return false;
 
    if (nan_test->isVOP3()) {
-      VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(nan_test);
+      VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(nan_test);
       if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
          return false;
    }
@@ -1916,8 +1916,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
    aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
    Instruction *new_instr;
    if (cmp->isVOP3()) {
-      VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
-      VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
+      VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+      VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
       memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
       memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
       new_vop3->clamp = cmp_vop3->clamp;
@@ -1965,8 +1965,8 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
     * comparison so that the comparison is done with the correct exec mask. */
    Instruction *new_instr;
    if (cmp->isVOP3()) {
-      VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
-      VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
+      VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
+      VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
       memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
       memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
       new_vop3->clamp = cmp_vop3->clamp;
@@ -2019,8 +2019,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
    if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1]))
       return false;
 
-   VOP3A_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op1_instr) : NULL;
-   VOP3A_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op2_instr) : NULL;
+   VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3_instruction *>(op1_instr) : NULL;
+   VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3_instruction *>(op2_instr) : NULL;
 
    if (op1_instr->isSDWA() || op2_instr->isSDWA())
       return false;
@@ -2081,7 +2081,7 @@ void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>&
                          Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel,
                          bool clamp, unsigned omod)
 {
-   VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
+   VOP3_instruction *new_instr = create_instruction<VOP3_instruction>(opcode, Format::VOP3, 3, 1);
    memcpy(new_instr->abs, abs, sizeof(bool[3]));
    memcpy(new_instr->neg, neg, sizeof(bool[3]));
    new_instr->clamp = clamp;
@@ -2306,7 +2306,7 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n
             new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2));
          } else if (ctx.program->chip_class >= GFX10 ||
                     (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
-            new_instr.reset(create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
+            new_instr.reset(create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
          } else {
             return false;
          }
@@ -2347,7 +2347,7 @@ bool combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
           op_instr->operands[0].isTemp() &&
           op_instr->operands[0].getTemp().type() == RegType::vgpr &&
           op_instr->operands[1].constantEquals(0)) {
-         aco_ptr<Instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
+         aco_ptr<Instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
          ctx.uses[instr->operands[i].tempId()]--;
          new_instr->operands[0] = op_instr->operands[0];
          new_instr->operands[1] = instr->operands[!i];
@@ -2645,7 +2645,7 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
          return false;
    } else {
       to_VOP3(ctx, instr);
-      if (!apply_omod_clamp_helper(ctx, static_cast<VOP3A_instruction *>(instr.get()), def_info))
+      if (!apply_omod_clamp_helper(ctx, static_cast<VOP3_instruction *>(instr.get()), def_info))
          return false;
    }
 
@@ -2675,7 +2675,7 @@ bool combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
             new_instr.reset(create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
          } else if (ctx.program->chip_class >= GFX10 ||
                     (instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
-            new_instr.reset(create_instruction<VOP3A_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
+            new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
          } else {
             return false;
          }
@@ -2729,7 +2729,7 @@ bool combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
 
          ctx.uses[instr->operands[i].tempId()]--;
 
-         aco_ptr<VOP3A_instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3A, 3, 1)};
+         aco_ptr<VOP3_instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3, 3, 1)};
          new_instr->operands[0] = op_instr->operands[!shift_op_idx];
          new_instr->operands[1] = Operand(multiplier);
          new_instr->operands[2] = instr->operands[!i];
@@ -2944,7 +2944,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
 
       if (mul_instr->operands[0].isLiteral())
          return;
-      if (mul_instr->isVOP3() && static_cast<VOP3A_instruction*>(mul_instr)->clamp)
+      if (mul_instr->isVOP3() && static_cast<VOP3_instruction*>(mul_instr)->clamp)
          return;
       if (mul_instr->isSDWA())
          return;
@@ -2954,13 +2954,13 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
       Definition def = instr->definitions[0];
       /* neg(abs(mul(a, b))) -> mul(neg(abs(a)), abs(b)) */
       bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
-      instr.reset(create_instruction<VOP3A_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
+      instr.reset(create_instruction<VOP3_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
       instr->operands[0] = mul_instr->operands[0];
       instr->operands[1] = mul_instr->operands[1];
       instr->definitions[0] = def;
-      VOP3A_instruction* new_mul = static_cast<VOP3A_instruction*>(instr.get());
+      VOP3_instruction* new_mul = static_cast<VOP3_instruction*>(instr.get());
       if (mul_instr->isVOP3()) {
-         VOP3A_instruction* mul = static_cast<VOP3A_instruction*>(mul_instr);
+         VOP3_instruction* mul = static_cast<VOP3_instruction*>(mul_instr);
          new_mul->neg[0] = mul->neg[0] && !is_abs;
          new_mul->neg[1] = mul->neg[1] && !is_abs;
          new_mul->abs[0] = mul->abs[0] || is_abs;
@@ -3003,8 +3003,8 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
 
          /* no clamp/omod allowed between mul and add */
          if (info.instr->isVOP3() &&
-             (static_cast<VOP3A_instruction*>(info.instr)->clamp ||
-              static_cast<VOP3A_instruction*>(info.instr)->omod))
+             (static_cast<VOP3_instruction*>(info.instr)->clamp ||
+              static_cast<VOP3_instruction*>(info.instr)->omod))
             continue;
 
          Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
@@ -3035,7 +3035,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
          bool clamp = false;
 
          if (mul_instr->isVOP3()) {
-            VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (mul_instr);
+            VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (mul_instr);
             neg[0] = vop3->neg[0];
             neg[1] = vop3->neg[1];
             abs[0] = vop3->abs[0];
@@ -3043,7 +3043,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
          }
 
          if (instr->isVOP3()) {
-            VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (instr.get());
+            VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (instr.get());
             neg[2] = vop3->neg[add_op_idx];
             abs[2] = vop3->abs[add_op_idx];
             omod = vop3->omod;
@@ -3068,7 +3068,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
             mad_op = need_fma ? (ctx.program->chip_class == GFX8 ? aco_opcode::v_fma_legacy_f16 : aco_opcode::v_fma_f16) :
                                 (ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_f16 : aco_opcode::v_mad_f16);
 
-         aco_ptr<VOP3A_instruction> mad{create_instruction<VOP3A_instruction>(mad_op, Format::VOP3A, 3, 1)};
+         aco_ptr<VOP3_instruction> mad{create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)};
          for (unsigned i = 0; i < 3; i++) {
             mad->operands[i] = op[i];
             mad->neg[i] = neg[i];
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index f99046da007..6324355cb92 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -588,7 +588,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
    }
    }
    if (instr->isVOP3()) {
-      const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
+      const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr);
       switch (vop3->omod) {
       case 1:
          fprintf(output, " *2");
@@ -693,8 +693,8 @@ void aco_print_instr(const Instruction *instr, FILE *output)
       bool *const neg = (bool *)alloca(instr->operands.size() * sizeof(bool));
       bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
       uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
-      if ((int)instr->format & (int)Format::VOP3A) {
-         const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
+      if ((int)instr->format & (int)Format::VOP3) {
+         const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr);
          for (unsigned i = 0; i < instr->operands.size(); ++i) {
             abs[i] = vop3->abs[i];
             neg[i] = vop3->neg[i];
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 12d8cb354cf..4b083ee6013 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -503,7 +503,7 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
          update_phi_map(ctx, tmp.get(), instr.get());
       return;
    } else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
-      VOP3A_instruction* vop3 = static_cast<VOP3A_instruction *>(instr.get());
+      VOP3_instruction* vop3 = static_cast<VOP3_instruction *>(instr.get());
       vop3->opsel |= (byte / 2) << idx;
       return;
    } else if (instr->format == Format::VOP3P && byte == 2) {
@@ -614,7 +614,7 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi
          convert_to_SDWA(chip, instr);
       return;
    } else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) {
-      VOP3A_instruction *vop3 = static_cast<VOP3A_instruction *>(instr.get());
+      VOP3_instruction *vop3 = static_cast<VOP3_instruction *>(instr.get());
       if (reg.byte() == 2)
          vop3->opsel |= (1 << 3); /* dst in high half */
       return;
@@ -2478,7 +2478,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
             /* change the instruction to VOP3 to enable an arbitrary register pair as dst */
             aco_ptr<Instruction> tmp = std::move(instr);
             Format format = asVOP3(tmp->format);
-            instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
+            instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
             std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin());
             std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin());
             update_phi_map(ctx, tmp.get(), instr.get());
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index b7f9fef2b36..5b6aa53d6ea 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -136,7 +136,7 @@ bool validate_ir(Program* program)
                   base_format == Format::VOP1 ||
                   base_format == Format::VOPC ||
                   base_format == Format::VINTRP,
-                  "Format cannot have VOP3A/VOP3B applied", instr.get());
+                  "Format cannot have VOP3/VOP3B applied", instr.get());
          }
 
          /* check SDWA */
@@ -188,7 +188,7 @@ bool validate_ir(Program* program)
 
          /* check opsel */
          if (instr->isVOP3()) {
-            VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
+            VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get());
             check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
 
             for (unsigned i = 0; i < 3; i++) {
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index 23f02dd3926..bd6055cc20a 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -235,7 +235,7 @@ BEGIN_TEST(assembler.v_add3)
 
       //~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
       //~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
-      aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)};
+      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
       add3->operands[0] = Operand(0u);
       add3->operands[1] = Operand(0u);
       add3->operands[2] = Operand(0u);
@@ -253,7 +253,7 @@ BEGIN_TEST(assembler.v_add3_clamp)
 
       //~gfx9>> integer addition + clamp ; d1ff8000 02010080
       //~gfx10>> integer addition + clamp ; d76d8000 02010080
-      aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)};
+      aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
       add3->operands[0] = Operand(0u);
       add3->operands[1] = Operand(0u);
       add3->operands[2] = Operand(0u);
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 679812faac8..f914bce922f 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3)
    //! v1: %res1 = v_add_u32 %a, %tmp1
    //! p_unit_test 1, %res1
    tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
-   static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
+   static_cast<VOP3_instruction *>(tmp.instr)->clamp = true;
    writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
 
    //! v1: %tmp2 = v_add_u32 %b, %c
@@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3)
    //! p_unit_test 2, %res2
    tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
    tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
-   static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
+   static_cast<VOP3_instruction *>(tmp.instr)->clamp = true;
    writeout(2, tmp);
 
    finish_opt_test();



More information about the mesa-commit mailing list