Mesa (master): aco: remove Format::{VOP3A,VOP3B}
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Fri Jan 22 14:21:22 UTC 2021
Module: Mesa
Branch: master
Commit: 441ead5fb35f84dcbaf4724a771ff3475257d400
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=441ead5fb35f84dcbaf4724a771ff3475257d400
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Wed Jan 20 13:50:45 2021 +0000
aco: remove Format::{VOP3A,VOP3B}
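Format::VOP3A and Format::VOP3B were aliases of Format::VOP3 (all three had the
value 1 << 11), so remove them and use Format::VOP3 everywhere. VOP3A_instruction
is renamed to VOP3_instruction to match.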
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/8595>
---
src/amd/compiler/aco_assembler.cpp | 4 +-
src/amd/compiler/aco_builder_h.py | 10 ++---
src/amd/compiler/aco_instruction_selection.cpp | 18 ++++----
src/amd/compiler/aco_ir.cpp | 4 +-
src/amd/compiler/aco_ir.h | 16 +++----
src/amd/compiler/aco_lower_to_hw_instr.cpp | 10 ++---
src/amd/compiler/aco_opcodes.py | 5 +--
src/amd/compiler/aco_opt_value_numbering.cpp | 6 +--
src/amd/compiler/aco_optimizer.cpp | 60 +++++++++++++-------------
src/amd/compiler/aco_print_ir.cpp | 6 +--
src/amd/compiler/aco_register_allocation.cpp | 6 +--
src/amd/compiler/aco_validate.cpp | 4 +-
src/amd/compiler/tests/test_assembler.cpp | 4 +-
src/amd/compiler/tests/test_optimizer.cpp | 4 +-
14 files changed, 76 insertions(+), 81 deletions(-)
diff --git a/src/amd/compiler/aco_assembler.cpp b/src/amd/compiler/aco_assembler.cpp
index 05ec485a2cf..3efdf663e76 100644
--- a/src/amd/compiler/aco_assembler.cpp
+++ b/src/amd/compiler/aco_assembler.cpp
@@ -563,8 +563,8 @@ void emit_instruction(asm_context& ctx, std::vector<uint32_t>& out, Instruction*
unreachable("Pseudo instructions should be lowered before assembly.");
break;
default:
- if ((uint16_t) instr->format & (uint16_t) Format::VOP3A) {
- VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr);
+ if ((uint16_t) instr->format & (uint16_t) Format::VOP3) {
+ VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr);
if ((uint16_t) instr->format & (uint16_t) Format::VOP2) {
opcode = opcode + 0x100;
diff --git a/src/amd/compiler/aco_builder_h.py b/src/amd/compiler/aco_builder_h.py
index af0eacdb7ac..502ffc7cd34 100644
--- a/src/amd/compiler/aco_builder_h.py
+++ b/src/amd/compiler/aco_builder_h.py
@@ -484,7 +484,7 @@ public:
int num_defs = carry_out ? 2 : 1;
aco_ptr<Instruction> sub;
if (vop3)
- sub.reset(create_instruction<VOP3A_instruction>(op, Format::VOP3B, num_ops, num_defs));
+ sub.reset(create_instruction<VOP3_instruction>(op, Format::VOP3, num_ops, num_defs));
else
sub.reset(create_instruction<VOP2_instruction>(op, Format::VOP2, num_ops, num_defs));
sub->operands[0] = a.op;
@@ -534,15 +534,15 @@ formats = [("pseudo", [Format.PSEUDO], 'Pseudo_instruction', list(itertools.prod
("vop2", [Format.VOP2], 'VOP2_instruction', itertools.product([1, 2], [2, 3])),
("vop2_sdwa", [Format.VOP2, Format.SDWA], 'SDWA_instruction', itertools.product([1, 2], [2, 3])),
("vopc", [Format.VOPC], 'VOPC_instruction', itertools.product([1, 2], [2])),
- ("vop3", [Format.VOP3A], 'VOP3A_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
+ ("vop3", [Format.VOP3], 'VOP3_instruction', [(1, 3), (1, 2), (1, 1), (2, 2)]),
("vop3p", [Format.VOP3P], 'VOP3P_instruction', [(1, 2), (1, 3)]),
("vintrp", [Format.VINTRP], 'Interp_instruction', [(1, 2), (1, 3)]),
("vop1_dpp", [Format.VOP1, Format.DPP], 'DPP_instruction', [(1, 1)]),
("vop2_dpp", [Format.VOP2, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2, 3])),
("vopc_dpp", [Format.VOPC, Format.DPP], 'DPP_instruction', itertools.product([1, 2], [2])),
- ("vop1_e64", [Format.VOP1, Format.VOP3A], 'VOP3A_instruction', itertools.product([1], [1])),
- ("vop2_e64", [Format.VOP2, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2, 3])),
- ("vopc_e64", [Format.VOPC, Format.VOP3A], 'VOP3A_instruction', itertools.product([1, 2], [2])),
+ ("vop1_e64", [Format.VOP1, Format.VOP3], 'VOP3_instruction', itertools.product([1], [1])),
+ ("vop2_e64", [Format.VOP2, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2, 3])),
+ ("vopc_e64", [Format.VOPC, Format.VOP3], 'VOP3_instruction', itertools.product([1, 2], [2])),
("flat", [Format.FLAT], 'FLAT_instruction', [(0, 3), (1, 2)]),
("global", [Format.GLOBAL], 'FLAT_instruction', [(0, 3), (1, 2)])]
formats = [(f if len(f) == 5 else f + ('',)) for f in formats]
diff --git a/src/amd/compiler/aco_instruction_selection.cpp b/src/amd/compiler/aco_instruction_selection.cpp
index 7e614071bb0..3aa890fe6b2 100644
--- a/src/amd/compiler/aco_instruction_selection.cpp
+++ b/src/amd/compiler/aco_instruction_selection.cpp
@@ -1224,7 +1224,7 @@ Temp emit_floor_f64(isel_context *ctx, Builder& bld, Definition dst, Temp val)
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), dst0, dst1);
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src0, v);
- static_cast<VOP3A_instruction*>(add)->neg[1] = true;
+ static_cast<VOP3_instruction*>(add)->neg[1] = true;
return add->definitions[0].getTemp();
}
@@ -1692,10 +1692,10 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
std::swap(src0, src1);
add_instr = bld.vop2_e64(aco_opcode::v_add_u16, Definition(dst), src0, as_vgpr(ctx, src1)).instr;
}
- static_cast<VOP3A_instruction*>(add_instr)->clamp = 1;
+ static_cast<VOP3_instruction*>(add_instr)->clamp = 1;
} else if (dst.regClass() == v1) {
if (ctx->options->chip_class >= GFX9) {
- aco_ptr<VOP3A_instruction> add{create_instruction<VOP3A_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
+ aco_ptr<VOP3_instruction> add{create_instruction<VOP3_instruction>(aco_opcode::v_add_u32, asVOP3(Format::VOP2), 2, 1)};
add->operands[0] = Operand(src0);
add->operands[1] = Operand(src1);
add->definitions[0] = Definition(dst);
@@ -1965,7 +1965,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
} else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst),
as_vgpr(ctx, src0), as_vgpr(ctx, src1));
- VOP3A_instruction* sub = static_cast<VOP3A_instruction*>(add);
+ VOP3_instruction* sub = static_cast<VOP3_instruction*>(add);
sub->neg[1] = true;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
@@ -2115,7 +2115,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
// TODO: confirm that this holds under any circumstances
} else if (dst.regClass() == v2) {
Instruction* add = bld.vop3(aco_opcode::v_add_f64, Definition(dst), src, Operand(0u));
- VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(add);
+ VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(add);
vop3->clamp = true;
} else {
isel_err(&instr->instr, "Unimplemented NIR instr bit size");
@@ -2255,12 +2255,12 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
Temp bfi = bld.vop3(aco_opcode::v_bfi_b32, bld.def(v1), bitmask, bld.copy(bld.def(v1), Operand(0x43300000u)), as_vgpr(ctx, src0_hi));
Temp tmp = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), src0, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
Instruction *sub = bld.vop3(aco_opcode::v_add_f64, bld.def(v2), tmp, bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(0u), bfi));
- static_cast<VOP3A_instruction*>(sub)->neg[1] = true;
+ static_cast<VOP3_instruction*>(sub)->neg[1] = true;
tmp = sub->definitions[0].getTemp();
Temp v = bld.pseudo(aco_opcode::p_create_vector, bld.def(v2), Operand(-1u), Operand(0x432fffffu));
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_gt_f64, bld.hint_vcc(bld.def(bld.lm)), src0, v);
- static_cast<VOP3A_instruction*>(vop3)->abs[0] = true;
+ static_cast<VOP3_instruction*>(vop3)->abs[0] = true;
Temp cond = vop3->definitions[0].getTemp();
Temp tmp_lo = bld.tmp(v1), tmp_hi = bld.tmp(v1);
@@ -2926,7 +2926,7 @@ void visit_alu_instr(isel_context *ctx, nir_alu_instr *instr)
f32 = bld.vop1(aco_opcode::v_cvt_f32_f16, bld.def(v1), f16);
Temp smallest = bld.copy(bld.def(s1), Operand(0x38800000u));
Instruction* vop3 = bld.vopc_e64(aco_opcode::v_cmp_nlt_f32, bld.hint_vcc(bld.def(bld.lm)), f32, smallest);
- static_cast<VOP3A_instruction*>(vop3)->abs[0] = true;
+ static_cast<VOP3_instruction*>(vop3)->abs[0] = true;
cmp_res = vop3->definitions[0].getTemp();
}
@@ -8847,7 +8847,7 @@ void prepare_cube_coords(isel_context *ctx, std::vector<Temp>& coords, Temp* ddx
ma = bld.vop3(aco_opcode::v_cubema_f32, bld.def(v1), coords[0], coords[1], coords[2]);
- aco_ptr<VOP3A_instruction> vop3a{create_instruction<VOP3A_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
+ aco_ptr<VOP3_instruction> vop3a{create_instruction<VOP3_instruction>(aco_opcode::v_rcp_f32, asVOP3(Format::VOP1), 1, 1)};
vop3a->operands[0] = Operand(ma);
vop3a->abs[0] = true;
Temp invma = bld.tmp(v1);
diff --git a/src/amd/compiler/aco_ir.cpp b/src/amd/compiler/aco_ir.cpp
index 676a047c8b4..5b46e8a67ee 100644
--- a/src/amd/compiler/aco_ir.cpp
+++ b/src/amd/compiler/aco_ir.cpp
@@ -170,7 +170,7 @@ bool can_use_SDWA(chip_class chip, const aco_ptr<Instruction>& instr)
return true;
if (instr->isVOP3()) {
- VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
+ VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get());
if (instr->format == Format::VOP3)
return false;
if (vop3->clamp && instr->format == asVOP3(Format::VOPC) && chip != GFX8)
@@ -235,7 +235,7 @@ aco_ptr<Instruction> convert_to_SDWA(chip_class chip, aco_ptr<Instruction>& inst
SDWA_instruction *sdwa = static_cast<SDWA_instruction*>(instr.get());
if (tmp->isVOP3()) {
- VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(tmp.get());
+ VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(tmp.get());
memcpy(sdwa->neg, vop3->neg, sizeof(sdwa->neg));
memcpy(sdwa->abs, vop3->abs, sizeof(sdwa->abs));
sdwa->omod = vop3->omod;
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 5beca44ff2b..798d9cc80a6 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -60,7 +60,7 @@ enum {
/**
* Representation of the instruction's microcode encoding format
* Note: Some Vector ALU Formats can be combined, such that:
- * - VOP2* | VOP3A represents a VOP2 instruction in VOP3A encoding
+ * - VOP2* | VOP3 represents a VOP2 instruction in VOP3 encoding
* - VOP2* | DPP represents a VOP2 instruction with data parallel primitive.
* - VOP2* | SDWA represents a VOP2 instruction with sub-dword addressing.
*
@@ -101,8 +101,6 @@ enum class Format : std::uint16_t {
VOP2 = 1 << 9,
VOPC = 1 << 10,
VOP3 = 1 << 11,
- VOP3A = 1 << 11,
- VOP3B = 1 << 11,
/* Vector Parameter Interpolation Format */
VINTRP = 1 << 12,
DPP = 1 << 13,
@@ -1001,8 +999,7 @@ struct Instruction {
return ((uint16_t) format & (uint16_t) Format::VOP1) == (uint16_t) Format::VOP1
|| ((uint16_t) format & (uint16_t) Format::VOP2) == (uint16_t) Format::VOP2
|| ((uint16_t) format & (uint16_t) Format::VOPC) == (uint16_t) Format::VOPC
- || ((uint16_t) format & (uint16_t) Format::VOP3A) == (uint16_t) Format::VOP3A
- || ((uint16_t) format & (uint16_t) Format::VOP3B) == (uint16_t) Format::VOP3B
+ || ((uint16_t) format & (uint16_t) Format::VOP3) == (uint16_t) Format::VOP3
|| format == Format::VOP3P;
}
@@ -1029,8 +1026,7 @@ struct Instruction {
constexpr bool isVOP3() const noexcept
{
- return ((uint16_t) format & (uint16_t) Format::VOP3A) ||
- ((uint16_t) format & (uint16_t) Format::VOP3B);
+ return (uint16_t) format & (uint16_t) Format::VOP3;
}
constexpr bool isSDWA() const noexcept
@@ -1114,7 +1110,7 @@ struct VOPC_instruction : public Instruction {
};
static_assert(sizeof(VOPC_instruction) == sizeof(Instruction) + 0, "Unexpected padding");
-struct VOP3A_instruction : public Instruction {
+struct VOP3_instruction : public Instruction {
bool abs[3];
bool neg[3];
uint8_t opsel : 4;
@@ -1123,7 +1119,7 @@ struct VOP3A_instruction : public Instruction {
uint8_t padding0 : 1;
uint8_t padding1;
};
-static_assert(sizeof(VOP3A_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
+static_assert(sizeof(VOP3_instruction) == sizeof(Instruction) + 8, "Unexpected padding");
struct VOP3P_instruction : public Instruction {
bool neg_lo[3];
@@ -1450,7 +1446,7 @@ constexpr bool Instruction::usesModifiers() const noexcept
}
return vop3p->opsel_lo || vop3p->clamp;
} else if (isVOP3()) {
- const VOP3A_instruction *vop3 = static_cast<const VOP3A_instruction*>(this);
+ const VOP3_instruction *vop3 = static_cast<const VOP3_instruction*>(this);
for (unsigned i = 0; i < operands.size(); i++) {
if (vop3->abs[i] || vop3->neg[i])
return true;
diff --git a/src/amd/compiler/aco_lower_to_hw_instr.cpp b/src/amd/compiler/aco_lower_to_hw_instr.cpp
index 56da58bd333..f9eb9323f11 100644
--- a/src/amd/compiler/aco_lower_to_hw_instr.cpp
+++ b/src/amd/compiler/aco_lower_to_hw_instr.cpp
@@ -646,7 +646,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
Definition(PhysReg{vtmp+i}, v1),
Operand(PhysReg{tmp+i}, v1),
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
- static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
+ static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
}
bld.sop1(Builder::s_mov, Definition(exec, bld.lm), Operand(UINT64_MAX));
@@ -757,7 +757,7 @@ void emit_reduction(lower_context *ctx, aco_opcode op, ReduceOp reduce_op, unsig
Definition(PhysReg{vtmp+i}, v1),
Operand(PhysReg{tmp+i}, v1),
Operand(0xffffffffu), Operand(0xffffffffu)).instr;
- static_cast<VOP3A_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
+ static_cast<VOP3_instruction*>(perm)->opsel = 1; /* FI (Fetch Inactive) */
}
emit_op(ctx, tmp, tmp, vtmp, PhysReg{0}, reduce_op, src.size());
@@ -1052,12 +1052,12 @@ void copy_constant(lower_context *ctx, Builder& bld, Definition dst, Operand op)
if (dst.physReg().byte() == 2) {
Operand def_lo(dst.physReg().advance(-2), v2b);
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, def_lo, op);
- static_cast<VOP3A_instruction*>(instr)->opsel = 0;
+ static_cast<VOP3_instruction*>(instr)->opsel = 0;
} else {
assert(dst.physReg().byte() == 0);
Operand def_hi(dst.physReg().advance(2), v2b);
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, dst, op, def_hi);
- static_cast<VOP3A_instruction*>(instr)->opsel = 2;
+ static_cast<VOP3_instruction*>(instr)->opsel = 2;
}
} else {
uint32_t offset = dst.physReg().byte() * 8u;
@@ -1251,7 +1251,7 @@ void do_pack_2x16(lower_context *ctx, Builder& bld, Definition def, Operand lo,
if (can_use_pack) {
Instruction* instr = bld.vop3(aco_opcode::v_pack_b32_f16, def, lo, hi);
/* opsel: 0 = select low half, 1 = select high half. [0] = src0, [1] = src1 */
- static_cast<VOP3A_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
+ static_cast<VOP3_instruction*>(instr)->opsel = hi.physReg().byte() | (lo.physReg().byte() >> 1);
return;
}
diff --git a/src/amd/compiler/aco_opcodes.py b/src/amd/compiler/aco_opcodes.py
index 2a8bc8c4cdc..20f5006c186 100644
--- a/src/amd/compiler/aco_opcodes.py
+++ b/src/amd/compiler/aco_opcodes.py
@@ -53,8 +53,7 @@ class Format(Enum):
VOP1 = 1 << 8
VOP2 = 1 << 9
VOPC = 1 << 10
- VOP3A = 1 << 11
- VOP3B = 1 << 11
+ VOP3 = 1 << 11
VINTRP = 1 << 12
DPP = 1 << 13
SDWA = 1 << 14
@@ -1082,7 +1081,7 @@ VOP3 = {
( -1, -1, -1, -1, 0x140, "v_fma_legacy_f32", True, True), #GFX10.3+
}
for (gfx6, gfx7, gfx8, gfx9, gfx10, name, in_mod, out_mod) in VOP3:
- opcode(name, gfx7, gfx9, gfx10, Format.VOP3A, in_mod, out_mod)
+ opcode(name, gfx7, gfx9, gfx10, Format.VOP3, in_mod, out_mod)
# DS instructions: 3 inputs (1 addr, 2 data), 1 output
diff --git a/src/amd/compiler/aco_opt_value_numbering.cpp b/src/amd/compiler/aco_opt_value_numbering.cpp
index 2d1a69b1492..a5a3a8c9cca 100644
--- a/src/amd/compiler/aco_opt_value_numbering.cpp
+++ b/src/amd/compiler/aco_opt_value_numbering.cpp
@@ -81,7 +81,7 @@ struct InstrHash {
std::size_t operator()(Instruction* instr) const
{
if (instr->isVOP3())
- return hash_murmur_32<VOP3A_instruction>(instr);
+ return hash_murmur_32<VOP3_instruction>(instr);
if (instr->isDPP())
return hash_murmur_32<DPP_instruction>(instr);
@@ -178,8 +178,8 @@ struct InstrPred {
return false;
if (a->isVOP3()) {
- VOP3A_instruction* a3 = static_cast<VOP3A_instruction*>(a);
- VOP3A_instruction* b3 = static_cast<VOP3A_instruction*>(b);
+ VOP3_instruction* a3 = static_cast<VOP3_instruction*>(a);
+ VOP3_instruction* b3 = static_cast<VOP3_instruction*>(b);
for (unsigned i = 0; i < 3; i++) {
if (a3->abs[i] != b3->abs[i] ||
a3->neg[i] != b3->neg[i])
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index b1e786408a1..7806c37a3a6 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -706,7 +706,7 @@ void to_VOP3(opt_ctx& ctx, aco_ptr<Instruction>& instr)
aco_ptr<Instruction> tmp = std::move(instr);
Format format = asVOP3(tmp->format);
- instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
+ instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
std::copy(tmp->operands.cbegin(), tmp->operands.cend(), instr->operands.begin());
for (unsigned i = 0; i < instr->definitions.size(); i++) {
instr->definitions[i] = tmp->definitions[i];
@@ -953,7 +953,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
else if (instr->isSDWA())
static_cast<SDWA_instruction*>(instr.get())->abs[i] = true;
else
- static_cast<VOP3A_instruction*>(instr.get())->abs[i] = true;
+ static_cast<VOP3_instruction*>(instr.get())->abs[i] = true;
}
if (info.is_neg() && instr->opcode == aco_opcode::v_add_f32) {
instr->opcode = i ? aco_opcode::v_sub_f32 : aco_opcode::v_subrev_f32;
@@ -972,7 +972,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
else if (instr->isSDWA())
static_cast<SDWA_instruction*>(instr.get())->neg[i] = true;
else
- static_cast<VOP3A_instruction*>(instr.get())->neg[i] = true;
+ static_cast<VOP3_instruction*>(instr.get())->neg[i] = true;
continue;
}
unsigned bits = get_operand_size(instr, i);
@@ -1365,7 +1365,7 @@ void label_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
}
case aco_opcode::v_med3_f16:
case aco_opcode::v_med3_f32: { /* clamp */
- VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*>(instr.get());
+ VOP3_instruction* vop3 = static_cast<VOP3_instruction*>(instr.get());
if (vop3->abs[0] || vop3->abs[1] || vop3->abs[2] ||
vop3->neg[0] || vop3->neg[1] || vop3->neg[2] ||
vop3->omod != 0 || vop3->opsel != 0)
@@ -1682,7 +1682,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
return false;
if (op_instr[i]->isVOP3()) {
- VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(op_instr[i]);
+ VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(op_instr[i]);
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
return false;
neg[i] = vop3->neg[0];
@@ -1726,7 +1726,7 @@ bool combine_ordering_test(opt_ctx &ctx, aco_ptr<Instruction>& instr)
}
Instruction *new_instr;
if (neg[0] || neg[1] || abs[0] || abs[1] || opsel || num_sgprs > 1) {
- VOP3A_instruction *vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+ VOP3_instruction *vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
for (unsigned i = 0; i < 2; i++) {
vop3->neg[i] = neg[i];
vop3->abs[i] = abs[i];
@@ -1797,8 +1797,8 @@ bool combine_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& instr)
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
Instruction *new_instr;
if (cmp->isVOP3()) {
- VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
- VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
+ VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+ VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
@@ -1885,7 +1885,7 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
return false;
if (nan_test->isVOP3()) {
- VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(nan_test);
+ VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(nan_test);
if (vop3->neg[0] != vop3->neg[1] || vop3->abs[0] != vop3->abs[1] || vop3->opsel == 1 || vop3->opsel == 2)
return false;
}
@@ -1916,8 +1916,8 @@ bool combine_constant_comparison_ordering(opt_ctx &ctx, aco_ptr<Instruction>& in
aco_opcode new_op = is_or ? get_unordered(cmp->opcode) : get_ordered(cmp->opcode);
Instruction *new_instr;
if (cmp->isVOP3()) {
- VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
- VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
+ VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOPC), 2, 1);
+ VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
@@ -1965,8 +1965,8 @@ bool combine_inverse_comparison(opt_ctx &ctx, aco_ptr<Instruction>& instr)
* comparison so that the comparison is done with the correct exec mask. */
Instruction *new_instr;
if (cmp->isVOP3()) {
- VOP3A_instruction *new_vop3 = create_instruction<VOP3A_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
- VOP3A_instruction *cmp_vop3 = static_cast<VOP3A_instruction*>(cmp);
+ VOP3_instruction *new_vop3 = create_instruction<VOP3_instruction>(new_opcode, asVOP3(Format::VOPC), 2, 1);
+ VOP3_instruction *cmp_vop3 = static_cast<VOP3_instruction*>(cmp);
memcpy(new_vop3->abs, cmp_vop3->abs, sizeof(new_vop3->abs));
memcpy(new_vop3->neg, cmp_vop3->neg, sizeof(new_vop3->neg));
new_vop3->clamp = cmp_vop3->clamp;
@@ -2019,8 +2019,8 @@ bool match_op3_for_vop3(opt_ctx &ctx, aco_opcode op1, aco_opcode op2,
if (fixed_to_exec(op2_instr->operands[0]) || fixed_to_exec(op2_instr->operands[1]))
return false;
- VOP3A_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op1_instr) : NULL;
- VOP3A_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3A_instruction *>(op2_instr) : NULL;
+ VOP3_instruction *op1_vop3 = op1_instr->isVOP3() ? static_cast<VOP3_instruction *>(op1_instr) : NULL;
+ VOP3_instruction *op2_vop3 = op2_instr->isVOP3() ? static_cast<VOP3_instruction *>(op2_instr) : NULL;
if (op1_instr->isSDWA() || op2_instr->isSDWA())
return false;
@@ -2081,7 +2081,7 @@ void create_vop3_for_op3(opt_ctx& ctx, aco_opcode opcode, aco_ptr<Instruction>&
Operand operands[3], bool neg[3], bool abs[3], uint8_t opsel,
bool clamp, unsigned omod)
{
- VOP3A_instruction *new_instr = create_instruction<VOP3A_instruction>(opcode, Format::VOP3A, 3, 1);
+ VOP3_instruction *new_instr = create_instruction<VOP3_instruction>(opcode, Format::VOP3, 3, 1);
memcpy(new_instr->abs, abs, sizeof(bool[3]));
memcpy(new_instr->neg, neg, sizeof(bool[3]));
new_instr->clamp = clamp;
@@ -2306,7 +2306,7 @@ bool combine_add_sub_b2i(opt_ctx& ctx, aco_ptr<Instruction>& instr, aco_opcode n
new_instr.reset(create_instruction<VOP2_instruction>(new_op, Format::VOP2, 3, 2));
} else if (ctx.program->chip_class >= GFX10 ||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
- new_instr.reset(create_instruction<VOP3A_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
+ new_instr.reset(create_instruction<VOP3_instruction>(new_op, asVOP3(Format::VOP2), 3, 2));
} else {
return false;
}
@@ -2347,7 +2347,7 @@ bool combine_add_bcnt(opt_ctx& ctx, aco_ptr<Instruction>& instr)
op_instr->operands[0].isTemp() &&
op_instr->operands[0].getTemp().type() == RegType::vgpr &&
op_instr->operands[1].constantEquals(0)) {
- aco_ptr<Instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
+ aco_ptr<Instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_bcnt_u32_b32, Format::VOP3, 2, 1)};
ctx.uses[instr->operands[i].tempId()]--;
new_instr->operands[0] = op_instr->operands[0];
new_instr->operands[1] = instr->operands[!i];
@@ -2645,7 +2645,7 @@ bool apply_omod_clamp(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr)
return false;
} else {
to_VOP3(ctx, instr);
- if (!apply_omod_clamp_helper(ctx, static_cast<VOP3A_instruction *>(instr.get()), def_info))
+ if (!apply_omod_clamp_helper(ctx, static_cast<VOP3_instruction *>(instr.get()), def_info))
return false;
}
@@ -2675,7 +2675,7 @@ bool combine_and_subbrev(opt_ctx& ctx, aco_ptr<Instruction>& instr)
new_instr.reset(create_instruction<VOP2_instruction>(aco_opcode::v_cndmask_b32, Format::VOP2, 3, 1));
} else if (ctx.program->chip_class >= GFX10 ||
(instr->operands[!i].isConstant() && !instr->operands[!i].isLiteral())) {
- new_instr.reset(create_instruction<VOP3A_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
+ new_instr.reset(create_instruction<VOP3_instruction>(aco_opcode::v_cndmask_b32, asVOP3(Format::VOP2), 3, 1));
} else {
return false;
}
@@ -2729,7 +2729,7 @@ bool combine_add_lshl(opt_ctx& ctx, aco_ptr<Instruction>& instr)
ctx.uses[instr->operands[i].tempId()]--;
- aco_ptr<VOP3A_instruction> new_instr{create_instruction<VOP3A_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3A, 3, 1)};
+ aco_ptr<VOP3_instruction> new_instr{create_instruction<VOP3_instruction>(aco_opcode::v_mad_u32_u24, Format::VOP3, 3, 1)};
new_instr->operands[0] = op_instr->operands[!shift_op_idx];
new_instr->operands[1] = Operand(multiplier);
new_instr->operands[2] = instr->operands[!i];
@@ -2944,7 +2944,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
if (mul_instr->operands[0].isLiteral())
return;
- if (mul_instr->isVOP3() && static_cast<VOP3A_instruction*>(mul_instr)->clamp)
+ if (mul_instr->isVOP3() && static_cast<VOP3_instruction*>(mul_instr)->clamp)
return;
if (mul_instr->isSDWA())
return;
@@ -2954,13 +2954,13 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
Definition def = instr->definitions[0];
/* neg(abs(mul(a, b))) -> mul(neg(abs(a)), abs(b)) */
bool is_abs = ctx.info[instr->definitions[0].tempId()].is_abs();
- instr.reset(create_instruction<VOP3A_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
+ instr.reset(create_instruction<VOP3_instruction>(mul_instr->opcode, asVOP3(Format::VOP2), 2, 1));
instr->operands[0] = mul_instr->operands[0];
instr->operands[1] = mul_instr->operands[1];
instr->definitions[0] = def;
- VOP3A_instruction* new_mul = static_cast<VOP3A_instruction*>(instr.get());
+ VOP3_instruction* new_mul = static_cast<VOP3_instruction*>(instr.get());
if (mul_instr->isVOP3()) {
- VOP3A_instruction* mul = static_cast<VOP3A_instruction*>(mul_instr);
+ VOP3_instruction* mul = static_cast<VOP3_instruction*>(mul_instr);
new_mul->neg[0] = mul->neg[0] && !is_abs;
new_mul->neg[1] = mul->neg[1] && !is_abs;
new_mul->abs[0] = mul->abs[0] || is_abs;
@@ -3003,8 +3003,8 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
/* no clamp/omod allowed between mul and add */
if (info.instr->isVOP3() &&
- (static_cast<VOP3A_instruction*>(info.instr)->clamp ||
- static_cast<VOP3A_instruction*>(info.instr)->omod))
+ (static_cast<VOP3_instruction*>(info.instr)->clamp ||
+ static_cast<VOP3_instruction*>(info.instr)->omod))
continue;
Operand op[3] = {info.instr->operands[0], info.instr->operands[1], instr->operands[1 - i]};
@@ -3035,7 +3035,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
bool clamp = false;
if (mul_instr->isVOP3()) {
- VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (mul_instr);
+ VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (mul_instr);
neg[0] = vop3->neg[0];
neg[1] = vop3->neg[1];
abs[0] = vop3->abs[0];
@@ -3043,7 +3043,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
}
if (instr->isVOP3()) {
- VOP3A_instruction* vop3 = static_cast<VOP3A_instruction*> (instr.get());
+ VOP3_instruction* vop3 = static_cast<VOP3_instruction*> (instr.get());
neg[2] = vop3->neg[add_op_idx];
abs[2] = vop3->abs[add_op_idx];
omod = vop3->omod;
@@ -3068,7 +3068,7 @@ void combine_instruction(opt_ctx &ctx, Block& block, aco_ptr<Instruction>& instr
mad_op = need_fma ? (ctx.program->chip_class == GFX8 ? aco_opcode::v_fma_legacy_f16 : aco_opcode::v_fma_f16) :
(ctx.program->chip_class == GFX8 ? aco_opcode::v_mad_legacy_f16 : aco_opcode::v_mad_f16);
- aco_ptr<VOP3A_instruction> mad{create_instruction<VOP3A_instruction>(mad_op, Format::VOP3A, 3, 1)};
+ aco_ptr<VOP3_instruction> mad{create_instruction<VOP3_instruction>(mad_op, Format::VOP3, 3, 1)};
for (unsigned i = 0; i < 3; i++) {
mad->operands[i] = op[i];
mad->neg[i] = neg[i];
diff --git a/src/amd/compiler/aco_print_ir.cpp b/src/amd/compiler/aco_print_ir.cpp
index f99046da007..6324355cb92 100644
--- a/src/amd/compiler/aco_print_ir.cpp
+++ b/src/amd/compiler/aco_print_ir.cpp
@@ -588,7 +588,7 @@ static void print_instr_format_specific(const Instruction *instr, FILE *output)
}
}
if (instr->isVOP3()) {
- const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
+ const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr);
switch (vop3->omod) {
case 1:
fprintf(output, " *2");
@@ -693,8 +693,8 @@ void aco_print_instr(const Instruction *instr, FILE *output)
bool *const neg = (bool *)alloca(instr->operands.size() * sizeof(bool));
bool *const opsel = (bool *)alloca(instr->operands.size() * sizeof(bool));
uint8_t *const sel = (uint8_t *)alloca(instr->operands.size() * sizeof(uint8_t));
- if ((int)instr->format & (int)Format::VOP3A) {
- const VOP3A_instruction* vop3 = static_cast<const VOP3A_instruction*>(instr);
+ if ((int)instr->format & (int)Format::VOP3) {
+ const VOP3_instruction* vop3 = static_cast<const VOP3_instruction*>(instr);
for (unsigned i = 0; i < instr->operands.size(); ++i) {
abs[i] = vop3->abs[i];
neg[i] = vop3->neg[i];
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 12d8cb354cf..4b083ee6013 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -503,7 +503,7 @@ void add_subdword_operand(ra_ctx& ctx, aco_ptr<Instruction>& instr, unsigned idx
update_phi_map(ctx, tmp.get(), instr.get());
return;
} else if (rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, idx, byte / 2)) {
- VOP3A_instruction* vop3 = static_cast<VOP3A_instruction *>(instr.get());
+ VOP3_instruction* vop3 = static_cast<VOP3_instruction *>(instr.get());
vop3->opsel |= (byte / 2) << idx;
return;
} else if (instr->format == Format::VOP3P && byte == 2) {
@@ -614,7 +614,7 @@ void add_subdword_definition(Program *program, aco_ptr<Instruction>& instr, unsi
convert_to_SDWA(chip, instr);
return;
} else if (reg.byte() && rc.bytes() == 2 && can_use_opsel(chip, instr->opcode, -1, reg.byte() / 2)) {
- VOP3A_instruction *vop3 = static_cast<VOP3A_instruction *>(instr.get());
+ VOP3_instruction *vop3 = static_cast<VOP3_instruction *>(instr.get());
if (reg.byte() == 2)
vop3->opsel |= (1 << 3); /* dst in high half */
return;
@@ -2478,7 +2478,7 @@ void register_allocation(Program *program, std::vector<IDSet>& live_out_per_bloc
/* change the instruction to VOP3 to enable an arbitrary register pair as dst */
aco_ptr<Instruction> tmp = std::move(instr);
Format format = asVOP3(tmp->format);
- instr.reset(create_instruction<VOP3A_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
+ instr.reset(create_instruction<VOP3_instruction>(tmp->opcode, format, tmp->operands.size(), tmp->definitions.size()));
std::copy(tmp->operands.begin(), tmp->operands.end(), instr->operands.begin());
std::copy(tmp->definitions.begin(), tmp->definitions.end(), instr->definitions.begin());
update_phi_map(ctx, tmp.get(), instr.get());
diff --git a/src/amd/compiler/aco_validate.cpp b/src/amd/compiler/aco_validate.cpp
index b7f9fef2b36..5b6aa53d6ea 100644
--- a/src/amd/compiler/aco_validate.cpp
+++ b/src/amd/compiler/aco_validate.cpp
@@ -136,7 +136,7 @@ bool validate_ir(Program* program)
base_format == Format::VOP1 ||
base_format == Format::VOPC ||
base_format == Format::VINTRP,
- "Format cannot have VOP3A/VOP3B applied", instr.get());
+ "Format cannot have VOP3/VOP3B applied", instr.get());
}
/* check SDWA */
@@ -188,7 +188,7 @@ bool validate_ir(Program* program)
/* check opsel */
if (instr->isVOP3()) {
- VOP3A_instruction *vop3 = static_cast<VOP3A_instruction*>(instr.get());
+ VOP3_instruction *vop3 = static_cast<VOP3_instruction*>(instr.get());
check(vop3->opsel == 0 || program->chip_class >= GFX9, "Opsel is only supported on GFX9+", instr.get());
for (unsigned i = 0; i < 3; i++) {
diff --git a/src/amd/compiler/tests/test_assembler.cpp b/src/amd/compiler/tests/test_assembler.cpp
index 23f02dd3926..bd6055cc20a 100644
--- a/src/amd/compiler/tests/test_assembler.cpp
+++ b/src/amd/compiler/tests/test_assembler.cpp
@@ -235,7 +235,7 @@ BEGIN_TEST(assembler.v_add3)
//~gfx9>> v_add3_u32 v0, 0, 0, 0 ; d1ff0000 02010080
//~gfx10>> v_add3_u32 v0, 0, 0, 0 ; d76d0000 02010080
- aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)};
+ aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand(0u);
add3->operands[1] = Operand(0u);
add3->operands[2] = Operand(0u);
@@ -253,7 +253,7 @@ BEGIN_TEST(assembler.v_add3_clamp)
//~gfx9>> integer addition + clamp ; d1ff8000 02010080
//~gfx10>> integer addition + clamp ; d76d8000 02010080
- aco_ptr<VOP3A_instruction> add3{create_instruction<VOP3A_instruction>(aco_opcode::v_add3_u32, Format::VOP3A, 3, 1)};
+ aco_ptr<VOP3_instruction> add3{create_instruction<VOP3_instruction>(aco_opcode::v_add3_u32, Format::VOP3, 3, 1)};
add3->operands[0] = Operand(0u);
add3->operands[1] = Operand(0u);
add3->operands[2] = Operand(0u);
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index 679812faac8..f914bce922f 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -735,7 +735,7 @@ BEGIN_TEST(optimize.add3)
//! v1: %res1 = v_add_u32 %a, %tmp1
//! p_unit_test 1, %res1
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
- static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
+ static_cast<VOP3_instruction *>(tmp.instr)->clamp = true;
writeout(1, bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp));
//! v1: %tmp2 = v_add_u32 %b, %c
@@ -743,7 +743,7 @@ BEGIN_TEST(optimize.add3)
//! p_unit_test 2, %res2
tmp = bld.vop2(aco_opcode::v_add_u32, bld.def(v1), inputs[1], inputs[2]);
tmp = bld.vop2_e64(aco_opcode::v_add_u32, bld.def(v1), inputs[0], tmp);
- static_cast<VOP3A_instruction *>(tmp.instr)->clamp = true;
+ static_cast<VOP3_instruction *>(tmp.instr)->clamp = true;
writeout(2, tmp);
finish_opt_test();
More information about the mesa-commit mailing list