Mesa (main): aco: remove DPP when applying constants/literals/sgprs
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Aug 31 17:15:49 UTC 2021
Module: Mesa
Branch: main
Commit: 33ddbd220f26391fd117f484f6b566d17d942091
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=33ddbd220f26391fd117f484f6b566d17d942091
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Mon Aug 30 10:30:45 2021 +0100
aco: remove DPP when applying constants/literals/sgprs
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/12601>
---
src/amd/compiler/aco_ir.h | 6 +++++
src/amd/compiler/aco_optimizer.cpp | 13 ++++++++---
src/amd/compiler/tests/test_optimizer.cpp | 39 ++++++++++++++++++++++---------
3 files changed, 44 insertions(+), 14 deletions(-)
diff --git a/src/amd/compiler/aco_ir.h b/src/amd/compiler/aco_ir.h
index 2b0f7d34550..c96dcce7892 100644
--- a/src/amd/compiler/aco_ir.h
+++ b/src/amd/compiler/aco_ir.h
@@ -290,6 +290,12 @@ asSDWA(Format format)
return (Format)((uint32_t)Format::SDWA | (uint32_t)format);
}
+constexpr Format /* Returns `format` with the Format::DPP bit(s) cleared. */
+withoutDPP(Format format)
+{
+ return (Format)((uint32_t)format & ~(uint32_t)Format::DPP); /* all non-DPP bits are kept as-is */
+}
+
enum class RegType {
none = 0,
sgpr,
diff --git a/src/amd/compiler/aco_optimizer.cpp b/src/amd/compiler/aco_optimizer.cpp
index 634c0939ea1..27c993e83fc 100644
--- a/src/amd/compiler/aco_optimizer.cpp
+++ b/src/amd/compiler/aco_optimizer.cpp
@@ -560,10 +560,11 @@ pseudo_propagate_temp(opt_ctx& ctx, aco_ptr<Instruction>& instr, Temp temp, unsi
return true;
}
+/* This expects the DPP modifier to be removed. */
bool
can_apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
- if ((instr->isSDWA() && ctx.program->chip_class < GFX9) || instr->isDPP())
+ if (instr->isSDWA() && ctx.program->chip_class < GFX9)
return false;
return instr->opcode != aco_opcode::v_readfirstlane_b32 &&
instr->opcode != aco_opcode::v_readlane_b32 &&
@@ -1010,6 +1011,7 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
/* applying SGPRs to VOP1 doesn't increase code size and DCE is helped by doing it earlier */
if (info.is_temp() && info.temp.type() == RegType::sgpr && can_apply_sgprs(ctx, instr) &&
instr->operands.size() == 1) {
+ instr->format = withoutDPP(instr->format);
instr->operands[i].setTemp(info.temp);
info = ctx.info[info.temp.id()];
}
@@ -1058,13 +1060,14 @@ label_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
unsigned bits = get_operand_size(instr, i);
if (info.is_constant(bits) && alu_can_accept_constant(instr->opcode, i) &&
- (!instr->isSDWA() || ctx.program->chip_class >= GFX9) && !instr->isDPP()) {
+ (!instr->isSDWA() || ctx.program->chip_class >= GFX9)) {
Operand op = get_constant_op(ctx, info, bits);
perfwarn(ctx.program, instr->opcode == aco_opcode::v_cndmask_b32 && i == 2,
"v_cndmask_b32 with a constant selector", instr.get());
if (i == 0 || instr->isSDWA() || instr->isVOP3P() ||
instr->opcode == aco_opcode::v_readlane_b32 ||
instr->opcode == aco_opcode::v_writelane_b32) {
+ instr->format = withoutDPP(instr->format);
instr->operands[i] = op;
continue;
} else if (!instr->isVOP3() && can_swap_operands(instr, &instr->opcode)) {
@@ -2740,6 +2743,9 @@ apply_sgprs(opt_ctx& ctx, aco_ptr<Instruction>& instr)
if (new_sgpr && num_sgprs >= max_sgprs)
continue;
+ if (sgpr_idx == 0)
+ instr->format = withoutDPP(instr->format);
+
if (sgpr_idx == 0 || instr->isVOP3() || instr->isSDWA() || instr->isVOP3P() ||
info.is_extract()) {
/* can_apply_extract() checks SGPR encoding restrictions */
@@ -3734,7 +3740,7 @@ select_instruction(opt_ctx& ctx, aco_ptr<Instruction>& instr)
}
}
- if (instr->isSDWA() || instr->isDPP() || (instr->isVOP3() && ctx.program->chip_class < GFX10) ||
+ if (instr->isSDWA() || (instr->isVOP3() && ctx.program->chip_class < GFX10) ||
(instr->isVOP3P() && ctx.program->chip_class < GFX10))
return; /* some encodings can't ever take literals */
@@ -3858,6 +3864,7 @@ apply_literals(opt_ctx& ctx, aco_ptr<Instruction>& instr)
unsigned bits = get_operand_size(instr, i);
if (op.isTemp() && ctx.info[op.tempId()].is_literal(bits) && ctx.uses[op.tempId()] == 0) {
Operand literal = Operand::c32(ctx.info[op.tempId()].val);
+ instr->format = withoutDPP(instr->format);
if (instr->isVALU() && i > 0 && instr->format != Format::VOP3P)
to_VOP3(ctx, instr);
instr->operands[i] = literal;
diff --git a/src/amd/compiler/tests/test_optimizer.cpp b/src/amd/compiler/tests/test_optimizer.cpp
index ce4e925b779..9609fea4f2b 100644
--- a/src/amd/compiler/tests/test_optimizer.cpp
+++ b/src/amd/compiler/tests/test_optimizer.cpp
@@ -1037,23 +1037,40 @@ BEGIN_TEST(optimize.dpp_prop)
if (!setup_cs("v1 s1", GFX10))
return;
- //! v1: %zero = p_parallelcopy 0
- //! v1: %res0 = v_mul_f32 %zero, %a row_shl:1 bound_ctrl:1
+ //! v1: %one = p_parallelcopy 1
+ //! v1: %res0 = v_mul_f32 1, %a
//! p_unit_test 0, %res0
- Temp zero = bld.copy(bld.def(v1), Operand::zero());
- writeout(0, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), zero, inputs[0], dpp_row_sl(1)));
+ Temp one = bld.copy(bld.def(v1), Operand::c32(1));
+ writeout(0, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), one, inputs[0], dpp_row_sl(1)));
- //! v1: %literal = p_parallelcopy 0x12345678
- //! v1: %res1 = v_mul_f32 %literal, %a row_shl:1 bound_ctrl:1
+ //! v1: %res1 = v_mul_f32 %a, %one row_shl:1 bound_ctrl:1
//! p_unit_test 1, %res1
- Temp literal = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
- writeout(1, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal, inputs[0], dpp_row_sl(1)));
+ writeout(1, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], one, dpp_row_sl(1)));
- //! v1: %b_v = p_parallelcopy %b
- //! v1: %res2 = v_mul_f32 %b_v, %a row_shl:1 bound_ctrl:1
+ //! v1: %res2 = v_mul_f32 0x12345678, %a
//! p_unit_test 2, %res2
+ Temp literal1 = bld.copy(bld.def(v1), Operand::c32(0x12345678u));
+ writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), literal1, inputs[0], dpp_row_sl(1)));
+
+ //! v1: %literal2 = p_parallelcopy 0x12345679
+ //! v1: %res3 = v_mul_f32 %a, %literal row_shl:1 bound_ctrl:1
+ //! p_unit_test 3, %res3
+ Temp literal2 = bld.copy(bld.def(v1), Operand::c32(0x12345679u));
+ writeout(3, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], literal2, dpp_row_sl(1)));
+
+ //! v1: %b_v = p_parallelcopy %b
+ //! v1: %res4 = v_mul_f32 %b, %a
+ //! p_unit_test 4, %res4
Temp b_v = bld.copy(bld.def(v1), inputs[1]);
- writeout(2, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), b_v, inputs[0], dpp_row_sl(1)));
+ writeout(4, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), b_v, inputs[0], dpp_row_sl(1)));
+
+ //! v1: %res5 = v_mul_f32 %a, %b_v row_shl:1 bound_ctrl:1
+ //! p_unit_test 5, %res5
+ writeout(5, bld.vop2_dpp(aco_opcode::v_mul_f32, bld.def(v1), inputs[0], b_v, dpp_row_sl(1)));
+
+ //! v1: %res6 = v_rcp_f32 %b
+ //! p_unit_test 6, %res6
+ writeout(6, bld.vop1_dpp(aco_opcode::v_rcp_f32, bld.def(v1), b_v, dpp_row_sl(1)));
finish_opt_test();
END_TEST
More information about the mesa-commit
mailing list