Mesa (main): aco/ra: Move mac encoding optimization to its own function.
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jul 6 10:21:00 UTC 2022
Module: Mesa
Branch: main
Commit: 87b4f3daa1f74c5561e57e92a314a59da0d62bb4
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=87b4f3daa1f74c5561e57e92a314a59da0d62bb4
Author: Georg Lehmann <dadschoorse at gmail.com>
Date: Thu May 19 15:50:30 2022 +0200
aco/ra: Move mac encoding optimization to its own function.
Signed-off-by: Georg Lehmann <dadschoorse at gmail.com>
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/15999>
---
src/amd/compiler/aco_register_allocation.cpp | 114 ++++++++++++++-------------
1 file changed, 60 insertions(+), 54 deletions(-)
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 922c5b849e5..32ddca431b0 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -2541,6 +2541,65 @@ get_affinities(ra_ctx& ctx, std::vector<IDSet>& live_out_per_block)
}
}
+void
+optimize_encoding_vop2(Program* program, ra_ctx& ctx, RegisterFile& register_file,
+ aco_ptr<Instruction>& instr)
+{
+ /* try to optimize v_mad_f32 -> v_mac_f32 */
+ if ((instr->opcode != aco_opcode::v_mad_f32 &&
+ (instr->opcode != aco_opcode::v_fma_f32 || program->gfx_level < GFX10) &&
+ instr->opcode != aco_opcode::v_mad_f16 && instr->opcode != aco_opcode::v_mad_legacy_f16 &&
+ (instr->opcode != aco_opcode::v_fma_f16 || program->gfx_level < GFX10) &&
+ (instr->opcode != aco_opcode::v_pk_fma_f16 || program->gfx_level < GFX10) &&
+ (instr->opcode != aco_opcode::v_mad_legacy_f32 || !program->dev.has_mac_legacy32) &&
+ (instr->opcode != aco_opcode::v_fma_legacy_f32 || !program->dev.has_mac_legacy32) &&
+ (instr->opcode != aco_opcode::v_dot4_i32_i8 || program->family == CHIP_VEGA20)) ||
+ !instr->operands[2].isTemp() || !instr->operands[2].isKillBeforeDef() ||
+ instr->operands[2].getTemp().type() != RegType::vgpr ||
+ ((!instr->operands[0].isTemp() || instr->operands[0].getTemp().type() != RegType::vgpr) &&
+ (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)) ||
+ instr->usesModifiers() || instr->operands[0].physReg().byte() != 0 ||
+ instr->operands[1].physReg().byte() != 0 || instr->operands[2].physReg().byte() != 0)
+ return;
+
+ if (!instr->operands[1].isTemp() || instr->operands[1].getTemp().type() != RegType::vgpr)
+ std::swap(instr->operands[0], instr->operands[1]);
+
+ unsigned def_id = instr->definitions[0].tempId();
+ if (ctx.assignments[def_id].affinity) {
+ assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
+ if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
+ !register_file.test(affinity.reg, instr->operands[2].bytes()))
+ return;
+ }
+
+ static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
+ "Invalid direct instruction cast.");
+ static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
+ "Invalid direct instruction cast.");
+ instr->format = Format::VOP2;
+ switch (instr->opcode) {
+ case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
+ case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
+ case aco_opcode::v_mad_f16:
+ case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
+ case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
+ case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
+ case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
+ case aco_opcode::v_mad_legacy_f32: instr->opcode = aco_opcode::v_mac_legacy_f32; break;
+ case aco_opcode::v_fma_legacy_f32: instr->opcode = aco_opcode::v_fmac_legacy_f32; break;
+ default: break;
+ }
+}
+
+void
+optimize_encoding(Program* program, ra_ctx& ctx, RegisterFile& register_file,
+ aco_ptr<Instruction>& instr)
+{
+ if (instr->isVALU())
+ optimize_encoding_vop2(program, ctx, register_file, instr);
+}
+
} /* end namespace */
void
@@ -2664,60 +2723,7 @@ register_allocation(Program* program, std::vector<IDSet>& live_out_per_block, ra
register_file.clear(op);
}
- /* try to optimize v_mad_f32 -> v_mac_f32 */
- if ((instr->opcode == aco_opcode::v_mad_f32 ||
- (instr->opcode == aco_opcode::v_fma_f32 && program->gfx_level >= GFX10) ||
- instr->opcode == aco_opcode::v_mad_f16 ||
- instr->opcode == aco_opcode::v_mad_legacy_f16 ||
- (instr->opcode == aco_opcode::v_fma_f16 && program->gfx_level >= GFX10) ||
- (instr->opcode == aco_opcode::v_pk_fma_f16 && program->gfx_level >= GFX10) ||
- (instr->opcode == aco_opcode::v_mad_legacy_f32 && program->dev.has_mac_legacy32) ||
- (instr->opcode == aco_opcode::v_fma_legacy_f32 && program->dev.has_mac_legacy32) ||
- (instr->opcode == aco_opcode::v_dot4_i32_i8 && program->family != CHIP_VEGA20)) &&
- instr->operands[2].isTemp() && instr->operands[2].isKillBeforeDef() &&
- instr->operands[2].getTemp().type() == RegType::vgpr &&
- ((instr->operands[0].isTemp() &&
- instr->operands[0].getTemp().type() == RegType::vgpr) ||
- (instr->operands[1].isTemp() &&
- instr->operands[1].getTemp().type() == RegType::vgpr)) &&
- !instr->usesModifiers() && instr->operands[0].physReg().byte() == 0 &&
- instr->operands[1].physReg().byte() == 0 && instr->operands[2].physReg().byte() == 0) {
- if (!instr->operands[1].isTemp() ||
- instr->operands[1].getTemp().type() != RegType::vgpr)
- std::swap(instr->operands[0], instr->operands[1]);
-
- unsigned def_id = instr->definitions[0].tempId();
- bool use_vop2 = true;
- if (ctx.assignments[def_id].affinity) {
- assignment& affinity = ctx.assignments[ctx.assignments[def_id].affinity];
- if (affinity.assigned && affinity.reg != instr->operands[2].physReg() &&
- !register_file.test(affinity.reg, instr->operands[2].bytes()))
- use_vop2 = false;
- }
- if (use_vop2) {
- static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3_instruction),
- "Invalid direct instruction cast.");
- static_assert(sizeof(VOP2_instruction) <= sizeof(VOP3P_instruction),
- "Invalid direct instruction cast.");
- instr->format = Format::VOP2;
- switch (instr->opcode) {
- case aco_opcode::v_mad_f32: instr->opcode = aco_opcode::v_mac_f32; break;
- case aco_opcode::v_fma_f32: instr->opcode = aco_opcode::v_fmac_f32; break;
- case aco_opcode::v_mad_f16:
- case aco_opcode::v_mad_legacy_f16: instr->opcode = aco_opcode::v_mac_f16; break;
- case aco_opcode::v_fma_f16: instr->opcode = aco_opcode::v_fmac_f16; break;
- case aco_opcode::v_pk_fma_f16: instr->opcode = aco_opcode::v_pk_fmac_f16; break;
- case aco_opcode::v_dot4_i32_i8: instr->opcode = aco_opcode::v_dot4c_i32_i8; break;
- case aco_opcode::v_mad_legacy_f32:
- instr->opcode = aco_opcode::v_mac_legacy_f32;
- break;
- case aco_opcode::v_fma_legacy_f32:
- instr->opcode = aco_opcode::v_fmac_legacy_f32;
- break;
- default: break;
- }
- }
- }
+ optimize_encoding(program, ctx, register_file, instr);
/* Handle definitions which must have the same register as an operand.
* We expect that the definition has the same size as the operand, otherwise the new
More information about the mesa-commit mailing list