Mesa (main): aco: make optimize_postRA() work across blocks
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Thu Aug 19 18:36:04 UTC 2021
Module: Mesa
Branch: main
Commit: 2e56e2342094e8ec90afa5265b1c43503f662939
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=2e56e2342094e8ec90afa5265b1c43503f662939
Author: Rhys Perry <pendingchaos02 at gmail.com>
Date: Thu Jul 8 17:43:37 2021 +0100
aco: make optimize_postRA() work across blocks
fossil-db (Sienna Cichlid):
Totals from 46 (0.03% of 150170) affected shaders:
CodeSize: 103672 -> 103488 (-0.18%)
Instrs: 21968 -> 21922 (-0.21%)
Signed-off-by: Rhys Perry <pendingchaos02 at gmail.com>
Reviewed-by: Timur Kristóf <timur.kristof at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/11924>
---
src/amd/compiler/aco_optimizer_postRA.cpp | 109 +++++++++++++++++++++---------
1 file changed, 76 insertions(+), 33 deletions(-)
diff --git a/src/amd/compiler/aco_optimizer_postRA.cpp b/src/amd/compiler/aco_optimizer_postRA.cpp
index d086eff7cef..e612292e822 100644
--- a/src/amd/compiler/aco_optimizer_postRA.cpp
+++ b/src/amd/compiler/aco_optimizer_postRA.cpp
@@ -34,26 +34,65 @@ namespace {
constexpr const size_t max_reg_cnt = 512;
-enum {
- not_written_in_block = -1,
- clobbered = -2,
- const_or_undef = -3,
- written_by_multiple_instrs = -4,
+struct Idx {
+ bool operator==(const Idx& other) const { return block == other.block && instr == other.instr; }
+ bool operator!=(const Idx& other) const { return !operator==(other); }
+
+ bool found() const { return block != UINT32_MAX; }
+
+ uint32_t block;
+ uint32_t instr;
};
+Idx not_written_in_block{UINT32_MAX, 0};
+Idx clobbered{UINT32_MAX, 1};
+Idx const_or_undef{UINT32_MAX, 2};
+Idx written_by_multiple_instrs{UINT32_MAX, 3};
+
+bool
+is_instr_after(Idx second, Idx first)
+{
+ if (first == not_written_in_block && second != not_written_in_block)
+ return true;
+
+ if (!first.found() || !second.found())
+ return false;
+
+ return second.block > first.block || (second.block == first.block && second.instr > first.instr);
+}
+
struct pr_opt_ctx {
Program* program;
Block* current_block;
- int current_instr_idx;
+ uint32_t current_instr_idx;
std::vector<uint16_t> uses;
- std::array<int, max_reg_cnt * 4u> instr_idx_by_regs;
+ std::vector<std::array<Idx, max_reg_cnt>> instr_idx_by_regs;
void reset_block(Block* block)
{
current_block = block;
- current_instr_idx = -1;
- std::fill(instr_idx_by_regs.begin(), instr_idx_by_regs.end(), not_written_in_block);
+ current_instr_idx = 0;
+
+ if ((block->kind & block_kind_loop_header) || block->linear_preds.empty()) {
+ std::fill(instr_idx_by_regs[block->index].begin(), instr_idx_by_regs[block->index].end(),
+ not_written_in_block);
+ } else {
+ unsigned first_pred = block->linear_preds[0];
+ for (unsigned i = 0; i < max_reg_cnt; i++) {
+ bool all_same = std::all_of(
+ std::next(block->linear_preds.begin()), block->linear_preds.end(),
+ [&](unsigned pred)
+ { return instr_idx_by_regs[pred][i] == instr_idx_by_regs[first_pred][i]; });
+
+ if (all_same)
+ instr_idx_by_regs[block->index][i] = instr_idx_by_regs[first_pred][i];
+ else
+ instr_idx_by_regs[block->index][i] = not_written_in_block;
+ }
+ }
}
+
+ Instruction* get(Idx idx) { return program->blocks[idx.block].instructions[idx.instr].get(); }
};
void
@@ -65,36 +104,38 @@ save_reg_writes(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
unsigned dw_size = DIV_ROUND_UP(def.bytes(), 4u);
unsigned r = def.physReg().reg();
- int idx = ctx.current_instr_idx;
+ Idx idx{ctx.current_block->index, ctx.current_instr_idx};
if (def.regClass().is_subdword())
idx = clobbered;
assert(def.size() == dw_size || def.regClass().is_subdword());
- std::fill(&ctx.instr_idx_by_regs[r], &ctx.instr_idx_by_regs[r + dw_size], idx);
+ std::fill(&ctx.instr_idx_by_regs[ctx.current_block->index][r],
+ &ctx.instr_idx_by_regs[ctx.current_block->index][r + dw_size], idx);
}
}
-int
+Idx
last_writer_idx(pr_opt_ctx& ctx, PhysReg physReg, RegClass rc)
{
/* Verify that all of the operand's registers are written by the same instruction. */
- int instr_idx = ctx.instr_idx_by_regs[physReg.reg()];
+ Idx instr_idx = ctx.instr_idx_by_regs[ctx.current_block->index][physReg.reg()];
unsigned dw_size = DIV_ROUND_UP(rc.bytes(), 4u);
unsigned r = physReg.reg();
- bool all_same = std::all_of(&ctx.instr_idx_by_regs[r], &ctx.instr_idx_by_regs[r + dw_size],
- [instr_idx](int i) { return i == instr_idx; });
+ bool all_same = std::all_of(&ctx.instr_idx_by_regs[ctx.current_block->index][r],
+ &ctx.instr_idx_by_regs[ctx.current_block->index][r + dw_size],
+ [instr_idx](Idx i) { return i == instr_idx; });
return all_same ? instr_idx : written_by_multiple_instrs;
}
-int
+Idx
last_writer_idx(pr_opt_ctx& ctx, const Operand& op)
{
if (op.isConstant() || op.isUndefined())
return const_or_undef;
- int instr_idx = ctx.instr_idx_by_regs[op.physReg().reg()];
+ Idx instr_idx = ctx.instr_idx_by_regs[ctx.current_block->index][op.physReg().reg()];
#ifndef NDEBUG
/* Debug mode: */
@@ -129,21 +170,22 @@ try_apply_branch_vcc(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
instr->operands[0].physReg() != scc)
return;
- int op0_instr_idx = last_writer_idx(ctx, instr->operands[0]);
- int last_vcc_wr_idx = last_writer_idx(ctx, vcc, ctx.program->lane_mask);
- int last_exec_wr_idx = last_writer_idx(ctx, exec, ctx.program->lane_mask);
+ Idx op0_instr_idx = last_writer_idx(ctx, instr->operands[0]);
+ Idx last_vcc_wr_idx = last_writer_idx(ctx, vcc, ctx.program->lane_mask);
+ Idx last_exec_wr_idx = last_writer_idx(ctx, exec, ctx.program->lane_mask);
/* We need to make sure:
* - the operand register used by the branch, and VCC were both written in the current block
* - VCC was NOT written after the operand register
* - EXEC is sane and was NOT written after the operand register
*/
- if (op0_instr_idx < 0 || last_vcc_wr_idx < 0 || last_vcc_wr_idx > op0_instr_idx ||
- last_exec_wr_idx > last_vcc_wr_idx || last_exec_wr_idx < not_written_in_block)
+ if (!op0_instr_idx.found() || !last_vcc_wr_idx.found() ||
+ !is_instr_after(last_vcc_wr_idx, last_exec_wr_idx) ||
+ !is_instr_after(op0_instr_idx, last_vcc_wr_idx))
return;
- aco_ptr<Instruction>& op0_instr = ctx.current_block->instructions[op0_instr_idx];
- aco_ptr<Instruction>& last_vcc_wr = ctx.current_block->instructions[last_vcc_wr_idx];
+ Instruction* op0_instr = ctx.get(op0_instr_idx);
+ Instruction* last_vcc_wr = ctx.get(last_vcc_wr_idx);
if ((op0_instr->opcode != aco_opcode::s_and_b64 /* wave64 */ &&
op0_instr->opcode != aco_opcode::s_and_b32 /* wave32 */) ||
@@ -192,12 +234,12 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
return;
/* Make sure both SCC and Operand 0 are written by the same instruction. */
- int wr_idx = last_writer_idx(ctx, instr->operands[0]);
- int sccwr_idx = last_writer_idx(ctx, scc, s1);
- if (wr_idx < 0 || wr_idx != sccwr_idx)
+ Idx wr_idx = last_writer_idx(ctx, instr->operands[0]);
+ Idx sccwr_idx = last_writer_idx(ctx, scc, s1);
+ if (!wr_idx.found() || wr_idx != sccwr_idx)
return;
- aco_ptr<Instruction>& wr_instr = ctx.current_block->instructions[wr_idx];
+ Instruction* wr_instr = ctx.get(wr_idx);
if (!wr_instr->isSALU() || wr_instr->definitions.size() < 2 ||
wr_instr->definitions[1].physReg() != scc)
return;
@@ -259,11 +301,11 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
scc_op_idx = 2;
}
- int wr_idx = last_writer_idx(ctx, instr->operands[scc_op_idx]);
- if (wr_idx < 0)
+ Idx wr_idx = last_writer_idx(ctx, instr->operands[scc_op_idx]);
+ if (!wr_idx.found())
return;
- aco_ptr<Instruction>& wr_instr = ctx.current_block->instructions[wr_idx];
+ Instruction* wr_instr = ctx.get(wr_idx);
/* Check if we found the pattern above. */
if (wr_instr->opcode != aco_opcode::s_cmp_eq_u32 &&
@@ -299,14 +341,14 @@ try_optimize_scc_nocompare(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
void
process_instruction(pr_opt_ctx& ctx, aco_ptr<Instruction>& instr)
{
- ctx.current_instr_idx++;
-
try_apply_branch_vcc(ctx, instr);
try_optimize_scc_nocompare(ctx, instr);
if (instr)
save_reg_writes(ctx, instr);
+
+ ctx.current_instr_idx++;
}
} // namespace
@@ -317,6 +359,7 @@ optimize_postRA(Program* program)
pr_opt_ctx ctx;
ctx.program = program;
ctx.uses = dead_code_analysis(program);
+ ctx.instr_idx_by_regs.resize(program->blocks.size());
/* Forward pass
* Goes through each instruction exactly once, and can transform
More information about the mesa-commit mailing list