Mesa (master): aco/ra: Use PhysReg for member functions of PhysRegInterval
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Wed Jan 13 18:31:40 UTC 2021
Module: Mesa
Branch: master
Commit: c3660f4781943632180f866742daf5e756fd07d0
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=c3660f4781943632180f866742daf5e756fd07d0
Author: Tony Wasserka <tony.wasserka at gmx.de>
Date: Thu Oct 29 18:08:13 2020 +0100
aco/ra: Use PhysReg for member functions of PhysRegInterval
This replaces the various PhysReg{lb} casts that had been all over the place.
Reviewed-by: Daniel Schürmann <daniel at schuermann.dev>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/7799>
---
src/amd/compiler/aco_register_allocation.cpp | 98 ++++++++++++++--------------
1 file changed, 49 insertions(+), 49 deletions(-)
diff --git a/src/amd/compiler/aco_register_allocation.cpp b/src/amd/compiler/aco_register_allocation.cpp
index 71c47b1960d..7e73edec3b6 100644
--- a/src/amd/compiler/aco_register_allocation.cpp
+++ b/src/amd/compiler/aco_register_allocation.cpp
@@ -100,19 +100,19 @@ struct PhysRegIterator {
using pointer = const unsigned*;
using iterator_category = std::bidirectional_iterator_tag;
- unsigned reg;
+ PhysReg reg;
unsigned operator*() const {
return reg;
}
PhysRegIterator& operator++() {
- reg++;
+ reg.reg_b += 4;
return *this;
}
PhysRegIterator& operator--() {
- reg--;
+ reg.reg_b -= 4;
return *this;
}
@@ -131,21 +131,21 @@ struct PhysRegIterator {
/* Half-open register interval used in "sliding window"-style for-loops */
struct PhysRegInterval {
- unsigned lo_;
+ PhysReg lo_;
unsigned size;
/* Inclusive lower bound */
- unsigned lo() const {
+ PhysReg lo() const {
return lo_;
}
/* Exclusive upper bound */
- unsigned hi() const {
- return lo() + size;
+ PhysReg hi() const {
+ return PhysReg { lo() + size };
}
PhysRegInterval& operator+=(uint32_t stride) {
- lo_ += stride;
+ lo_ = PhysReg { lo_.reg() + stride };
return *this;
}
@@ -154,7 +154,7 @@ struct PhysRegInterval {
}
/* Construct a half-open interval, excluding the end register */
- static PhysRegInterval from_until(unsigned first, unsigned end) {
+ static PhysRegInterval from_until(PhysReg first, PhysReg end) {
return { first, end - first };
}
@@ -171,7 +171,7 @@ struct PhysRegInterval {
}
PhysRegIterator end() const {
- return { lo_ + size };
+ return { PhysReg { lo_ + size } };
}
};
@@ -207,9 +207,9 @@ struct DefInfo {
stride = get_stride(rc);
if (rc.type() == RegType::vgpr) {
- bounds = { 256, (unsigned)ctx.program->max_reg_demand.vgpr };
+ bounds = { PhysReg { 256 }, (unsigned)ctx.program->max_reg_demand.vgpr };
} else {
- bounds = { 0, (unsigned)ctx.program->max_reg_demand.sgpr };
+ bounds = { PhysReg { 0 }, (unsigned)ctx.program->max_reg_demand.sgpr };
}
if (rc.is_subdword() && operand >= 0) {
@@ -755,11 +755,11 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
if (stride == 1) {
/* best fit algorithm: find the smallest gap to fit in the variable */
- PhysRegInterval best_gap { 0xFFFF, 0xFFFF };
+ PhysRegInterval best_gap { PhysReg { 0 }, UINT_MAX };
const unsigned max_gpr = (rc.type() == RegType::vgpr) ? (256 + ctx.max_used_vgpr) : ctx.max_used_sgpr;
PhysRegIterator reg_it = bounds.begin();
- const PhysRegIterator end_it = std::min(bounds.end(), std::max(PhysRegIterator { max_gpr + 1 }, reg_it));
+ const PhysRegIterator end_it = std::min(bounds.end(), std::max(PhysRegIterator { PhysReg { max_gpr + 1 } }, reg_it));
while (reg_it != bounds.end()) {
/* Find the next chunk of available register slots */
reg_it = std::find_if(reg_it, end_it, is_free);
@@ -773,12 +773,12 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
next_nonfree_it = bounds.end();
}
- PhysRegInterval gap = PhysRegInterval::from_until(*reg_it, *next_nonfree_it);
+ PhysRegInterval gap = PhysRegInterval::from_until(PhysReg{*reg_it}, PhysReg{*next_nonfree_it});
/* early return on exact matches */
if (size == gap.size) {
adjust_max_used_regs(ctx, rc, gap.lo());
- return {PhysReg{gap.lo()}, true};
+ return {gap.lo(), true};
}
/* check if it fits and the gap size is smaller */
@@ -790,7 +790,7 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
reg_it = next_nonfree_it;
}
- if (best_gap.lo() == 0xFFFF)
+ if (best_gap.size == UINT_MAX)
return {{}, false};
/* find best position within gap by leaving a good stride for other variables*/
@@ -799,11 +799,11 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
if (((best_gap.lo() + size) % 8 != 0 && (best_gap.lo() + buffer) % 8 == 0) ||
((best_gap.lo() + size) % 4 != 0 && (best_gap.lo() + buffer) % 4 == 0) ||
((best_gap.lo() + size) % 2 != 0 && (best_gap.lo() + buffer) % 2 == 0))
- best_gap = { best_gap.lo() + buffer, best_gap.size - buffer };
+ best_gap = { PhysReg { best_gap.lo() + buffer }, best_gap.size - buffer };
}
adjust_max_used_regs(ctx, rc, best_gap.lo());
- return {PhysReg{best_gap.lo()}, true};
+ return {best_gap.lo(), true};
}
for (PhysRegInterval reg_win = { bounds.lo(), size }; reg_win.hi() <= bounds.hi(); reg_win += stride) {
@@ -814,7 +814,7 @@ std::pair<PhysReg, bool> get_reg_simple(ra_ctx& ctx,
bool is_valid = std::all_of(std::next(reg_win.begin()), reg_win.end(), is_free);
if (is_valid) {
adjust_max_used_regs(ctx, rc, reg_win.lo());
- return {PhysReg{reg_win.lo()}, true};
+ return {reg_win.lo(), true};
}
}
@@ -929,7 +929,7 @@ bool get_regs_for_copies(ra_ctx& ctx,
res = get_reg_simple(ctx, reg_file, info);
if (!res.second && def_reg.hi() <= bounds.hi()) {
unsigned lo = (def_reg.hi() + info.stride - 1) & ~(info.stride - 1);
- info.bounds = PhysRegInterval::from_until(lo, bounds.hi());
+ info.bounds = PhysRegInterval::from_until(PhysReg{lo}, bounds.hi());
res = get_reg_simple(ctx, reg_file, info);
}
}
@@ -947,7 +947,7 @@ bool get_regs_for_copies(ra_ctx& ctx,
continue;
}
- unsigned best_pos = bounds.lo();
+ PhysReg best_pos = bounds.lo();
unsigned num_moves = 0xFF;
unsigned num_vars = 0;
@@ -1017,10 +1017,10 @@ bool get_regs_for_copies(ra_ctx& ctx,
PhysRegInterval reg_win { best_pos, size };
/* collect variables and block reg file */
- std::set<std::pair<unsigned, unsigned>> new_vars = collect_vars(ctx, reg_file, PhysReg{reg_win.lo()}, size);
+ std::set<std::pair<unsigned, unsigned>> new_vars = collect_vars(ctx, reg_file, reg_win.lo(), size);
/* mark the area as blocked */
- reg_file.block(PhysReg{reg_win.lo()}, var.rc);
+ reg_file.block(reg_win.lo(), var.rc);
adjust_max_used_regs(ctx, var.rc, reg_win.lo());
if (!get_regs_for_copies(ctx, reg_file, parallelcopies, new_vars, bounds, instr, def_reg))
@@ -1030,7 +1030,7 @@ bool get_regs_for_copies(ra_ctx& ctx,
Temp tmp = Temp(id, var.rc);
Operand pc_op = Operand(tmp);
pc_op.setFixed(var.reg);
- Definition pc_def = Definition(PhysReg{reg_win.lo()}, pc_op.regClass());
+ Definition pc_def = Definition(reg_win.lo(), pc_op.regClass());
parallelcopies.emplace_back(pc_op, pc_def);
}
@@ -1050,7 +1050,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
RegClass rc = info.rc;
/* check how many free regs we have */
- unsigned regs_free = reg_file.count_zero(PhysReg{bounds.lo()}, bounds.size);
+ unsigned regs_free = reg_file.count_zero(bounds.lo(), bounds.size);
/* mark and count killed operands */
unsigned killed_ops = 0;
@@ -1088,11 +1088,11 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
/* first check if the register window starts in the middle of an
* allocated variable: this is what we have to fix to allow for
* num_moves > size */
- if (reg_win.lo() > bounds.lo() && !reg_file.is_empty_or_blocked(PhysReg(reg_win.lo())) &&
- reg_file.get_id(PhysReg(reg_win.lo())) == reg_file.get_id(PhysReg(reg_win.lo()).advance(-1)))
+ if (reg_win.lo() > bounds.lo() && !reg_file.is_empty_or_blocked(reg_win.lo()) &&
+ reg_file.get_id(reg_win.lo()) == reg_file.get_id(reg_win.lo().advance(-1)))
continue;
- if (reg_win.hi() < bounds.hi() && !reg_file.is_empty_or_blocked(PhysReg(reg_win.hi()).advance(-1)) &&
- reg_file.get_id(PhysReg(reg_win.hi()).advance(-1)) == reg_file.get_id(PhysReg(reg_win.hi())))
+ if (reg_win.hi() < bounds.hi() && !reg_file.is_empty_or_blocked(reg_win.hi().advance(-1)) &&
+ reg_file.get_id(reg_win.hi().advance(-1)) == reg_file.get_id(reg_win.hi()))
continue;
/* second, check that we have at most k=num_moves elements in the window
@@ -1162,14 +1162,14 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
if (instr->opcode == aco_opcode::p_create_vector) {
/* move killed operands which aren't yet at the correct position (GFX9+)
* or which are in the definition space */
- PhysReg reg = PhysReg{best_win.lo()};
+ PhysReg reg = best_win.lo();
for (Operand& op : instr->operands) {
if (op.isTemp() && op.isFirstKillBeforeDef() &&
op.getTemp().type() == rc.type()) {
if (op.physReg() != reg &&
(ctx.program->chip_class >= GFX9 ||
- (op.physReg().advance(op.bytes()) > PhysReg{best_win.lo()} &&
- op.physReg() < PhysReg{best_win.hi()}))) {
+ (op.physReg().advance(op.bytes()) > best_win.lo() &&
+ op.physReg() < best_win.hi()))) {
vars.emplace(op.bytes(), op.tempId());
tmp_file.clear(op);
} else {
@@ -1193,7 +1193,7 @@ std::pair<PhysReg, bool> get_reg_impl(ra_ctx& ctx,
parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
adjust_max_used_regs(ctx, rc, best_win.lo());
- return {PhysReg{best_win.lo()}, true};
+ return {best_win.lo(), true};
}
bool get_reg_specified(ra_ctx& ctx,
@@ -1215,14 +1215,14 @@ bool get_reg_specified(ra_ctx& ctx,
PhysRegInterval bounds;
if (rc.type() == RegType::vgpr) {
- bounds = {256, (unsigned)ctx.program->max_reg_demand.vgpr };
+ bounds = { PhysReg { 256 }, (unsigned)ctx.program->max_reg_demand.vgpr };
} else {
- bounds = { 0, (unsigned)ctx.program->max_reg_demand.sgpr };
+ bounds = { PhysReg { 0 }, (unsigned)ctx.program->max_reg_demand.sgpr };
if (reg % stride != 0)
return false;
}
- PhysRegInterval reg_win = { reg.reg(), rc.size() };
+ PhysRegInterval reg_win = { reg, rc.size() };
if (!bounds.contains(reg_win))
return false;
@@ -1339,7 +1339,7 @@ PhysReg get_reg(ra_ctx& ctx,
/* We should only fail here because keeping under the limit would require
* too many moves. */
- assert(reg_file.count_zero(PhysReg{info.bounds.lo()}, info.bounds.size) >= info.size);
+ assert(reg_file.count_zero(info.bounds.lo(), info.bounds.size) >= info.size);
//FIXME: if nothing helps, shift-rotate the registers to make space
@@ -1360,14 +1360,14 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
uint32_t stride = get_stride(rc);
PhysRegInterval bounds;
if (rc.type() == RegType::vgpr) {
- bounds = { 256, (unsigned)ctx.program->max_reg_demand.vgpr };
+ bounds = { PhysReg { 256 }, (unsigned)ctx.program->max_reg_demand.vgpr };
} else {
- bounds = { 0, (unsigned)ctx.program->max_reg_demand.sgpr };
+ bounds = { PhysReg { 0 }, (unsigned)ctx.program->max_reg_demand.sgpr };
}
//TODO: improve p_create_vector for sub-dword vectors
- unsigned best_pos = -1;
+ PhysReg best_pos { 0xFFF };
unsigned num_moves = 0xFF;
bool best_war_hint = true;
@@ -1383,7 +1383,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
unsigned reg_lower = instr->operands[i].physReg().reg_b - offset;
if (reg_lower % 4)
continue;
- PhysRegInterval reg_win = { reg_lower / 4, size };
+ PhysRegInterval reg_win = { PhysReg { reg_lower / 4 }, size };
unsigned k = 0;
/* no need to check multiple times */
@@ -1394,9 +1394,9 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
// TODO: this can be improved */
if (!bounds.contains(reg_win) || reg_win.lo() % stride != 0)
continue;
- if (reg_win.lo() > bounds.lo() && reg_file[reg_win.lo()] != 0 && reg_file.get_id(PhysReg(reg_win.lo())) == reg_file.get_id(PhysReg(reg_win.lo()).advance(-1)))
+ if (reg_win.lo() > bounds.lo() && reg_file[reg_win.lo()] != 0 && reg_file.get_id(reg_win.lo()) == reg_file.get_id(reg_win.lo().advance(-1)))
continue;
- if (reg_win.hi() < bounds.hi() && reg_file[reg_win.hi() - 1] != 0 && reg_file.get_id(PhysReg(reg_win.hi()).advance(-1)) == reg_file.get_id(PhysReg(reg_win.hi())))
+ if (reg_win.hi() < bounds.hi() && reg_file[reg_win.hi().advance(-4)] != 0 && reg_file.get_id(reg_win.hi().advance(-1)) == reg_file.get_id(reg_win.hi()))
continue;
/* count variables to be moved and check war_hint */
@@ -1452,18 +1452,18 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
if (instr->operands[i].isTemp() &&
instr->operands[i].isFirstKillBeforeDef() &&
- instr->operands[i].physReg().reg_b != best_pos * 4 + offset)
+ instr->operands[i].physReg().reg_b != best_pos.reg_b + offset)
tmp_file.fill(instr->operands[i]);
}
/* collect variables to be moved */
- std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, tmp_file, PhysReg{best_pos}, size);
+ std::set<std::pair<unsigned, unsigned>> vars = collect_vars(ctx, tmp_file, best_pos, size);
for (unsigned i = 0, offset = 0; i < instr->operands.size(); offset += instr->operands[i].bytes(), i++) {
if (!instr->operands[i].isTemp() || !instr->operands[i].isFirstKillBeforeDef() ||
instr->operands[i].getTemp().type() != rc.type())
continue;
- bool correct_pos = instr->operands[i].physReg().reg_b == best_pos * 4 + offset;
+ bool correct_pos = instr->operands[i].physReg().reg_b == best_pos.reg_b + offset;
/* GFX9+: move killed operands which aren't yet at the correct position
* Moving all killed operands generally leads to more register swaps.
* This is only done on GFX9+ because of the cheap v_swap instruction.
@@ -1488,7 +1488,7 @@ PhysReg get_reg_create_vector(ra_ctx& ctx,
parallelcopies.insert(parallelcopies.end(), pc.begin(), pc.end());
adjust_max_used_regs(ctx, rc, best_pos);
- return PhysReg{best_pos};
+ return best_pos;
}
void handle_pseudo(ra_ctx& ctx,
@@ -1615,7 +1615,7 @@ void get_reg_for_operand(ra_ctx& ctx, RegisterFile& register_file,
/* find free reg */
PhysReg reg = get_reg(ctx, register_file, pc_op.getTemp(), parallelcopy, ctx.pseudo_dummy);
update_renames(ctx, register_file, parallelcopy, ctx.pseudo_dummy, true);
- Definition pc_def = Definition(PhysReg{reg}, pc_op.regClass());
+ Definition pc_def = Definition(reg, pc_op.regClass());
parallelcopy.emplace_back(pc_op, pc_def);
blocking_var = true;
}
More information about the mesa-commit
mailing list