Mesa (master): aco/cssa: rewrite lower_to_cssa pass
GitLab Mirror
gitlab-mirror at kemper.freedesktop.org
Tue Apr 13 18:55:20 UTC 2021
Module: Mesa
Branch: master
Commit: 18ba93e6732a328a8e982f4d18bf7943171b4eb4
URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=18ba93e6732a328a8e982f4d18bf7943171b4eb4
Author: Daniel Schürmann <daniel at schuermann.dev>
Date: Thu Feb 18 21:07:08 2021 +0100
aco/cssa: rewrite lower_to_cssa pass
The previous pass was based on misconceptions and
rounded up with bug fixes. The new pass is entirely
rewritten and basically just one-to-one from the paper:
"Revisiting Out-of-SSA Translation for Correctness, CodeQuality, and Efficiency"
by B. Boissinot et al.
It also incorporates the value-equality testing.
The regressions are mainly due to creating parallelcopies for
exec phis at loop headers (mitigated in the next commit).
Totals from 4933 (3.61% of 136546) affected shaders (Raven):
SpillSGPRs: 16249 -> 16527 (+1.71%); split: -0.28%, +1.99%
SpillVGPRs: 1771 -> 1595 (-9.94%)
CodeSize: 57544436 -> 58280304 (+1.28%); split: -0.00%, +1.28%
Scratch: 176128 -> 179200 (+1.74%)
Instrs: 11265783 -> 11445884 (+1.60%); split: -0.00%, +1.60%
Latency: 552596156 -> 555880540 (+0.59%); split: -0.53%, +1.13%
InvThroughput: 271431862 -> 273097423 (+0.61%); split: -0.18%, +0.79%
VClause: 160240 -> 160241 (+0.00%); split: -0.02%, +0.02%
SClause: 386863 -> 386685 (-0.05%); split: -0.07%, +0.02%
Copies: 1180801 -> 1345633 (+13.96%); split: -0.02%, +13.98%
Branches: 379129 -> 393052 (+3.67%); split: -0.01%, +3.69%
Reviewed-by: Rhys Perry <pendingchaos02 at gmail.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/9196>
---
src/amd/compiler/aco_lower_to_cssa.cpp | 542 +++++++++++++++++++++++++--------
1 file changed, 415 insertions(+), 127 deletions(-)
diff --git a/src/amd/compiler/aco_lower_to_cssa.cpp b/src/amd/compiler/aco_lower_to_cssa.cpp
index 4babccf6580..3613df1c8c3 100644
--- a/src/amd/compiler/aco_lower_to_cssa.cpp
+++ b/src/amd/compiler/aco_lower_to_cssa.cpp
@@ -23,6 +23,7 @@
*/
#include <map>
+#include <unordered_map>
#include "aco_ir.h"
#include "aco_builder.h"
@@ -33,163 +34,454 @@
*
* By lowering the IR to CSSA, the insertion of parallelcopies is separated from
* the register coalescing problem. Additionally, correctness is ensured w.r.t. spilling.
- * The algorithm tries to find beneficial insertion points by checking if a basic block
- * is empty and if the variable already has a new definition in a dominating block.
+ * The algorithm coalesces non-interfering phi-resources while taking value-equality
+ * into account. Re-indexes the SSA-defs.
*/
-
namespace aco {
namespace {
-typedef std::map<uint32_t, std::vector<std::pair<Definition, Operand>>> phi_info;
+typedef std::vector<Temp> merge_set;
+
+struct copy {
+ Definition def;
+ Operand op;
+};
+
+struct merge_node {
+ Operand value = Operand(); /* original value: can be an SSA-def or constant value */
+ uint32_t index = -1u; /* index into the vector of merge sets */
+ uint32_t defined_at = -1u; /* defining block */
+
+ /* we also remember two dominating defs with the same value: */
+ Temp equal_anc_in = Temp(); /* within the same merge set */
+ Temp equal_anc_out = Temp(); /* from a different set */
+};
struct cssa_ctx {
Program* program;
- live& live_vars;
- phi_info logical_phi_info {};
- phi_info linear_phi_info {};
+ std::vector<IDSet>& live_out; /* live-out sets per block */
+ std::vector<std::vector<copy>> parallelcopies; /* copies per block */
+ std::vector<merge_set> merge_sets; /* each vector is one (ordered) merge set */
+ std::unordered_map<uint32_t, merge_node> merge_node_table; /* tempid -> merge node */
};
-bool collect_phi_info(cssa_ctx& ctx)
+/* create (virtual) parallelcopies for each phi instruction and
+ * already merge copy-definitions with phi-defs into merge sets */
+void collect_parallelcopies(cssa_ctx& ctx)
{
- bool progress = false;
+ ctx.parallelcopies.resize(ctx.program->blocks.size());
+ Builder bld(ctx.program);
for (Block& block : ctx.program->blocks) {
for (aco_ptr<Instruction>& phi : block.instructions) {
- bool is_logical;
- if (phi->opcode == aco_opcode::p_phi)
- is_logical = true;
- else if (phi->opcode == aco_opcode::p_linear_phi)
- is_logical = false;
- else
+ if (phi->opcode != aco_opcode::p_phi &&
+ phi->opcode != aco_opcode::p_linear_phi)
break;
- /* no CSSA for the exec mask as we don't spill it anyway */
- if (phi->definitions[0].isFixed() && phi->definitions[0].physReg() == exec)
- continue;
- std::vector<unsigned>& preds = is_logical ? block.logical_preds : block.linear_preds;
-
- /* collect definition's block per Operand */
- std::vector<unsigned> def_points(phi->operands.size());
- for (unsigned i = 0; i < phi->operands.size(); i++) {
- Operand& op = phi->operands[i];
- if (op.isUndefined()) {
- def_points[i] = preds[i];
- } else if (op.isConstant()) {
- /* in theory, we could insert the definition there... */
- def_points[i] = 0;
- } else if (op.isTemp()) {
- unsigned pred = preds[i];
- do {
- def_points[i] = pred;
- pred = is_logical ?
- ctx.program->blocks[pred].logical_idom :
- ctx.program->blocks[pred].linear_idom;
- } while (def_points[i] != pred &&
- ctx.live_vars.live_out[pred].count(op.tempId()));
- } else {
- /* no need to insert a copy of the exec mask */
- assert(op.isFixed() && op.physReg() == exec);
- def_points[i] = preds[i];
- }
- }
+ std::vector<unsigned>& preds = phi->opcode == aco_opcode::p_phi ?
+ block.logical_preds :
+ block.linear_preds;
+ const Definition& def = phi->definitions[0];
+ uint32_t index = ctx.merge_sets.size();
+ merge_set set;
- /* check live-range intersections */
+ bool has_preheader_copy = false;
for (unsigned i = 0; i < phi->operands.size(); i++) {
Operand op = phi->operands[i];
if (op.isUndefined())
continue;
- /* check if the operand comes from the exec mask of a predecessor */
- if (op.isFixed() && op.physReg() == exec)
- continue;
- bool interferes = false;
- unsigned idom = is_logical ?
- ctx.program->blocks[def_points[i]].logical_idom :
- ctx.program->blocks[def_points[i]].linear_idom;
- /* live-through operands definitely interfere */
- if (op.isTemp() && !op.isKill()) {
- interferes = true;
- /* create copies for constants to ease spilling */
- } else if (op.isConstant()) {
- interferes = true;
- /* create copies for SGPR -> VGPR moves */
- } else if (op.regClass() != phi->definitions[0].regClass()) {
- interferes = true;
- /* operand might interfere with any phi-def*/
- } else if (def_points[i] == block.index) {
- interferes = true;
- /* operand might interfere with phi-def */
- } else if (ctx.live_vars.live_out[idom].count(phi->definitions[0].tempId())) {
- interferes = true;
- /* else check for interferences with other operands */
- } else {
- for (unsigned j = 0; !interferes && j < phi->operands.size(); j++) {
- /* don't care about other register classes */
- if (!phi->operands[j].isTemp() || phi->operands[j].regClass() != phi->definitions[0].regClass())
- continue;
- /* same operands cannot interfere */
- if (op.getTemp() == phi->operands[j].getTemp())
- continue;
- /* if def_points[i] dominates any other def_point, assume they interfere.
- * As live-through operands are checked above, only test up the current block. */
- unsigned other_def_point = def_points[j];
- while (def_points[i] < other_def_point && other_def_point != block.index)
- other_def_point = is_logical ?
- ctx.program->blocks[other_def_point].logical_idom :
- ctx.program->blocks[other_def_point].linear_idom;
- interferes = def_points[i] == other_def_point;
- }
- }
+ /* create new temporary and rename operands */
+ Temp tmp = bld.tmp(def.regClass());
+ ctx.parallelcopies[preds[i]].emplace_back(copy{Definition(tmp), op});
+ phi->operands[i] = Operand(tmp);
- if (!interferes)
- continue;
+ /* place the new operands in the same merge set */
+ set.emplace_back(tmp);
+ ctx.merge_node_table[tmp.id()] = {op, index, preds[i]};
- progress = true;
+ /* update the liveness information */
+ if (op.isKill())
+ ctx.live_out[preds[i]].erase(op.tempId());
+ ctx.live_out[preds[i]].insert(tmp.id());
- /* create new temporary and rename operands */
- Temp new_tmp = ctx.program->allocateTmp(phi->definitions[0].regClass());
- if (is_logical)
- ctx.logical_phi_info[preds[i]].emplace_back(Definition(new_tmp), op);
+ has_preheader_copy |= i == 0 && block.kind & block_kind_loop_header;
+ }
+ /* place the definition in dominance-order */
+ if (def.isTemp()) {
+ if (has_preheader_copy)
+ set.emplace(std::next(set.begin()), def.getTemp());
+ else if (block.kind & block_kind_loop_header)
+ set.emplace(set.begin(), def.getTemp());
else
- ctx.linear_phi_info[preds[i]].emplace_back(Definition(new_tmp), op);
- phi->operands[i] = Operand(new_tmp);
- phi->operands[i].setKill(true);
- def_points[i] = preds[i];
+ set.emplace_back(def.getTemp());
+ ctx.merge_node_table[def.tempId()] = {Operand(def.getTemp()), index, block.index};
}
+ ctx.merge_sets.emplace_back(set);
}
}
- return progress;
}
-void insert_parallelcopies(cssa_ctx& ctx)
+/* check whether the definition of a comes after b. */
+inline
+bool defined_after(cssa_ctx& ctx, Temp a, Temp b)
+{
+ merge_node& node_a = ctx.merge_node_table[a.id()];
+ merge_node& node_b = ctx.merge_node_table[b.id()];
+ if (node_a.defined_at == node_b.defined_at)
+ return a.id() > b.id();
+
+ return node_a.defined_at > node_b.defined_at;
+}
+
+/* check whether a dominates b where b is defined after a */
+inline
+bool dominates(cssa_ctx& ctx, Temp a, Temp b)
{
- /* insert the parallelcopies from logical phis before p_logical_end */
- for (auto&& entry : ctx.logical_phi_info) {
- Block& block = ctx.program->blocks[entry.first];
- unsigned idx = block.instructions.size() - 1;
- while (block.instructions[idx]->opcode != aco_opcode::p_logical_end) {
- assert(idx > 0);
- idx--;
+ assert(defined_after(ctx, b, a));
+ merge_node& node_a = ctx.merge_node_table[a.id()];
+ merge_node& node_b = ctx.merge_node_table[b.id()];
+ unsigned idom = node_b.defined_at;
+ while (idom > node_a.defined_at)
+ idom = b.regClass().type() == RegType::vgpr ?
+ ctx.program->blocks[idom].logical_idom :
+ ctx.program->blocks[idom].linear_idom;
+
+ return idom == node_a.defined_at;
+}
+
+/* check intersection between var and parent:
+ * We already know that parent dominates var. */
+inline
+bool intersects(cssa_ctx& ctx, Temp var, Temp parent)
+{
+ merge_node& node_var = ctx.merge_node_table[var.id()];
+ merge_node& node_parent = ctx.merge_node_table[parent.id()];
+ assert(node_var.index != node_parent.index);
+ uint32_t block_idx = node_var.defined_at;
+
+ /* if the parent is live-out at the definition block of var, they intersect */
+ bool parent_live = ctx.live_out[block_idx].count(parent.id());
+ if (parent_live)
+ return true;
+
+ /* parent is defined in a different block than var */
+ if (node_parent.defined_at < node_var.defined_at) {
+ /* if the parent is not live-in, they don't interfere */
+ std::vector<uint32_t>& preds = var.type() == RegType::vgpr ?
+ ctx.program->blocks[block_idx].logical_preds :
+ ctx.program->blocks[block_idx].linear_preds;
+ for (uint32_t pred : preds) {
+ if (!ctx.live_out[pred].count(parent.id()))
+ return false;
}
+ }
- Builder bld(ctx.program);
- bld.reset(&block.instructions, std::next(block.instructions.begin(), idx));
- for (std::pair<Definition, Operand>& pair : entry.second)
- bld.pseudo(aco_opcode::p_parallelcopy, pair.first, pair.second);
+ for (const copy& cp : ctx.parallelcopies[block_idx]) {
+ /* if var is defined at the edge, they don't intersect */
+ if (cp.def.getTemp() == var)
+ return false;
+ if (cp.op.isTemp() && cp.op.getTemp() == parent)
+ parent_live = true;
+ }
+ /* if the parent is live at the edge, they intersect */
+ if (parent_live)
+ return true;
+
+ /* both, parent and var, are present in the same block */
+ const Block& block = ctx.program->blocks[block_idx];
+ for (auto it = block.instructions.crbegin(); it != block.instructions.crend(); ++it) {
+ /* if the parent was not encountered yet, it can only be used by a phi */
+ if (is_phi(it->get()))
+ break;
+
+ for (const Definition& def : (*it)->definitions) {
+ if (!def.isTemp())
+ continue;
+ /* if parent was not found yet, they don't intersect */
+ if (def.getTemp() == var)
+ return false;
+ }
+
+ for (const Operand& op : (*it)->operands) {
+ if (!op.isTemp())
+ continue;
+ /* if the var was defined before this point, they intersect */
+ if (op.getTemp() == parent)
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/* check interference between var and parent:
+ * i.e. they have different values and intersect.
+ * If parent and var share the same value, also updates the equal ancestor. */
+inline
+bool interference(cssa_ctx& ctx, Temp var, Temp parent)
+{
+ assert(var != parent);
+ merge_node& node_var = ctx.merge_node_table[var.id()];
+ node_var.equal_anc_out = Temp();
+
+ if (node_var.index == ctx.merge_node_table[parent.id()].index) {
+ /* check/update in other set */
+ parent = ctx.merge_node_table[parent.id()].equal_anc_out;
+ }
+
+ Temp tmp = parent;
+ /* check if var intersects with parent or any equal-valued ancestor */
+ while (tmp != Temp() && !intersects(ctx, var, tmp)) {
+ merge_node& node_tmp = ctx.merge_node_table[tmp.id()];
+ tmp = node_tmp.equal_anc_in;
+ }
+
+ /* no intersection found */
+ if (tmp == Temp())
+ return false;
+
+ /* var and parent, same value, but in different sets */
+ if (node_var.value == ctx.merge_node_table[parent.id()].value) {
+ node_var.equal_anc_out = tmp;
+ return false;
+ }
+
+ /* var and parent, different values and intersect */
+ return true;
+}
+
+/* tries to merge set_b into set_a of given temporary and
+ * drops that temporary as it is being coalesced */
+bool try_merge_merge_set(cssa_ctx& ctx, Temp dst, merge_set& set_b)
+{
+ auto def_node_it = ctx.merge_node_table.find(dst.id());
+ uint32_t index = def_node_it->second.index;
+ merge_set& set_a = ctx.merge_sets[index];
+ std::vector<Temp> dom; /* stack of the traversal */
+ merge_set union_set; /* the new merged merge-set */
+ uint32_t i_a = 0;
+ uint32_t i_b = 0;
+
+ while (i_a < set_a.size() || i_b < set_b.size()) {
+ Temp current;
+ if (i_a == set_a.size())
+ current = set_b[i_b++];
+ else if (i_b == set_b.size())
+ current = set_a[i_a++];
+ /* else pick the one defined first */
+ else if (defined_after(ctx, set_a[i_a], set_b[i_b]))
+ current = set_b[i_b++];
+ else
+ current = set_a[i_a++];
+
+ while (!dom.empty() && !dominates(ctx, dom.back(), current))
+ dom.pop_back(); /* not the desired parent, remove */
+
+ if (!dom.empty() && interference(ctx, current, dom.back()))
+ return false; /* intersection detected */
+
+ dom.emplace_back(current); /* otherwise, keep checking */
+ if (current != dst)
+ union_set.emplace_back(current); /* maintain the new merge-set sorted */
+ }
+
+ /* update hashmap */
+ for (Temp t : union_set) {
+ merge_node& node = ctx.merge_node_table[t.id()];
+ /* update the equal ancestors:
+ * i.e. the 'closest' dominating def with the same value */
+ Temp in = node.equal_anc_in;
+ Temp out = node.equal_anc_out;
+ if (in == Temp() || (out != Temp() && defined_after(ctx, out, in)))
+ node.equal_anc_in = out;
+ node.equal_anc_out = Temp();
+ /* update merge-set index */
+ node.index = index;
+ }
+ set_b = merge_set(); /* free the old set_b */
+ ctx.merge_sets[index] = union_set;
+ ctx.merge_node_table.erase(dst.id()); /* remove the temporary */
+
+ return true;
+}
+
+/* returns true if the copy can safely be omitted */
+bool try_coalesce_copy(cssa_ctx& ctx, copy copy, uint32_t block_idx)
+{
+ /* we can only coalesce temporaries */
+ if (!copy.op.isTemp())
+ return false;
+
+ /* try emplace a merge_node for the copy operand */
+ merge_node& op_node = ctx.merge_node_table[copy.op.tempId()];
+ if (op_node.defined_at == -1u) {
+ /* find defining block of operand */
+ uint32_t pred = block_idx;
+ do {
+ block_idx = pred;
+ pred = copy.op.regClass().type() == RegType::vgpr ?
+ ctx.program->blocks[pred].logical_idom :
+ ctx.program->blocks[pred].linear_idom;
+ } while (block_idx != pred &&
+ ctx.live_out[pred].count(copy.op.tempId()));
+ op_node.defined_at = block_idx;
+ op_node.value = copy.op;
+ }
+
+ /* we can only coalesce copies of the same register class */
+ if (copy.op.regClass() != copy.def.regClass())
+ return false;
+
+ /* check if this operand has not yet been coalesced */
+ if (op_node.index == -1u) {
+ merge_set op_set = merge_set{copy.op.getTemp()};
+ return try_merge_merge_set(ctx, copy.def.getTemp(), op_set);
+ }
+
+ /* check if this operand has been coalesced into the same set */
+ assert(ctx.merge_node_table.count(copy.def.tempId()));
+ if (op_node.index == ctx.merge_node_table[copy.def.tempId()].index)
+ return true;
+
+ /* otherwise, try to coalesce both merge sets */
+ return try_merge_merge_set(ctx, copy.def.getTemp(), ctx.merge_sets[op_node.index]);
+}
+
+/* node in the location-transfer-graph */
+struct ltg_node {
+ copy cp;
+ uint32_t read_idx;
+ uint32_t num_uses = 0;
+};
+
+/* emit the copies in an order that does not
+ * create interferences within a merge-set */
+void emit_copies_block(Builder bld, std::map<uint32_t, ltg_node>& ltg, RegType type)
+{
+ auto&& it = ltg.begin();
+ while (it != ltg.end()) {
+ const copy& cp = it->second.cp;
+ /* wrong regclass or still needed as operand */
+ if (cp.def.regClass().type() != type || it->second.num_uses > 0) {
+ ++it;
+ continue;
+ }
+
+ /* emit the copy */
+ bld.copy(cp.def, it->second.cp.op);
+
+ /* update the location transfer graph */
+ if (it->second.read_idx != -1u) {
+ auto&& other = ltg.find(it->second.read_idx);
+ if (other != ltg.end())
+ other->second.num_uses--;
+ }
+ ltg.erase(it);
+ it = ltg.begin();
+ }
+
+ /* count the number of remaining circular dependencies */
+ unsigned num = std::count_if(ltg.begin(), ltg.end(), [&] (auto& n){
+ return n.second.cp.def.regClass().type() == type;
+ });
+
+ /* if there are circular dependencies, we just emit them as single parallelcopy */
+ if (num) {
+ // TODO: this should be restricted to a feasible number of registers
+ // and otherwise use a temporary to avoid having to reload more (spilled)
+ // variables than we have registers.
+ aco_ptr<Pseudo_instruction> copy{create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, num, num)};
+ it = ltg.begin();
+ for (unsigned i = 0; i < num; i++) {
+ while (it->second.cp.def.regClass().type() != type)
+ ++it;
+
+ copy->definitions[i] = it->second.cp.def;
+ copy->operands[i] = it->second.cp.op;
+ it = ltg.erase(it);
+ }
+ bld.insert(std::move(copy));
}
+}
+
+/* either emits or coalesces all parallelcopies and
+ * renames the phi-operands accordingly. */
+void emit_parallelcopies(cssa_ctx& ctx)
+{
+ std::unordered_map<uint32_t, Operand> renames;
- /* insert parallelcopies for the linear phis at the end of blocks just before the branch */
- for (auto&& entry : ctx.linear_phi_info) {
- Block& block = ctx.program->blocks[entry.first];
- std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end();
- --it;
- assert((*it)->isBranch());
+ /* we iterate backwards to prioritize coalescing in else-blocks */
+ for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) {
+ if (ctx.parallelcopies[i].empty())
+ continue;
+ std::map<uint32_t, ltg_node> ltg;
+ /* first, try to coalesce all parallelcopies */
+ for (const copy& cp : ctx.parallelcopies[i]) {
+ if (try_coalesce_copy(ctx, cp, i)) {
+ renames.emplace(cp.def.tempId(), cp.op);
+ /* update liveness info */
+ ctx.live_out[i].erase(cp.def.tempId());
+ ctx.live_out[i].insert(cp.op.tempId());
+ } else {
+ uint32_t read_idx = -1u;
+ if (cp.op.isTemp())
+ read_idx = ctx.merge_node_table[cp.op.tempId()].index;
+ uint32_t write_idx = ctx.merge_node_table[cp.def.tempId()].index;
+ assert(write_idx != -1u);
+ ltg[write_idx] = {cp, read_idx};
+ }
+ }
+
+ /* build location-transfer-graph */
+ for (auto& pair : ltg) {
+ if (pair.second.read_idx == -1u)
+ continue;
+ auto&& it = ltg.find(pair.second.read_idx);
+ if (it != ltg.end())
+ it->second.num_uses++;
+ }
+
+ /* emit parallelcopies ordered */
Builder bld(ctx.program);
- bld.reset(&block.instructions, it);
- for (std::pair<Definition, Operand>& pair : entry.second)
- bld.pseudo(aco_opcode::p_parallelcopy, pair.first, pair.second);
+ Block& block = ctx.program->blocks[i];
+
+ /* emit VGPR copies */
+ auto IsLogicalEnd = [] (const aco_ptr<Instruction>& inst) -> bool {
+ return inst->opcode == aco_opcode::p_logical_end;
+ };
+ auto it = std::find_if(block.instructions.rbegin(), block.instructions.rend(), IsLogicalEnd);
+ bld.reset(&block.instructions, std::prev(it.base()));
+ emit_copies_block(bld, ltg, RegType::vgpr);
+
+ /* emit SGPR copies */
+ aco_ptr<Instruction> branch = std::move(block.instructions.back());
+ block.instructions.pop_back();
+ bld.reset(&block.instructions);
+ emit_copies_block(bld, ltg, RegType::sgpr);
+ bld.insert(std::move(branch));
}
+
+ /* finally, rename coalesced phi operands */
+ for (Block& block : ctx.program->blocks) {
+ for (aco_ptr<Instruction>& phi : block.instructions) {
+ if (phi->opcode != aco_opcode::p_phi &&
+ phi->opcode != aco_opcode::p_linear_phi)
+ break;
+
+ for (Operand& op : phi->operands) {
+ if (!op.isTemp())
+ continue;
+ auto&& it = renames.find(op.tempId());
+ if (it != renames.end()) {
+ op = it->second;
+ renames.erase(it);
+ }
+ }
+ }
+ }
+ assert(renames.empty());
}
} /* end namespace */
@@ -197,14 +489,10 @@ void insert_parallelcopies(cssa_ctx& ctx)
void lower_to_cssa(Program* program, live& live_vars)
{
- cssa_ctx ctx = {program, live_vars};
- /* collect information about all interfering phi operands */
- bool progress = collect_phi_info(ctx);
-
- if (!progress)
- return;
-
- insert_parallelcopies(ctx);
+ reindex_ssa(program, live_vars.live_out);
+ cssa_ctx ctx = {program, live_vars.live_out};
+ collect_parallelcopies(ctx);
+ emit_parallelcopies(ctx);
/* update live variable information */
live_vars = live_var_analysis(program);
More information about the mesa-commit
mailing list