[Mesa-dev] [PATCH] nv50/ir: don't rely on inbound edge order in phi nodes

Karol Herbst kherbst at redhat.com
Sat Jul 7 23:19:50 UTC 2018


On Sun, Jul 8, 2018 at 1:01 AM, Rhys Perry <pendingchaos02 at gmail.com> wrote:
> Previously, a phi node's sources were implicitly ordered by the inbound edge
> order. This patch changes that so that a phi node instead has a basic block
> stored for each source in a deque.
>
> There are no regressions in Unigine Heaven, Valley and Superposition (with
> a SERIALIZE hack to fix unrelated errors in all three).
>
> I also found that the code generated is the same as without this patch for
> the shaders in shader-db even with the sources of phi nodes reversed after
> convertToSSA().
>
> ---
>  src/gallium/drivers/nouveau/codegen/nv50_ir.cpp    | 41 +++++++++++++
>  src/gallium/drivers/nouveau/codegen/nv50_ir.h      | 19 ++++++
>  .../drivers/nouveau/codegen/nv50_ir_inlines.h      | 14 +++++
>  .../drivers/nouveau/codegen/nv50_ir_print.cpp      |  2 +
>  src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp | 71 ++++++----------------
>  .../drivers/nouveau/codegen/nv50_ir_ssa.cpp        | 11 ++--
>  src/gallium/drivers/nouveau/codegen/nv50_ir_util.h |  2 +
>  7 files changed, 104 insertions(+), 56 deletions(-)
>
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> index 49425b98b9..4a2835c526 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.cpp
> @@ -1104,6 +1104,43 @@ FlowInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
>     return flow;
>  }
>
> +PhiInstruction::PhiInstruction(Function *fn, DataType ty)
> +   : Instruction(fn, OP_PHI, ty) {}
> +
> +PhiInstruction *
> +PhiInstruction::clone(ClonePolicy<Function>& pol, Instruction *i) const
> +{
> +   assert(op == OP_PHI);
> +
> +   PhiInstruction *phi = (i ? static_cast<PhiInstruction *>(i) :
> +                          new_PhiInstruction(pol.context(), dType));
> +
> +   Instruction::clone(pol, phi);
> +   phi->basicBlocks.resize(basicBlocks.size());
> +   for (size_t i = 0; i < basicBlocks.size(); i++)
> +      phi->basicBlocks[i] = basicBlocks[i];
> +
> +   return phi;
> +}
> +
> +void
> +PhiInstruction::setSrcBB(int s, Value *val, BasicBlock *bb)
> +{
> +   this->setSrc(s, val);
> +   if (s >= (int)basicBlocks.size())
> +      basicBlocks.resize(s + 1);
> +   basicBlocks[s] = bb;
> +}
> +
> +void
> +PhiInstruction::setSrcBB(int s, const ValueRef& ref, BasicBlock *bb)
> +{
> +   this->setSrc(s, ref);
> +   if (s >= (int)basicBlocks.size())
> +      basicBlocks.resize(s + 1);
> +   basicBlocks[s] = bb;
> +}
> +
>  Program::Program(Type type, Target *arch)
>     : progType(type),
>       target(arch),
> @@ -1111,6 +1148,7 @@ Program::Program(Type type, Target *arch)
>       mem_CmpInstruction(sizeof(CmpInstruction), 4),
>       mem_TexInstruction(sizeof(TexInstruction), 4),
>       mem_FlowInstruction(sizeof(FlowInstruction), 4),
> +     mem_PhiInstruction(sizeof(PhiInstruction), 4),
>       mem_LValue(sizeof(LValue), 8),
>       mem_Symbol(sizeof(Symbol), 7),
>       mem_ImmediateValue(sizeof(ImmediateValue), 7)
> @@ -1152,6 +1190,9 @@ void Program::releaseInstruction(Instruction *insn)
>     else
>     if (insn->asFlow())
>        mem_FlowInstruction.release(insn);
> +   else
> +   if (insn->asPhi())
> +      mem_PhiInstruction.release(insn);
>     else
>        mem_Instruction.release(insn);
>  }
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir.h b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> index f4f3c70888..e9fe7422b5 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir.h
> @@ -489,6 +489,7 @@ class Instruction;
>  class CmpInstruction;
>  class TexInstruction;
>  class FlowInstruction;
> +class PhiInstruction;
>
>  class Value;
>  class LValue;
> @@ -853,9 +854,11 @@ public:
>     inline CmpInstruction *asCmp();
>     inline TexInstruction *asTex();
>     inline FlowInstruction *asFlow();
> +   inline PhiInstruction *asPhi();
>     inline const TexInstruction *asTex() const;
>     inline const CmpInstruction *asCmp() const;
>     inline const FlowInstruction *asFlow() const;
> +   inline const PhiInstruction *asPhi() const;
>
>  public:
>     Instruction *next;
> @@ -1075,6 +1078,21 @@ public:
>     } target;
>  };
>
> +class PhiInstruction : public Instruction
> +{
> +public:
> +   PhiInstruction(Function *, DataType);
> +
> +   virtual PhiInstruction *clone(ClonePolicy<Function>&,
> +                                 Instruction * = NULL) const;
> +
> +   void setSrcBB(int s, Value *, BasicBlock *);
> +   void setSrcBB(int s, const ValueRef&, BasicBlock *);
> +
> +public:
> +   std::deque<BasicBlock *> basicBlocks;

don't make internal state public, we should only use methods to
directly access that. Rather add a getSrcBB method.

> +};
> +

I am not convinced about the overall class design, because there is
still the setSrc() method you could instead of setSrcBB, so what do we
do in that case? But overwriting setSrc to be a noop or throw an
assert violates basic OOP principles as well, but might be still fine
to do. Anyway, we should think this through a bit more thoroughly.

>  class BasicBlock
>  {
>  public:
> @@ -1287,6 +1305,7 @@ public:
>     MemoryPool mem_CmpInstruction;
>     MemoryPool mem_TexInstruction;
>     MemoryPool mem_FlowInstruction;
> +   MemoryPool mem_PhiInstruction;
>     MemoryPool mem_LValue;
>     MemoryPool mem_Symbol;
>     MemoryPool mem_ImmediateValue;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> index 4cb53ab42e..829578a2ac 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_inlines.h
> @@ -323,6 +323,20 @@ const TexInstruction *Instruction::asTex() const
>     return NULL;
>  }
>
> +PhiInstruction *Instruction::asPhi()
> +{
> +   if (op == OP_PHI)
> +      return static_cast<PhiInstruction *>(this);
> +   return NULL;
> +}
> +
> +const PhiInstruction *Instruction::asPhi() const
> +{
> +   if (op == OP_PHI)
> +      return static_cast<const PhiInstruction *>(this);
> +   return NULL;
> +}
> +
>  static inline Instruction *cloneForward(Function *ctx, Instruction *obj)
>  {
>     DeepClonePolicy<Function> pol(ctx);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> index cbb21f5f72..efb8798dce 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_print.cpp
> @@ -675,6 +675,8 @@ void Instruction::print() const
>                                            getIndirect(s, 1));
>        else
>           pos += getSrc(s)->print(&buf[pos], BUFSZ - pos, sType);
> +      if (op == OP_PHI)
> +         PRINT("%s(BB:%i)", colour[TXT_INSN], asPhi()->basicBlocks[s]->getId());
>     }
>     if (exit)
>        PRINT("%s exit", colour[TXT_INSN]);
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> index b660fec75c..cecc23b22c 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
> @@ -372,20 +372,13 @@ typedef unordered_map<
>     std::pair<Instruction *, BasicBlock *>, Value *, PhiMapHash> PhiMap;
>
>  // Critical edges need to be split up so that work can be inserted along
> -// specific edge transitions. Unfortunately manipulating incident edges into a
> -// BB invalidates all the PHI nodes since their sources are implicitly ordered
> -// by incident edge order.
> -//
> -// TODO: Make it so that that is not the case, and PHI nodes store pointers to
> -// the original BBs.
> +// specific edge transitions.
>  void
>  RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
>  {
>     BasicBlock *pb, *pn;
> -   Instruction *phi;
>     Graph::EdgeIterator ei;
>     std::stack<BasicBlock *> stack;
> -   int j = 0;
>
>     for (ei = bb->cfg.incident(); !ei.end(); ei.next()) {
>        pb = BasicBlock::get(ei.getNode());
> @@ -394,22 +387,6 @@ RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
>           stack.push(pb);
>     }
>
> -   // No critical edges were found, no need to perform any work.
> -   if (stack.empty())
> -      return;
> -
> -   // We're about to, potentially, reorder the inbound edges. This means that
> -   // we need to hold on to the (phi, bb) -> src mapping, and fix up the phi
> -   // nodes after the graph has been modified.
> -   PhiMap phis;
> -
> -   j = 0;
> -   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
> -      pb = BasicBlock::get(ei.getNode());
> -      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next)
> -         phis.insert(std::make_pair(std::make_pair(phi, pb), phi->getSrc(j)));
> -   }
> -
>     while (!stack.empty()) {
>        pb = stack.top();
>        pn = new BasicBlock(func);
> @@ -423,23 +400,12 @@ RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
>        if (pb->getExit()->asFlow()->target.bb == bb)
>           pb->getExit()->asFlow()->target.bb = pn;
>
> -      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
> -         PhiMap::iterator it = phis.find(std::make_pair(phi, pb));
> -         assert(it != phis.end());
> -         phis.insert(std::make_pair(std::make_pair(phi, pn), it->second));
> -         phis.erase(it);
> -      }
> -   }
> -
> -   // Now go through and fix up all of the phi node sources.
> -   j = 0;
> -   for (ei = bb->cfg.incident(); !ei.end(); ei.next(), j++) {
> -      pb = BasicBlock::get(ei.getNode());
> -      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
> -         PhiMap::const_iterator it = phis.find(std::make_pair(phi, pb));
> -         assert(it != phis.end());
> -
> -         phi->setSrc(j, it->second);
> +      PhiInstruction *phi = bb->getPhi() ? bb->getPhi()->asPhi() : NULL;
> +      for (; phi; phi = phi->next->asPhi()) {
> +         for (int s = 0; phi->srcExists(s); s++) {
> +            if (phi->basicBlocks[s] == pb)
> +               phi->basicBlocks[s] = pn;
> +         }
>        }
>     }
>  }

I like how simple that code becomes. Didn't really looked careful
enough, but looks good for now.

> @@ -455,28 +421,29 @@ RegAlloc::PhiMovesPass::splitEdges(BasicBlock *bb)
>  bool
>  RegAlloc::PhiMovesPass::visit(BasicBlock *bb)
>  {
> -   Instruction *phi, *mov;
> -
>     splitEdges(bb);
>
> -   // insert MOVs (phi->src(j) should stem from j-th in-BB)
> -   int j = 0;
> +   // ensure incoming basic blocks are terminated
>     for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
>        BasicBlock *pb = BasicBlock::get(ei.getNode());
>        if (!pb->isTerminated())
>           pb->insertTail(new_FlowInstruction(func, OP_BRA, bb));
> +   }
>
> -      for (phi = bb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
> -         LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue());
> -         mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
> +   // insert MOVs
> +   PhiInstruction *phi = bb->getPhi() ? bb->getPhi()->asPhi() : NULL;
> +   for (; phi; phi = phi->next->asPhi()) {
> +      for (int i = 0; phi->srcExists(i); i++) {
> +         BasicBlock *pb = phi->basicBlocks[i];
>
> -         mov->setSrc(0, phi->getSrc(j));
> +         LValue *tmp = new_LValue(func, phi->getDef(0)->asLValue());
> +         Instruction *mov = new_Instruction(func, OP_MOV, typeOfSize(tmp->reg.size));
> +         mov->setSrc(0, phi->getSrc(i));
>           mov->setDef(0, tmp);
> -         phi->setSrc(j, tmp);
> -
>           pb->insertBefore(pb->getExit(), mov);
> +
> +         phi->setSrc(i, tmp);
>        }
> -      ++j;
>     }
>
>     return true;
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp
> index 3d25ad928e..66586a283d 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ssa.cpp
> @@ -371,7 +371,7 @@ Function::convertToSSA()
>
>           DLList::Iterator dfIter = bb->getDF().iterator();
>           for (; !dfIter.end(); dfIter.next()) {
> -            Instruction *phi;
> +            PhiInstruction *phi;
>              BasicBlock *dfBB = BasicBlock::get(dfIter);
>
>              if (hasAlready[dfBB->getId()] >= iterCount)
> @@ -382,12 +382,12 @@ Function::convertToSSA()
>              if (!dfBB->liveSet.test(lval->id))
>                 continue;
>
> -            phi = new_Instruction(this, OP_PHI, typeOfSize(lval->reg.size));
> +            phi = new_PhiInstruction(this, typeOfSize(lval->reg.size));
>              dfBB->insertTail(phi);
>
>              phi->setDef(0, lval);
> -            for (int s = 0; s < dfBB->cfg.incidentCount(); ++s)
> -               phi->setSrc(s, lval);
> +            for (Graph::EdgeIterator ei = dfBB->cfg.incident(); !ei.end(); ei.next())
> +               phi->setSrcBB(phi->srcCount(), lval, BasicBlock::get(ei.getNode()));
>
>              if (work[dfBB->getId()] < iterCount) {
>                 work[dfBB->getId()] = iterCount;
> @@ -514,6 +514,9 @@ void RenamePass::search(BasicBlock *bb)
>        assert(p < sb->cfg.incidentCount());
>
>        for (phi = sb->getPhi(); phi && phi->op == OP_PHI; phi = phi->next) {
> +         // phi instructions are supposed to handle when this isn't true, but
> +         // it's always true during conversion to SSA.
> +         assert(phi->asPhi()->basicBlocks[p] == bb);
>           lval = getStackTop(phi->getSrc(p));
>           if (!lval)
>              lval = mkUndefined(phi->getSrc(p));
> diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
> index c619499046..732ad436ad 100644
> --- a/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
> +++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_util.h
> @@ -64,6 +64,8 @@
>     NV50_IR_FUNC_ALLOC_OBJ_DEF(TexInstruction, f, args)
>  #define new_FlowInstruction(f, args...)                  \
>     NV50_IR_FUNC_ALLOC_OBJ_DEF(FlowInstruction, f, args)
> +#define new_PhiInstruction(f, args...)                   \
> +   NV50_IR_FUNC_ALLOC_OBJ_DEF(PhiInstruction, f, args)
>
>  #define new_LValue(f, args...)                  \
>     NV50_IR_FUNC_ALLOC_OBJ_DEF(LValue, f, args)
> --
> 2.14.4
>


More information about the mesa-dev mailing list