[Mesa-dev] [PATCH 1/3] R600: fix PHI value adding in the structurizer

Tom Stellard tom at stellard.net
Wed Feb 6 19:00:04 PST 2013


Hi Christian,

I'm still unable to get the glsl1-while-loop with continue test to pass
on R600 with the new structurizer.  For this test, the structurizer is
producing strange code and I'm not sure if it is correct.  I have
attached the full before and after dumps to this email.  Here is the
suspect block:

ENDIF:                                            ; preds = %LOOP
  %46 = bitcast float %temp.0 to i32
  %47 = add i32 %46, 1
  %48 = bitcast i32 %47 to float
  %49 = bitcast float %48 to i32
  %50 = icmp slt i32 5, %49
  %51 = sext i1 %50 to i32
  %52 = bitcast i32 %51 to float
  %53 = bitcast float %52 to i32
  %54 = icmp ne i32 %53, 0
  %55 = xor i1 %54, true
  %56 = xor i1 %54, true
  %57 = call { i1, i64 } @llvm.SI.if(i1 %56)
  %58 = extractvalue { i1, i64 } %57, 0
  %59 = extractvalue { i1, i64 } %57, 1
  %60 = call i64 @llvm.SI.if.break(i1 %55, %i64 %0)
  br i1 %58, label %ENDIF20, label %Flow4

Notice how @llvm.SI.if and llvm.SI.if.break use the same condition.  Is
this correct?

-Tom

On Mon, Feb 04, 2013 at 03:52:17PM +0100, Christian K??nig wrote:
> From: Christian K??nig <christian.koenig at amd.com>
> 
> Otherwise we sometimes produce invalid code.
> 
> Signed-off-by: Christian K??nig <christian.koenig at amd.com>
> Tested-by: Michel D??nzer <michel.daenzer at amd.com>
> ---
>  lib/Target/R600/AMDGPUStructurizeCFG.cpp |  146 +++++++++++++++++-------------
>  1 file changed, 81 insertions(+), 65 deletions(-)
> 
> diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> index 22338b5..c6f0a66 100644
> --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> @@ -41,6 +41,7 @@ typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
>  typedef DenseMap<BasicBlock *, Value *> BBPredicates;
>  typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
>  typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
> +typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
>  
>  // The name for newly created blocks.
>  
> @@ -109,6 +110,7 @@ class AMDGPUStructurizeCFG : public RegionPass {
>    VisitedMap Visited;
>    PredMap Predicates;
>    BBPhiMap DeletedPhis;
> +  BB2BBVecMap AddedPhis;
>    BBVector FlowsInserted;
>  
>    BasicBlock *LoopStart;
> @@ -126,16 +128,18 @@ class AMDGPUStructurizeCFG : public RegionPass {
>  
>    void collectInfos();
>  
> +  void delPhiValues(BasicBlock *From, BasicBlock *To);
> +
> +  void addPhiValues(BasicBlock *From, BasicBlock *To);
> +
> +  void setPhiValues();
> +
>    bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
>  
>    void killTerminator(BasicBlock *BB);
>  
>    RegionNode *skipChained(RegionNode *Node);
>  
> -  void delPhiValues(BasicBlock *From, BasicBlock *To);
> -
> -  void addPhiValues(BasicBlock *From, BasicBlock *To);
> -
>    BasicBlock *getNextFlow(BasicBlock *Prev);
>  
>    bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
> @@ -309,6 +313,76 @@ void AMDGPUStructurizeCFG::collectInfos() {
>    }
>  }
>  
> +/// \brief Remove all PHI values coming from "From" into "To" and remember
> +/// them in DeletedPhis
> +void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
> +  PhiMap &Map = DeletedPhis[To];
> +  for (BasicBlock::iterator I = To->begin(), E = To->end();
> +       I != E && isa<PHINode>(*I);) {
> +
> +    PHINode &Phi = cast<PHINode>(*I++);
> +    while (Phi.getBasicBlockIndex(From) != -1) {
> +      Value *Deleted = Phi.removeIncomingValue(From, false);
> +      Map[&Phi].push_back(std::make_pair(From, Deleted));
> +    }
> +  }
> +}
> +
> +/// \brief Add a dummy PHI value as soon as we knew the new predecessor
> +void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
> +  for (BasicBlock::iterator I = To->begin(), E = To->end();
> +       I != E && isa<PHINode>(*I);) {
> +
> +    PHINode &Phi = cast<PHINode>(*I++);
> +    Value *Undef = UndefValue::get(Phi.getType());
> +    Phi.addIncoming(Undef, From);
> +  }
> +  AddedPhis[To].push_back(From);
> +}
> +
> +/// \brief Add the real PHI value as soon as everything is set up
> +void AMDGPUStructurizeCFG::setPhiValues() {
> +  
> +  SSAUpdater Updater;
> +  for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
> +       AI != AE; ++AI) {
> +
> +    BasicBlock *To = AI->first;
> +    BBVector &From = AI->second;
> +
> +    if (!DeletedPhis.count(To))
> +      continue;
> +
> +    PhiMap &Map = DeletedPhis[To];
> +    for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
> +         PI != PE; ++PI) {
> +
> +      PHINode *Phi = PI->first;
> +      Value *Undef = UndefValue::get(Phi->getType());
> +      Updater.Initialize(Phi->getType(), "");
> +      Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
> +      Updater.AddAvailableValue(To, Undef);
> +
> +      for (BBValueVector::iterator VI = PI->second.begin(),
> +           VE = PI->second.end(); VI != VE; ++VI) {
> +
> +        Updater.AddAvailableValue(VI->first, VI->second);
> +      }
> +
> +      for (BBVector::iterator FI = From.begin(), FE = From.end();
> +           FI != FE; ++FI) {
> +
> +        int Idx = Phi->getBasicBlockIndex(*FI);
> +        assert(Idx != -1);
> +        Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
> +      }
> +    }
> +
> +    DeletedPhis.erase(To);
> +  }
> +  assert(DeletedPhis.empty());
> +}
> +
>  /// \brief Does A dominate all the predicates of B ?
>  bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
>    BBPredicates &Preds = Predicates[B];
> @@ -406,57 +480,6 @@ RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
>    return ParentRegion->getNode(wireFlowBlock(BB, Next));
>  }
>  
> -/// \brief Remove all PHI values coming from "From" into "To" and remember
> -/// them in DeletedPhis
> -void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
> -  PhiMap &Map = DeletedPhis[To];
> -  for (BasicBlock::iterator I = To->begin(), E = To->end();
> -       I != E && isa<PHINode>(*I);) {
> -
> -    PHINode &Phi = cast<PHINode>(*I++);
> -    while (Phi.getBasicBlockIndex(From) != -1) {
> -      Value *Deleted = Phi.removeIncomingValue(From, false);
> -      Map[&Phi].push_back(std::make_pair(From, Deleted));
> -    }
> -  }
> -}
> -
> -/// \brief Add the PHI values back once we knew the new predecessor
> -void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
> -  if (!DeletedPhis.count(To))
> -    return;
> -
> -  PhiMap &Map = DeletedPhis[To];
> -  SSAUpdater Updater;
> -
> -  for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
> -
> -    PHINode *Phi = I->first;
> -    Updater.Initialize(Phi->getType(), "");
> -    BasicBlock *Fallback = To;
> -    bool HaveFallback = false;
> -
> -    for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
> -         VI != VE; ++VI) {
> -
> -      Updater.AddAvailableValue(VI->first, VI->second);
> -      BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
> -      if (Dom == VI->first)
> -        HaveFallback = true;
> -      else if (Dom != Fallback)
> -        HaveFallback = false;
> -      Fallback = Dom;
> -    }
> -    if (!HaveFallback) {
> -      Value *Undef = UndefValue::get(Phi->getType());
> -      Updater.AddAvailableValue(Fallback, Undef);
> -    }
> -
> -    Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
> -  }
> -  DeletedPhis.erase(To);
> -}
> -
>  /// \brief Create a new flow node and update dominator tree and region info
>  BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
>    LLVMContext &Context = Func->getContext();
> @@ -554,6 +577,7 @@ BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
>  /// branches only have undefined conditions.
>  void AMDGPUStructurizeCFG::createFlow() {
>    DeletedPhis.clear();
> +  AddedPhis.clear();
>  
>    BasicBlock *Prev = Order.pop_back_val()->getEntry();
>    assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
> @@ -601,18 +625,8 @@ void AMDGPUStructurizeCFG::createFlow() {
>    if (DT->dominates(ParentRegion->getEntry(), Exit))
>      DT->changeImmediateDominator(Exit, Prev);
>  
> -  if (LoopStart && LoopEnd) {
> -    BBVector::iterator FI = std::find(FlowsInserted.begin(),
> -                                      FlowsInserted.end(),
> -                                      LoopStart);
> -    for (; *FI != LoopEnd; ++FI) {
> -      addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
> -    }
> -  }
> -
>    assert(Order.empty());
>    assert(Visited.empty());
> -  assert(DeletedPhis.empty());
>  }
>  
>  /// \brief Insert the missing branch conditions
> @@ -697,12 +711,14 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
>    collectInfos();
>    createFlow();
>    insertConditions();
> +  setPhiValues();
>    rebuildSSA();
>  
>    Order.clear();
>    Visited.clear();
>    Predicates.clear();
>    DeletedPhis.clear();
> +  AddedPhis.clear();
>    FlowsInserted.clear();
>  
>    return true;
> -- 
> 1.7.9.5
> 
-------------- next part --------------
*** IR Dump Before Insert stack protectors ***
define void @main() {
main_body:
  br label %LOOP.outer

LOOP.outer:                                       ; preds = %ENDIF20, %main_body
  %temp4.0.ph = phi float [ 0.000000e+00, %main_body ], [ %23, %ENDIF20 ]
  %temp.0.ph = phi float [ 0.000000e+00, %main_body ], [ %16, %ENDIF20 ]
  br label %LOOP

LOOP:                                             ; preds = %LOOP.outer, %ENDIF
  %temp.0 = phi float [ %16, %ENDIF ], [ %temp.0.ph, %LOOP.outer ]
  %0 = bitcast float %temp.0 to i32
  %1 = icmp sge i32 %0, 20
  %2 = sext i1 %1 to i32
  %3 = bitcast i32 %2 to float
  %4 = bitcast float %3 to i32
  %5 = icmp ne i32 %4, 0
  br i1 %5, label %IF, label %ENDIF

IF:                                               ; preds = %LOOP
  %6 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
  %7 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
  %8 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
  %9 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
  %10 = insertelement <4 x float> undef, float %6, i32 0
  %11 = insertelement <4 x float> %10, float %7, i32 1
  %12 = insertelement <4 x float> %11, float %8, i32 2
  %13 = insertelement <4 x float> %12, float %9, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %13, i32 0, i32 0)
  ret void

ENDIF:                                            ; preds = %LOOP
  %14 = bitcast float %temp.0 to i32
  %15 = add i32 %14, 1
  %16 = bitcast i32 %15 to float
  %17 = bitcast float %16 to i32
  %18 = icmp slt i32 5, %17
  %19 = sext i1 %18 to i32
  %20 = bitcast i32 %19 to float
  %21 = bitcast float %20 to i32
  %22 = icmp ne i32 %21, 0
  br i1 %22, label %LOOP, label %ENDIF20

ENDIF20:                                          ; preds = %ENDIF
  %23 = fadd float %temp4.0.ph, 0x3FB99999A0000000
  br label %LOOP.outer
}
*** IR Dump Before Preliminary module verification ***
define void @main() {
main_body:
  br label %Flow

LOOP.outer:                                       ; preds = %Flow
  %temp4.0.ph = phi float [ %2, %Flow ]
  %temp.0.ph = phi float [ %3, %Flow ]
  br label %Flow1

Flow:                                             ; preds = %Flow2, %main_body
  %0 = phi i64 [ %44, %Flow2 ], [ 0, %main_body ]
  %1 = phi float [ %10, %Flow2 ], [ undef, %main_body ]
  %2 = phi float [ %39, %Flow2 ], [ 0.000000e+00, %main_body ]
  %3 = phi float [ %40, %Flow2 ], [ 0.000000e+00, %main_body ]
  %4 = phi float [ %41, %Flow2 ], [ undef, %main_body ]
  %5 = phi i1 [ %42, %Flow2 ], [ false, %main_body ]
  %6 = phi i1 [ %43, %Flow2 ], [ true, %main_body ]
  %7 = call { i1, i64 } @llvm.SI.if(i1 %6)
  %8 = extractvalue { i1, i64 } %7, 0
  %9 = extractvalue { i1, i64 } %7, 1
  br i1 %8, label %LOOP.outer, label %Flow1

Flow1:                                            ; preds = %LOOP.outer, %Flow
  %10 = phi float [ %temp4.0.ph, %LOOP.outer ], [ %1, %Flow ]
  %11 = phi float [ undef, %LOOP.outer ], [ %2, %Flow ]
  %12 = phi float [ undef, %LOOP.outer ], [ %3, %Flow ]
  %13 = phi float [ %temp.0.ph, %LOOP.outer ], [ %4, %Flow ]
  %14 = phi i1 [ true, %LOOP.outer ], [ %5, %Flow ]
  call void @llvm.SI.end.cf(i64 %9)
  %15 = call { i1, i64 } @llvm.SI.if(i1 %14)
  %16 = extractvalue { i1, i64 } %15, 0
  %17 = extractvalue { i1, i64 } %15, 1
  br i1 %16, label %LOOP, label %Flow2

LOOP:                                             ; preds = %Flow1
  %temp.0 = phi float [ %13, %Flow1 ]
  %18 = bitcast float %temp.0 to i32
  %19 = icmp sge i32 %18, 20
  %20 = sext i1 %19 to i32
  %21 = bitcast i32 %20 to float
  %22 = bitcast float %21 to i32
  %23 = icmp ne i32 %22, 0
  %24 = xor i1 %23, true
  %25 = call { i1, i64 } @llvm.SI.if(i1 %24)
  %26 = extractvalue { i1, i64 } %25, 0
  %27 = extractvalue { i1, i64 } %25, 1
  br i1 %26, label %ENDIF, label %Flow3

Flow4:                                            ; preds = %ENDIF20, %ENDIF
  %28 = phi float [ %68, %ENDIF20 ], [ %11, %ENDIF ]
  %29 = phi float [ %48, %ENDIF20 ], [ %12, %ENDIF ]
  call void @llvm.SI.end.cf(i64 %59)
  br label %Flow3

IF:                                               ; preds = %Flow2
  call void @llvm.SI.end.cf(i64 %44)
  %30 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
  %31 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
  %32 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
  %33 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
  %34 = insertelement <4 x float> undef, float %30, i32 0
  %35 = insertelement <4 x float> %34, float %31, i32 1
  %36 = insertelement <4 x float> %35, float %32, i32 2
  %37 = insertelement <4 x float> %36, float %33, i32 3
  call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
  ret void

Flow2:                                            ; preds = %Flow3, %Flow1
  %38 = phi i64 [ %67, %Flow3 ], [ %0, %Flow1 ]
  %39 = phi float [ %62, %Flow3 ], [ %11, %Flow1 ]
  %40 = phi float [ %63, %Flow3 ], [ %12, %Flow1 ]
  %41 = phi float [ %64, %Flow3 ], [ %13, %Flow1 ]
  %42 = phi i1 [ %65, %Flow3 ], [ false, %Flow1 ]
  %43 = phi i1 [ %66, %Flow3 ], [ false, %Flow1 ]
  %44 = call i64 @llvm.SI.else.break(i64 %17, i64 %38)
  call void @llvm.SI.end.cf(i64 %17)
  %45 = call i1 @llvm.SI.loop(i64 %44)
  br i1 %45, label %IF, label %Flow

ENDIF:                                            ; preds = %LOOP
  %46 = bitcast float %temp.0 to i32
  %47 = add i32 %46, 1
  %48 = bitcast i32 %47 to float
  %49 = bitcast float %48 to i32
  %50 = icmp slt i32 5, %49
  %51 = sext i1 %50 to i32
  %52 = bitcast i32 %51 to float
  %53 = bitcast float %52 to i32
  %54 = icmp ne i32 %53, 0
  %55 = xor i1 %54, true
  %56 = xor i1 %54, true
  %57 = call { i1, i64 } @llvm.SI.if(i1 %56)
  %58 = extractvalue { i1, i64 } %57, 0
  %59 = extractvalue { i1, i64 } %57, 1
  %60 = call i64 @llvm.SI.if.break(i1 %55, i64 %0)
  br i1 %58, label %ENDIF20, label %Flow4

Flow3:                                            ; preds = %Flow4, %LOOP
  %61 = phi i64 [ %60, %Flow4 ], [ %0, %LOOP ]
  %62 = phi float [ %28, %Flow4 ], [ %11, %LOOP ]
  %63 = phi float [ %29, %Flow4 ], [ %12, %LOOP ]
  %64 = phi float [ %48, %Flow4 ], [ undef, %LOOP ]
  %65 = phi i1 [ %54, %Flow4 ], [ false, %LOOP ]
  %66 = phi i1 [ %55, %Flow4 ], [ false, %LOOP ]
  %67 = call i64 @llvm.SI.else.break(i64 %27, i64 %61)
  call void @llvm.SI.end.cf(i64 %27)
  br label %Flow2

ENDIF20:                                          ; preds = %ENDIF
  %68 = fadd float %10, 0x3FB99999A0000000
  br label %Flow4
}


More information about the mesa-dev mailing list