[Mesa-dev] [PATCH 1/3] R600: fix PHI value adding in the structurizer
Tom Stellard
tom at stellard.net
Wed Feb 6 19:00:04 PST 2013
Hi Christian,
I'm still unable to get the glsl1-while-loop with continue test to pass
on R600 with the new structurizer. For this test, the structurizer is
producing strange code and I'm not sure if it is correct. I have
attached the full before and after dumps to this email. Here is the
suspect block:
ENDIF: ; preds = %LOOP
%46 = bitcast float %temp.0 to i32
%47 = add i32 %46, 1
%48 = bitcast i32 %47 to float
%49 = bitcast float %48 to i32
%50 = icmp slt i32 5, %49
%51 = sext i1 %50 to i32
%52 = bitcast i32 %51 to float
%53 = bitcast float %52 to i32
%54 = icmp ne i32 %53, 0
%55 = xor i1 %54, true
%56 = xor i1 %54, true
%57 = call { i1, i64 } @llvm.SI.if(i1 %56)
%58 = extractvalue { i1, i64 } %57, 0
%59 = extractvalue { i1, i64 } %57, 1
%60 = call i64 @llvm.SI.if.break(i1 %55, %i64 %0)
br i1 %58, label %ENDIF20, label %Flow4
Notice how @llvm.SI.if and llvm.SI.if.break use the same condition. Is
this correct?
-Tom
On Mon, Feb 04, 2013 at 03:52:17PM +0100, Christian K??nig wrote:
> From: Christian K??nig <christian.koenig at amd.com>
>
> Otherwise we sometimes produce invalid code.
>
> Signed-off-by: Christian K??nig <christian.koenig at amd.com>
> Tested-by: Michel D??nzer <michel.daenzer at amd.com>
> ---
> lib/Target/R600/AMDGPUStructurizeCFG.cpp | 146 +++++++++++++++++-------------
> 1 file changed, 81 insertions(+), 65 deletions(-)
>
> diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> index 22338b5..c6f0a66 100644
> --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> @@ -41,6 +41,7 @@ typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
> typedef DenseMap<BasicBlock *, Value *> BBPredicates;
> typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
> typedef DenseMap<BasicBlock *, unsigned> VisitedMap;
> +typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
>
> // The name for newly created blocks.
>
> @@ -109,6 +110,7 @@ class AMDGPUStructurizeCFG : public RegionPass {
> VisitedMap Visited;
> PredMap Predicates;
> BBPhiMap DeletedPhis;
> + BB2BBVecMap AddedPhis;
> BBVector FlowsInserted;
>
> BasicBlock *LoopStart;
> @@ -126,16 +128,18 @@ class AMDGPUStructurizeCFG : public RegionPass {
>
> void collectInfos();
>
> + void delPhiValues(BasicBlock *From, BasicBlock *To);
> +
> + void addPhiValues(BasicBlock *From, BasicBlock *To);
> +
> + void setPhiValues();
> +
> bool dominatesPredicates(BasicBlock *A, BasicBlock *B);
>
> void killTerminator(BasicBlock *BB);
>
> RegionNode *skipChained(RegionNode *Node);
>
> - void delPhiValues(BasicBlock *From, BasicBlock *To);
> -
> - void addPhiValues(BasicBlock *From, BasicBlock *To);
> -
> BasicBlock *getNextFlow(BasicBlock *Prev);
>
> bool isPredictableTrue(BasicBlock *Prev, BasicBlock *Node);
> @@ -309,6 +313,76 @@ void AMDGPUStructurizeCFG::collectInfos() {
> }
> }
>
> +/// \brief Remove all PHI values coming from "From" into "To" and remember
> +/// them in DeletedPhis
> +void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
> + PhiMap &Map = DeletedPhis[To];
> + for (BasicBlock::iterator I = To->begin(), E = To->end();
> + I != E && isa<PHINode>(*I);) {
> +
> + PHINode &Phi = cast<PHINode>(*I++);
> + while (Phi.getBasicBlockIndex(From) != -1) {
> + Value *Deleted = Phi.removeIncomingValue(From, false);
> + Map[&Phi].push_back(std::make_pair(From, Deleted));
> + }
> + }
> +}
> +
> +/// \brief Add a dummy PHI value as soon as we knew the new predecessor
> +void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
> + for (BasicBlock::iterator I = To->begin(), E = To->end();
> + I != E && isa<PHINode>(*I);) {
> +
> + PHINode &Phi = cast<PHINode>(*I++);
> + Value *Undef = UndefValue::get(Phi.getType());
> + Phi.addIncoming(Undef, From);
> + }
> + AddedPhis[To].push_back(From);
> +}
> +
> +/// \brief Add the real PHI value as soon as everything is set up
> +void AMDGPUStructurizeCFG::setPhiValues() {
> +
> + SSAUpdater Updater;
> + for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
> + AI != AE; ++AI) {
> +
> + BasicBlock *To = AI->first;
> + BBVector &From = AI->second;
> +
> + if (!DeletedPhis.count(To))
> + continue;
> +
> + PhiMap &Map = DeletedPhis[To];
> + for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
> + PI != PE; ++PI) {
> +
> + PHINode *Phi = PI->first;
> + Value *Undef = UndefValue::get(Phi->getType());
> + Updater.Initialize(Phi->getType(), "");
> + Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
> + Updater.AddAvailableValue(To, Undef);
> +
> + for (BBValueVector::iterator VI = PI->second.begin(),
> + VE = PI->second.end(); VI != VE; ++VI) {
> +
> + Updater.AddAvailableValue(VI->first, VI->second);
> + }
> +
> + for (BBVector::iterator FI = From.begin(), FE = From.end();
> + FI != FE; ++FI) {
> +
> + int Idx = Phi->getBasicBlockIndex(*FI);
> + assert(Idx != -1);
> + Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
> + }
> + }
> +
> + DeletedPhis.erase(To);
> + }
> + assert(DeletedPhis.empty());
> +}
> +
> /// \brief Does A dominate all the predicates of B ?
> bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *A, BasicBlock *B) {
> BBPredicates &Preds = Predicates[B];
> @@ -406,57 +480,6 @@ RegionNode *AMDGPUStructurizeCFG::skipChained(RegionNode *Node) {
> return ParentRegion->getNode(wireFlowBlock(BB, Next));
> }
>
> -/// \brief Remove all PHI values coming from "From" into "To" and remember
> -/// them in DeletedPhis
> -void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
> - PhiMap &Map = DeletedPhis[To];
> - for (BasicBlock::iterator I = To->begin(), E = To->end();
> - I != E && isa<PHINode>(*I);) {
> -
> - PHINode &Phi = cast<PHINode>(*I++);
> - while (Phi.getBasicBlockIndex(From) != -1) {
> - Value *Deleted = Phi.removeIncomingValue(From, false);
> - Map[&Phi].push_back(std::make_pair(From, Deleted));
> - }
> - }
> -}
> -
> -/// \brief Add the PHI values back once we knew the new predecessor
> -void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
> - if (!DeletedPhis.count(To))
> - return;
> -
> - PhiMap &Map = DeletedPhis[To];
> - SSAUpdater Updater;
> -
> - for (PhiMap::iterator I = Map.begin(), E = Map.end(); I != E; ++I) {
> -
> - PHINode *Phi = I->first;
> - Updater.Initialize(Phi->getType(), "");
> - BasicBlock *Fallback = To;
> - bool HaveFallback = false;
> -
> - for (BBValueVector::iterator VI = I->second.begin(), VE = I->second.end();
> - VI != VE; ++VI) {
> -
> - Updater.AddAvailableValue(VI->first, VI->second);
> - BasicBlock *Dom = DT->findNearestCommonDominator(Fallback, VI->first);
> - if (Dom == VI->first)
> - HaveFallback = true;
> - else if (Dom != Fallback)
> - HaveFallback = false;
> - Fallback = Dom;
> - }
> - if (!HaveFallback) {
> - Value *Undef = UndefValue::get(Phi->getType());
> - Updater.AddAvailableValue(Fallback, Undef);
> - }
> -
> - Phi->addIncoming(Updater.GetValueAtEndOfBlock(From), From);
> - }
> - DeletedPhis.erase(To);
> -}
> -
> /// \brief Create a new flow node and update dominator tree and region info
> BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Prev) {
> LLVMContext &Context = Func->getContext();
> @@ -554,6 +577,7 @@ BasicBlock *AMDGPUStructurizeCFG::wireFlowBlock(BasicBlock *Prev,
> /// branches only have undefined conditions.
> void AMDGPUStructurizeCFG::createFlow() {
> DeletedPhis.clear();
> + AddedPhis.clear();
>
> BasicBlock *Prev = Order.pop_back_val()->getEntry();
> assert(Prev == ParentRegion->getEntry() && "Incorrect node order!");
> @@ -601,18 +625,8 @@ void AMDGPUStructurizeCFG::createFlow() {
> if (DT->dominates(ParentRegion->getEntry(), Exit))
> DT->changeImmediateDominator(Exit, Prev);
>
> - if (LoopStart && LoopEnd) {
> - BBVector::iterator FI = std::find(FlowsInserted.begin(),
> - FlowsInserted.end(),
> - LoopStart);
> - for (; *FI != LoopEnd; ++FI) {
> - addPhiValues(*FI, (*FI)->getTerminator()->getSuccessor(0));
> - }
> - }
> -
> assert(Order.empty());
> assert(Visited.empty());
> - assert(DeletedPhis.empty());
> }
>
> /// \brief Insert the missing branch conditions
> @@ -697,12 +711,14 @@ bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
> collectInfos();
> createFlow();
> insertConditions();
> + setPhiValues();
> rebuildSSA();
>
> Order.clear();
> Visited.clear();
> Predicates.clear();
> DeletedPhis.clear();
> + AddedPhis.clear();
> FlowsInserted.clear();
>
> return true;
> --
> 1.7.9.5
>
-------------- next part --------------
*** IR Dump Before Insert stack protectors ***
define void @main() {
main_body:
br label %LOOP.outer
LOOP.outer: ; preds = %ENDIF20, %main_body
%temp4.0.ph = phi float [ 0.000000e+00, %main_body ], [ %23, %ENDIF20 ]
%temp.0.ph = phi float [ 0.000000e+00, %main_body ], [ %16, %ENDIF20 ]
br label %LOOP
LOOP: ; preds = %LOOP.outer, %ENDIF
%temp.0 = phi float [ %16, %ENDIF ], [ %temp.0.ph, %LOOP.outer ]
%0 = bitcast float %temp.0 to i32
%1 = icmp sge i32 %0, 20
%2 = sext i1 %1 to i32
%3 = bitcast i32 %2 to float
%4 = bitcast float %3 to i32
%5 = icmp ne i32 %4, 0
br i1 %5, label %IF, label %ENDIF
IF: ; preds = %LOOP
%6 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
%7 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
%8 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
%9 = call float @llvm.AMDIL.clamp.(float %temp4.0.ph, float 0.000000e+00, float 1.000000e+00)
%10 = insertelement <4 x float> undef, float %6, i32 0
%11 = insertelement <4 x float> %10, float %7, i32 1
%12 = insertelement <4 x float> %11, float %8, i32 2
%13 = insertelement <4 x float> %12, float %9, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %13, i32 0, i32 0)
ret void
ENDIF: ; preds = %LOOP
%14 = bitcast float %temp.0 to i32
%15 = add i32 %14, 1
%16 = bitcast i32 %15 to float
%17 = bitcast float %16 to i32
%18 = icmp slt i32 5, %17
%19 = sext i1 %18 to i32
%20 = bitcast i32 %19 to float
%21 = bitcast float %20 to i32
%22 = icmp ne i32 %21, 0
br i1 %22, label %LOOP, label %ENDIF20
ENDIF20: ; preds = %ENDIF
%23 = fadd float %temp4.0.ph, 0x3FB99999A0000000
br label %LOOP.outer
}
*** IR Dump Before Preliminary module verification ***
define void @main() {
main_body:
br label %Flow
LOOP.outer: ; preds = %Flow
%temp4.0.ph = phi float [ %2, %Flow ]
%temp.0.ph = phi float [ %3, %Flow ]
br label %Flow1
Flow: ; preds = %Flow2, %main_body
%0 = phi i64 [ %44, %Flow2 ], [ 0, %main_body ]
%1 = phi float [ %10, %Flow2 ], [ undef, %main_body ]
%2 = phi float [ %39, %Flow2 ], [ 0.000000e+00, %main_body ]
%3 = phi float [ %40, %Flow2 ], [ 0.000000e+00, %main_body ]
%4 = phi float [ %41, %Flow2 ], [ undef, %main_body ]
%5 = phi i1 [ %42, %Flow2 ], [ false, %main_body ]
%6 = phi i1 [ %43, %Flow2 ], [ true, %main_body ]
%7 = call { i1, i64 } @llvm.SI.if(i1 %6)
%8 = extractvalue { i1, i64 } %7, 0
%9 = extractvalue { i1, i64 } %7, 1
br i1 %8, label %LOOP.outer, label %Flow1
Flow1: ; preds = %LOOP.outer, %Flow
%10 = phi float [ %temp4.0.ph, %LOOP.outer ], [ %1, %Flow ]
%11 = phi float [ undef, %LOOP.outer ], [ %2, %Flow ]
%12 = phi float [ undef, %LOOP.outer ], [ %3, %Flow ]
%13 = phi float [ %temp.0.ph, %LOOP.outer ], [ %4, %Flow ]
%14 = phi i1 [ true, %LOOP.outer ], [ %5, %Flow ]
call void @llvm.SI.end.cf(i64 %9)
%15 = call { i1, i64 } @llvm.SI.if(i1 %14)
%16 = extractvalue { i1, i64 } %15, 0
%17 = extractvalue { i1, i64 } %15, 1
br i1 %16, label %LOOP, label %Flow2
LOOP: ; preds = %Flow1
%temp.0 = phi float [ %13, %Flow1 ]
%18 = bitcast float %temp.0 to i32
%19 = icmp sge i32 %18, 20
%20 = sext i1 %19 to i32
%21 = bitcast i32 %20 to float
%22 = bitcast float %21 to i32
%23 = icmp ne i32 %22, 0
%24 = xor i1 %23, true
%25 = call { i1, i64 } @llvm.SI.if(i1 %24)
%26 = extractvalue { i1, i64 } %25, 0
%27 = extractvalue { i1, i64 } %25, 1
br i1 %26, label %ENDIF, label %Flow3
Flow4: ; preds = %ENDIF20, %ENDIF
%28 = phi float [ %68, %ENDIF20 ], [ %11, %ENDIF ]
%29 = phi float [ %48, %ENDIF20 ], [ %12, %ENDIF ]
call void @llvm.SI.end.cf(i64 %59)
br label %Flow3
IF: ; preds = %Flow2
call void @llvm.SI.end.cf(i64 %44)
%30 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
%31 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
%32 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
%33 = call float @llvm.AMDIL.clamp.(float %10, float 0.000000e+00, float 1.000000e+00)
%34 = insertelement <4 x float> undef, float %30, i32 0
%35 = insertelement <4 x float> %34, float %31, i32 1
%36 = insertelement <4 x float> %35, float %32, i32 2
%37 = insertelement <4 x float> %36, float %33, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %37, i32 0, i32 0)
ret void
Flow2: ; preds = %Flow3, %Flow1
%38 = phi i64 [ %67, %Flow3 ], [ %0, %Flow1 ]
%39 = phi float [ %62, %Flow3 ], [ %11, %Flow1 ]
%40 = phi float [ %63, %Flow3 ], [ %12, %Flow1 ]
%41 = phi float [ %64, %Flow3 ], [ %13, %Flow1 ]
%42 = phi i1 [ %65, %Flow3 ], [ false, %Flow1 ]
%43 = phi i1 [ %66, %Flow3 ], [ false, %Flow1 ]
%44 = call i64 @llvm.SI.else.break(i64 %17, i64 %38)
call void @llvm.SI.end.cf(i64 %17)
%45 = call i1 @llvm.SI.loop(i64 %44)
br i1 %45, label %IF, label %Flow
ENDIF: ; preds = %LOOP
%46 = bitcast float %temp.0 to i32
%47 = add i32 %46, 1
%48 = bitcast i32 %47 to float
%49 = bitcast float %48 to i32
%50 = icmp slt i32 5, %49
%51 = sext i1 %50 to i32
%52 = bitcast i32 %51 to float
%53 = bitcast float %52 to i32
%54 = icmp ne i32 %53, 0
%55 = xor i1 %54, true
%56 = xor i1 %54, true
%57 = call { i1, i64 } @llvm.SI.if(i1 %56)
%58 = extractvalue { i1, i64 } %57, 0
%59 = extractvalue { i1, i64 } %57, 1
%60 = call i64 @llvm.SI.if.break(i1 %55, i64 %0)
br i1 %58, label %ENDIF20, label %Flow4
Flow3: ; preds = %Flow4, %LOOP
%61 = phi i64 [ %60, %Flow4 ], [ %0, %LOOP ]
%62 = phi float [ %28, %Flow4 ], [ %11, %LOOP ]
%63 = phi float [ %29, %Flow4 ], [ %12, %LOOP ]
%64 = phi float [ %48, %Flow4 ], [ undef, %LOOP ]
%65 = phi i1 [ %54, %Flow4 ], [ false, %LOOP ]
%66 = phi i1 [ %55, %Flow4 ], [ false, %LOOP ]
%67 = call i64 @llvm.SI.else.break(i64 %27, i64 %61)
call void @llvm.SI.end.cf(i64 %27)
br label %Flow2
ENDIF20: ; preds = %ENDIF
%68 = fadd float %10, 0x3FB99999A0000000
br label %Flow4
}
More information about the mesa-dev
mailing list