[Mesa-dev] [PATCH 1/2] R600: handle loops to self in the structurizer v2
Tom Stellard
tom at stellard.net
Wed Jan 23 12:51:28 PST 2013
On Mon, Jan 21, 2013 at 10:28:56PM +0100, Christian König wrote:
> v2: don't mess up other loops
>
Hi Christian,
This patch regresses the glsl1-do-loop test. The test worked fine with v1 of
the patch, but with v2 the structurizer creates an infinite loop. See the
attached LLVM IR.
-Tom
> Signed-off-by: Christian König <deathsimple at vodafone.de>
> ---
> lib/Target/R600/AMDGPUStructurizeCFG.cpp | 11 ++++++++++-
> 1 file changed, 10 insertions(+), 1 deletion(-)
>
> diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> index 22338b5..5be40de 100644
> --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> @@ -252,6 +252,11 @@ void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
> BBPredicates &Pred = Predicates[BB];
>
> for (; PI != PE; ++PI) {
> +
> + // Ignore self loops
> + if (*PI == BB)
> + continue;
> +
> BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
>
> for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
> @@ -296,7 +301,9 @@ void AMDGPUStructurizeCFG::collectInfos() {
> LoopPred.clear();
>
> RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
> - for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
> + for (Visited.clear(); OI != OE; ++OI) {
> +
> + Visited[(*OI)->getEntry()] = ++Number;
>
> // Analyze all the conditions leading to a node
> analyzeBlock((*OI)->getEntry());
> @@ -568,6 +575,8 @@ void AMDGPUStructurizeCFG::createFlow() {
> Predicates[Split] = Predicates[Prev];
> Order.push_back(ParentRegion->getBBNode(Split));
> LoopPred[Prev] = BoolTrue;
> + if (LoopEnd == Prev)
> + LoopEnd = Split;
>
> } else if (LoopStart == Order.back()->getEntry()) {
> // Loop starts behind entry, split entry so that we can jump to it
> --
> 1.7.10.4
>
-------------- next part --------------
; ModuleID = 'tgsi'
; Entry function of the module. Its control flow is:
;   main_body -> LOOP -> (LOOP | IF) -> ret
; LOOP is a single-block loop: its conditional branch targets LOOP itself on
; the false edge, so the block is its own predecessor (a "loop to self").
define void @main() {
main_body:
; Four calls to the reserve.reg intrinsic with indices 0..3, then fall into
; the loop.
call void @llvm.AMDGPU.reserve.reg(i32 0)
call void @llvm.AMDGPU.reserve.reg(i32 1)
call void @llvm.AMDGPU.reserve.reg(i32 2)
call void @llvm.AMDGPU.reserve.reg(i32 3)
br label %LOOP
LOOP: ; preds = %LOOP, %main_body
; Loop-carried accumulator: 0.0 on entry from main_body, otherwise the value
; %0 computed by the previous iteration.
%temp.0 = phi float [ 0.000000e+00, %main_body ], [ %0, %LOOP ]
; Increment the accumulator by a constant (hex literal, roughly 0.1).
%0 = fadd float %temp.0, 0x3FB99999A0000000
; Exit test: unordered-or-greater-equal against a constant (hex literal,
; slightly below 0.5).
%1 = fcmp uge float %0, 0x3FDFFFFBC0000000
; The i1 result is turned into a float 1.0/0.0, negated, converted to a
; signed integer, round-tripped through float via bitcasts, and finally
; compared against zero again. %7 is therefore true exactly when %1 is true.
%2 = select i1 %1, float 1.000000e+00, float 0.000000e+00
%3 = fsub float -0.000000e+00, %2
%4 = fptosi float %3 to i32
%5 = bitcast i32 %4 to float
%6 = bitcast float %5 to i32
%7 = icmp ne i32 %6, 0
; True edge leaves the loop to IF; false edge is the back edge to LOOP itself.
br i1 %7, label %IF, label %LOOP
IF: ; preds = %LOOP
; Loop exit block: the final accumulator value %0 is passed to the clamp
; intrinsic with bounds 0.0 and 1.0 four times (presumably clamping to
; [0, 1] -- the intrinsic's semantics are not defined in this module), and
; each result is stored with store.pixel.color using indices 0..3.
%8 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
%9 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
%10 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
%11 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
call void @llvm.R600.store.pixel.color(float %8, i32 0)
call void @llvm.R600.store.pixel.color(float %9, i32 1)
call void @llvm.R600.store.pixel.color(float %10, i32 2)
call void @llvm.R600.store.pixel.color(float %11, i32 3)
ret void
}
; External declarations for the three intrinsics called from @main. No bodies
; are provided in this module; only the clamp intrinsic carries the readnone
; attribute.
declare void @llvm.AMDGPU.reserve.reg(i32)
declare float @llvm.AMDIL.clamp.(float, float, float) readnone
declare void @llvm.R600.store.pixel.color(float, i32)
More information about the mesa-dev
mailing list