[Mesa-dev] [PATCH 1/2] R600: handle loops to self in the structurizer v2

Wed Jan 23 12:51:28 PST 2013

On Mon, Jan 21, 2013 at 10:28:56PM +0100, Christian König wrote:
> v2: don't mess up other loops
>

Hi Christian,

This patch regresses the glsl1-do-loop test. The test worked fine with v1 of
the patch, but with v2 the structurizer creates an infinite loop.  See the
attached LLVM IR.

-Tom

> Signed-off-by: Christian König <deathsimple at vodafone.de>
> ---
>  lib/Target/R600/AMDGPUStructurizeCFG.cpp |   11 ++++++++++-
>  1 file changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/Target/R600/AMDGPUStructurizeCFG.cpp b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> index 22338b5..5be40de 100644
> --- a/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> +++ b/lib/Target/R600/AMDGPUStructurizeCFG.cpp
> @@ -252,6 +252,11 @@ void AMDGPUStructurizeCFG::analyzeBlock(BasicBlock *BB) {
>    BBPredicates &Pred = Predicates[BB];
>  
>    for (; PI != PE; ++PI) {
> +
> +    // Ignore self loops
> +    if (*PI == BB)
> +      continue;
> +
>      BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
>  
>      for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
> @@ -296,7 +301,9 @@ void AMDGPUStructurizeCFG::collectInfos() {
>    LoopPred.clear();
>  
>    RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
> -  for (Visited.clear(); OI != OE; Visited[(*OI++)->getEntry()] = ++Number) {
> +  for (Visited.clear(); OI != OE; ++OI) {
> +
> +    Visited[(*OI)->getEntry()] = ++Number;
>  
>      // Analyze all the conditions leading to a node
>      analyzeBlock((*OI)->getEntry());
> @@ -568,6 +575,8 @@ void AMDGPUStructurizeCFG::createFlow() {
>      Predicates[Split] = Predicates[Prev];
>      Order.push_back(ParentRegion->getBBNode(Split));
>      LoopPred[Prev] = BoolTrue;
> +    if (LoopEnd == Prev)
> +      LoopEnd = Split;
>  
>    } else if (LoopStart == Order.back()->getEntry()) {
>      // Loop starts behind entry, split entry so that we can jump to it
> -- 
> 1.7.10.4
> 
-------------- next part --------------
; ModuleID = 'tgsi'

; Reproducer function: the LOOP block lists itself as a successor (a
; single-block self-loop), and the only way out of it is the branch to IF.
define void @main() {
main_body:
  ; Four calls to the reserve.reg intrinsic with arguments 0..3, then
  ; unconditionally enter the loop.
  call void @llvm.AMDGPU.reserve.reg(i32 0)
  call void @llvm.AMDGPU.reserve.reg(i32 1)
  call void @llvm.AMDGPU.reserve.reg(i32 2)
  call void @llvm.AMDGPU.reserve.reg(i32 3)
  br label %LOOP

LOOP:                                             ; preds = %LOOP, %main_body
  ; Loop-carried value: 0.0 when arriving from main_body, %0 when arriving
  ; over the back edge from LOOP itself.
  %temp.0 = phi float [ 0.000000e+00, %main_body ], [ %0, %LOOP ]
  ; Add a constant step per iteration (the hex literal is approximately 0.1).
  %0 = fadd float %temp.0, 0x3FB99999A0000000
  ; Compare against a threshold slightly below 0.5; `uge` also yields true
  ; when either operand is unordered (NaN).
  %1 = fcmp uge float %0, 0x3FDFFFFBC0000000
  ; Turn the i1 result into an integer flag: select 1.0/0.0, negate it,
  ; convert to i32, round-trip through float via two bitcasts, and test
  ; the result against zero.
  %2 = select i1 %1, float 1.000000e+00, float 0.000000e+00
  %3 = fsub float -0.000000e+00, %2
  %4 = fptosi float %3 to i32
  %5 = bitcast i32 %4 to float
  %6 = bitcast float %5 to i32
  %7 = icmp ne i32 %6, 0
  ; Leave the loop for IF when the flag is non-zero; otherwise branch back
  ; to LOOP (the self-loop edge).
  br i1 %7, label %IF, label %LOOP

IF:                                               ; preds = %LOOP
  ; Four identical clamp calls on %0 with bounds 0.0 and 1.0 (presumably
  ; clamping to [0, 1] - intrinsic semantics are not visible here).
  %8 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
  %9 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
  %10 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
  %11 = call float @llvm.AMDIL.clamp.(float %0, float 0.000000e+00, float 1.000000e+00)
  ; Pass each result to store.pixel.color with indices 0..3 (presumably one
  ; call per color component - TODO confirm), then return.
  call void @llvm.R600.store.pixel.color(float %8, i32 0)
  call void @llvm.R600.store.pixel.color(float %9, i32 1)
  call void @llvm.R600.store.pixel.color(float %10, i32 2)
  call void @llvm.R600.store.pixel.color(float %11, i32 3)
  ret void
}

; External intrinsics called by @main. Only their signatures are declared
; here; their behavior is defined outside this module.
declare void @llvm.AMDGPU.reserve.reg(i32)

declare float @llvm.AMDIL.clamp.(float, float, float) readnone

declare void @llvm.R600.store.pixel.color(float, i32)