[Mesa-dev] [PATCH 0/3] structurizer bugfixes
Tom Stellard
tom at stellard.net
Fri Feb 1 14:58:20 PST 2013
On Fri, Feb 01, 2013 at 04:05:51PM +0100, Christian König wrote:
> Hi guys,
>
> I needed to rearrange the order of patches in this patchset, cause fixing one
> bug lead to the discovery of a couple of other more nasty bugs.
>
> It now fixes four piglit tests with radeonsi and also survives the use cases
> Tom provided without causing any regression.
>
> So please test again, and apply if it now works correctly.
>
Hi Christian,
I've discovered another potential problem, this time with the piglit test:
ext_timer_query-time-elapsed timestamp
What's happening is that the EndCF instructions are being reordered within their
basic block during instruction selection, so if there is an llvm IR block like this:
ENDIF39: ; preds = %IF40, %ENDIF36
%temp.2 = phi float [ %77, %IF40 ], [ %58, %ENDIF36 ]
call void @llvm.R600.endcf(i1 %68)
%78 = bitcast float %temp4.0 to i32
%79 = add i32 %78, 1
%80 = bitcast i32 %79 to float
br label %Flow
It is being emitted, like this:
BB#8: derived from LLVM BB %ENDIF39
Predecessors according to CFG: BB#6 BB#7
%vreg14<def> = PHI %vreg11, <BB#6>, %vreg13, <BB#7>; R600_Reg32:%vreg14,%vreg11,%vreg13
%vreg15<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, -1, %ONE_INT, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg15,%vreg0
R600_ENDIF
BRANCH <BB#5>
I've attached the output of the test I mentioned above. It contains the LLVM
IR before and after the CFG structurizer pass, the SelectionDAG debug output for
the problem block and then the resulting MachineInstrs.
-Tom
-------------- next part --------------
; @main as it looks BEFORE the CFG structurizer pass (first of the two IR dumps
; in this attachment). Shape: one loop (%LOOP) with an early-exit path (%IF,
; which ends in 'ret void') and two if/endif diamonds in the loop body
; (%IF37/%ENDIF36 and %IF40/%ENDIF39).
; Values are carried in 'float' registers but manipulated as integers through
; bitcast round-trips.
define void @main() {
main_body:
br label %LOOP
LOOP: ; preds = %ENDIF39, %main_body
; %temp4.0: counter, starts at 0.0 and is replaced by %70 (its bits + 1) on each back edge.
; %temp.0: accumulator, starts at 0.0 and is replaced by %temp.2 on each back edge.
%temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %70, %ENDIF39 ]
%temp.0 = phi float [ 0.000000e+00, %main_body ], [ %temp.2, %ENDIF39 ]
%0 = bitcast float %temp4.0 to i32
; Element 0 of the <4 x float> loaded from address null in addrspace(9) is the
; bound the counter is compared against (signed >=).
%1 = load <4 x float> addrspace(9)* null
%2 = extractelement <4 x float> %1, i32 0
%3 = bitcast float %2 to i32
%4 = icmp sge i32 %0, %3
; sext -> bitcast -> bitcast -> 'icmp ne 0' round-trips the i1 in %4 back to an i1 (%8).
%5 = sext i1 %4 to i32
%6 = bitcast i32 %5 to float
%7 = bitcast float %6 to i32
%8 = icmp ne i32 %7, 0
%9 = bitcast float %temp.0 to i32
br i1 %8, label %IF, label %ENDIF
IF: ; preds = %LOOP
; Exit path: %16 = sitofp(%9) * (1.0 / sitofp(bound)); the vector
; (clamp(%16), clamp(1.0), clamp(0.0), clamp(0.0)) is passed to
; llvm.R600.store.swizzle and the function returns.
%10 = sitofp i32 %9 to float
%11 = load <4 x float> addrspace(9)* null
%12 = extractelement <4 x float> %11, i32 0
%13 = bitcast float %12 to i32
%14 = sitofp i32 %13 to float
%15 = fdiv float 1.000000e+00, %14
%16 = fmul float %10, %15
%17 = call float @llvm.AMDIL.clamp.(float %16, float 0.000000e+00, float 1.000000e+00)
%18 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
%19 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
%20 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
%21 = insertelement <4 x float> undef, float %17, i32 0
%22 = insertelement <4 x float> %21, float %18, i32 1
%23 = insertelement <4 x float> %22, float %19, i32 2
%24 = insertelement <4 x float> %23, float %20, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %24, i32 0, i32 0)
ret void
ENDIF: ; preds = %LOOP
; %27 = accumulator bits + counter bits; if that is >= the bound, %IF37
; subtracts the bound from it.
%25 = bitcast float %temp4.0 to i32
%26 = add i32 %9, %25
%27 = bitcast i32 %26 to float
%28 = bitcast float %27 to i32
%29 = load <4 x float> addrspace(9)* null
%30 = extractelement <4 x float> %29, i32 0
%31 = bitcast float %30 to i32
%32 = icmp sge i32 %28, %31
%33 = sext i1 %32 to i32
%34 = bitcast i32 %33 to float
%35 = bitcast float %34 to i32
%36 = icmp ne i32 %35, 0
br i1 %36, label %IF37, label %ENDIF36
IF37: ; preds = %ENDIF
; %45 = %27 + (0 - bound)
%37 = load <4 x float> addrspace(9)* null
%38 = extractelement <4 x float> %37, i32 0
%39 = bitcast float %38 to i32
%40 = sub i32 0, %39
%41 = bitcast i32 %40 to float
%42 = bitcast float %27 to i32
%43 = bitcast float %41 to i32
%44 = add i32 %42, %43
%45 = bitcast i32 %44 to float
br label %ENDIF36
ENDIF36: ; preds = %IF37, %ENDIF
; Second add-then-conditionally-subtract step, same shape as %ENDIF/%IF37:
; %49 = %temp.1 bits + counter bits, compared (signed >=) against the bound.
%temp.1 = phi float [ %45, %IF37 ], [ %27, %ENDIF ]
%46 = bitcast float %temp.1 to i32
%47 = bitcast float %temp4.0 to i32
%48 = add i32 %46, %47
%49 = bitcast i32 %48 to float
%50 = bitcast float %49 to i32
%51 = load <4 x float> addrspace(9)* null
%52 = extractelement <4 x float> %51, i32 0
%53 = bitcast float %52 to i32
%54 = icmp sge i32 %50, %53
%55 = sext i1 %54 to i32
%56 = bitcast i32 %55 to float
%57 = bitcast float %56 to i32
%58 = icmp ne i32 %57, 0
br i1 %58, label %IF40, label %ENDIF39
IF40: ; preds = %ENDIF36
; %67 = %49 + (0 - bound)
%59 = load <4 x float> addrspace(9)* null
%60 = extractelement <4 x float> %59, i32 0
%61 = bitcast float %60 to i32
%62 = sub i32 0, %61
%63 = bitcast i32 %62 to float
%64 = bitcast float %49 to i32
%65 = bitcast float %63 to i32
%66 = add i32 %64, %65
%67 = bitcast i32 %66 to float
br label %ENDIF39
ENDIF39: ; preds = %IF40, %ENDIF36
; Loop latch: %temp.2 becomes the next %temp.0, and %70 (counter bits + 1)
; becomes the next %temp4.0.
%temp.2 = phi float [ %67, %IF40 ], [ %49, %ENDIF36 ]
%68 = bitcast float %temp4.0 to i32
%69 = add i32 %68, 1
%70 = bitcast i32 %69 to float
br label %LOOP
}
*** IR Dump Before Preliminary module verification ***
; @main AFTER the CFG structurizer pass (second IR dump in this attachment).
; Compared with the first dump, the pass has added the llvm.R600.if /
; llvm.R600.ifbreak / llvm.R600.loop / llvm.R600.endcf intrinsic calls, a new
; %Flow block, and an i1 phi (%0) in %LOOP.
; NOTE(review): the exact semantics of these intrinsics are defined by the
; backend and are not visible in this dump; the comments below describe only
; how their operands and results are wired together.
define void @main() {
main_body:
br label %LOOP
LOOP: ; preds = %Flow, %main_body
; %0 carries %53 (the llvm.R600.ifbreak result) around the back edge; it has no
; other use inside this function.
%0 = phi i1 [ %53, %Flow ], [ false, %main_body ]
%temp4.0 = phi float [ 0.000000e+00, %main_body ], [ %50, %Flow ]
%temp.0 = phi float [ 0.000000e+00, %main_body ], [ %51, %Flow ]
%1 = bitcast float %temp4.0 to i32
%2 = load <4 x float> addrspace(9)* null
%3 = extractelement <4 x float> %2, i32 0
%4 = bitcast float %3 to i32
%5 = icmp sge i32 %1, %4
%6 = sext i1 %5 to i32
%7 = bitcast i32 %6 to float
%8 = bitcast float %7 to i32
%9 = icmp ne i32 %8, 0
%10 = bitcast float %temp.0 to i32
; The exit condition %9 is inverted, so the "true" successor is now the loop
; body (%ENDIF) and the other successor is %Flow.
%11 = xor i1 %9, true
%12 = call i1 @llvm.R600.if(i1 %11)
br i1 %12, label %ENDIF, label %Flow
IF: ; preds = %Flow
; Exit path, now reached from %Flow. The endcf on %53 (the ifbreak result) is
; the first instruction of the block, ahead of the output computation.
call void @llvm.R600.endcf(i1 %53)
%13 = sitofp i32 %10 to float
%14 = load <4 x float> addrspace(9)* null
%15 = extractelement <4 x float> %14, i32 0
%16 = bitcast float %15 to i32
%17 = sitofp i32 %16 to float
%18 = fdiv float 1.000000e+00, %17
%19 = fmul float %13, %18
%20 = call float @llvm.AMDIL.clamp.(float %19, float 0.000000e+00, float 1.000000e+00)
%21 = call float @llvm.AMDIL.clamp.(float 1.000000e+00, float 0.000000e+00, float 1.000000e+00)
%22 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
%23 = call float @llvm.AMDIL.clamp.(float 0.000000e+00, float 0.000000e+00, float 1.000000e+00)
%24 = insertelement <4 x float> undef, float %20, i32 0
%25 = insertelement <4 x float> %24, float %21, i32 1
%26 = insertelement <4 x float> %25, float %22, i32 2
%27 = insertelement <4 x float> %26, float %23, i32 3
call void @llvm.R600.store.swizzle(<4 x float> %27, i32 0, i32 0)
ret void
ENDIF: ; preds = %LOOP
; First add-then-conditionally-subtract step; the branch condition %39 is now
; routed through llvm.R600.if (%40).
%28 = bitcast float %temp4.0 to i32
%29 = add i32 %10, %28
%30 = bitcast i32 %29 to float
%31 = bitcast float %30 to i32
%32 = load <4 x float> addrspace(9)* null
%33 = extractelement <4 x float> %32, i32 0
%34 = bitcast float %33 to i32
%35 = icmp sge i32 %31, %34
%36 = sext i1 %35 to i32
%37 = bitcast i32 %36 to float
%38 = bitcast float %37 to i32
%39 = icmp ne i32 %38, 0
%40 = call i1 @llvm.R600.if(i1 %39)
br i1 %40, label %IF37, label %ENDIF36
IF37: ; preds = %ENDIF
%41 = load <4 x float> addrspace(9)* null
%42 = extractelement <4 x float> %41, i32 0
%43 = bitcast float %42 to i32
%44 = sub i32 0, %43
%45 = bitcast i32 %44 to float
%46 = bitcast float %30 to i32
%47 = bitcast float %45 to i32
%48 = add i32 %46, %47
%49 = bitcast i32 %48 to float
br label %ENDIF36
Flow: ; preds = %ENDIF39, %LOOP
; Join block added by the structurizer. %50/%51 are the next counter and
; accumulator when arriving from %ENDIF39 and undef when arriving straight
; from %LOOP.
%50 = phi float [ %80, %ENDIF39 ], [ undef, %LOOP ]
%51 = phi float [ %temp.2, %ENDIF39 ], [ undef, %LOOP ]
; The inverted %12 feeds ifbreak; endcf is then called on %12 (the token from
; the llvm.R600.if in %LOOP), and the ifbreak result %53 drives llvm.R600.loop.
%52 = xor i1 %12, true
%53 = call i1 @llvm.R600.ifbreak(i1 %52)
call void @llvm.R600.endcf(i1 %12)
%54 = call i1 @llvm.R600.loop(i1 %53)
br i1 %54, label %IF, label %LOOP
ENDIF36: ; preds = %IF37, %ENDIF
%temp.1 = phi float [ %49, %IF37 ], [ %30, %ENDIF ]
; endcf on %40 (the llvm.R600.if result from %ENDIF) sits immediately after
; the phi, before any other instruction of the block.
call void @llvm.R600.endcf(i1 %40)
%55 = bitcast float %temp.1 to i32
%56 = bitcast float %temp4.0 to i32
%57 = add i32 %55, %56
%58 = bitcast i32 %57 to float
%59 = bitcast float %58 to i32
%60 = load <4 x float> addrspace(9)* null
%61 = extractelement <4 x float> %60, i32 0
%62 = bitcast float %61 to i32
%63 = icmp sge i32 %59, %62
%64 = sext i1 %63 to i32
%65 = bitcast i32 %64 to float
%66 = bitcast float %65 to i32
%67 = icmp ne i32 %66, 0
%68 = call i1 @llvm.R600.if(i1 %67)
br i1 %68, label %IF40, label %ENDIF39
IF40: ; preds = %ENDIF36
%69 = load <4 x float> addrspace(9)* null
%70 = extractelement <4 x float> %69, i32 0
%71 = bitcast float %70 to i32
%72 = sub i32 0, %71
%73 = bitcast i32 %72 to float
%74 = bitcast float %58 to i32
%75 = bitcast float %73 to i32
%76 = add i32 %74, %75
%77 = bitcast i32 %76 to float
br label %ENDIF39
ENDIF39: ; preds = %IF40, %ENDIF36
; This is the problem block from the report: in the IR the endcf on %68 comes
; BEFORE the counter increment (%78..%80), but the MachineInstr dump for BB#8
; shows ADD_INT emitted ahead of R600_ENDIF.
%temp.2 = phi float [ %77, %IF40 ], [ %58, %ENDIF36 ]
call void @llvm.R600.endcf(i1 %68)
%78 = bitcast float %temp4.0 to i32
%79 = add i32 %78, 1
%80 = bitcast i32 %79 to float
br label %Flow
}
Total amount of phi nodes to update: 1
Node 0 : (0x2646428, 2147483661)
Initial selection DAG: BB#8 'main:ENDIF39'
SelectionDAG has 16 nodes:
0x2612150: ch = EntryToken [ORD=59]
0x2612150: <multiple use>
0x2654460: f32 = Register %vreg15
0x2612150: <multiple use>
0x2652c50: f32 = Register %vreg0 [ORD=60]
0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60]
0x2654360: i32 = bitcast 0x2653450 [ORD=60]
0x2652d50: i32 = Constant<1> [ORD=61]
0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61]
0x2652950: f32 = bitcast 0x264f6a0 [ORD=62]
0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950
0x2612150: <multiple use>
0x2654260: i32 = TargetConstant<2628> [ORD=59]
0x2612150: <multiple use>
0x2653f60: i1 = Register %vreg12 [ORD=59]
0x2654060: i1,ch = CopyFromReg 0x2612150, 0x2653f60 [ORD=59]
0x2653a60: ch = llvm.R600.endcf 0x2612150, 0x2654260, 0x2654060 [ORD=59]
0x264f8a0: ch = TokenFactor 0x2652f50, 0x2653a60
0x2653660: ch = BasicBlock<Flow 0x2646098>
0x2653e60: ch = br 0x264f8a0, 0x2653660
Optimized lowered selection DAG: BB#8 'main:ENDIF39'
SelectionDAG has 16 nodes:
0x2612150: ch = EntryToken [ORD=59]
0x2612150: <multiple use>
0x2654460: f32 = Register %vreg15
0x2612150: <multiple use>
0x2652c50: f32 = Register %vreg0 [ORD=60]
0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60]
0x2654360: i32 = bitcast 0x2653450 [ORD=60]
0x2652d50: i32 = Constant<1> [ORD=61]
0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61]
0x2652950: f32 = bitcast 0x264f6a0 [ORD=62]
0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950
0x2612150: <multiple use>
0x2654260: i32 = TargetConstant<2628> [ORD=59]
0x2612150: <multiple use>
0x2653f60: i1 = Register %vreg12 [ORD=59]
0x2654060: i1,ch = CopyFromReg 0x2612150, 0x2653f60 [ORD=59]
0x2653a60: ch = llvm.R600.endcf 0x2612150, 0x2654260, 0x2654060 [ORD=59]
0x264f8a0: ch = TokenFactor 0x2652f50, 0x2653a60
0x2653660: ch = BasicBlock<Flow 0x2646098>
0x2653e60: ch = br 0x264f8a0, 0x2653660
Type-legalized selection DAG: BB#8 'main:ENDIF39'
SelectionDAG has 16 nodes:
0x2612150: ch = EntryToken [ORD=59] [ID=-3]
0x2612150: <multiple use>
0x2654460: f32 = Register %vreg15 [ID=-3]
0x2612150: <multiple use>
0x2652c50: f32 = Register %vreg0 [ORD=60] [ID=-3]
0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] [ID=-3]
0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=-3]
0x2652d50: i32 = Constant<1> [ORD=61] [ID=-3]
0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=-3]
0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=-3]
0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 [ID=-3]
0x2612150: <multiple use>
0x2654260: i32 = TargetConstant<2628> [ORD=59] [ID=-3]
0x2612150: <multiple use>
0x2653f60: i1 = Register %vreg12 [ORD=59] [ID=-3]
0x2654060: i1,ch = CopyFromReg 0x2612150, 0x2653f60 [ORD=59] [ID=-3]
0x2653a60: ch = llvm.R600.endcf 0x2612150, 0x2654260, 0x2654060 [ORD=59] [ID=-3]
0x264f8a0: ch = TokenFactor 0x2652f50, 0x2653a60 [ID=-3]
0x2653660: ch = BasicBlock<Flow 0x2646098> [ID=-3]
0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=-3]
Legalized selection DAG: BB#8 'main:ENDIF39'
SelectionDAG has 13 nodes:
0x2612150: ch = EntryToken [ORD=59] [ID=0]
0x2612150: <multiple use>
0x2654460: f32 = Register %vreg15 [ID=5]
0x2612150: <multiple use>
0x2652c50: f32 = Register %vreg0 [ORD=60] [ID=3]
0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] [ID=8]
0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=10]
0x2652d50: i32 = Constant<1> [ORD=61] [ID=4]
0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=11]
0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=12]
0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 [ID=13]
0x2612150: <multiple use>
0x264fba0: ch = ENDIF 0x2612150
0x264f8a0: ch = TokenFactor 0x2652f50, 0x264fba0 [ID=14]
0x2653660: ch = BasicBlock<Flow 0x2646098> [ID=6]
0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=15]
Optimized legalized selection DAG: BB#8 'main:ENDIF39'
SelectionDAG has 13 nodes:
0x2612150: ch = EntryToken [ORD=59] [ID=0]
0x2612150: <multiple use>
0x2654460: f32 = Register %vreg15 [ID=5]
0x2612150: <multiple use>
0x2652c50: f32 = Register %vreg0 [ORD=60] [ID=3]
0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60] [ID=8]
0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=10]
0x2652d50: i32 = Constant<1> [ORD=61] [ID=4]
0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=11]
0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=12]
0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x2652950 [ID=13]
0x2612150: <multiple use>
0x264fba0: ch = ENDIF 0x2612150
0x264f8a0: ch = TokenFactor 0x2652f50, 0x264fba0 [ID=14]
0x2653660: ch = BasicBlock<Flow 0x2646098> [ID=6]
0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=15]
===== Instruction selection begins: BB#8 'ENDIF39'
ISEL: Starting pattern match on root node: 0x2653e60: ch = br 0x264f8a0, 0x2653660 [ID=12]
Initial Opcode index to 17650
Skipped scope entry (due to false predicate) at index 17660, continuing at 17670
Morphed node: 0x2653e60: ch = BRANCH 0x2653660, 0x264f8a0
ISEL: Match complete!
ISEL: Starting pattern match on root node: 0x2652950: f32 = bitcast 0x264f6a0 [ORD=62] [ID=9]
Initial Opcode index to 10366
TypeSwitch[f32] from 10367 to 10395
ISEL: Match complete!
ISEL: Starting pattern match on root node: 0x264f6a0: i32 = add 0x2654360, 0x2652d50 [ORD=61] [ID=8]
Initial Opcode index to 10845
Morphed node: 0x264f6a0: i32 = ADD_INT 0x2652950, 0x2652950, 0x2654260, 0x2652950, 0x2652950, 0x2652950, 0x2654360, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2652d50, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2654260, 0x2654060, 0x2652950 [ORD=61]
ISEL: Match complete!
ISEL: Starting pattern match on root node: 0x264fba0: ch = ENDIF 0x2612150 [ID=5]
Initial Opcode index to 17637
Morphed node: 0x264fba0: ch = R600_ENDIF 0x2612150
ISEL: Match complete!
ISEL: Starting pattern match on root node: 0x2652d50: i32 = Constant<1> [ORD=61] [ID=2]
Initial Opcode index to 10223
Skipped scope entry (due to false predicate) at index 10225, continuing at 10238
TypeSwitch[i32] from 10240 to 10259
Skipped scope entry (due to false predicate) at index 10261, continuing at 10285
Morphed node: 0x2652d50: i32 = MOV_IMM_I32 0x2654260 [ORD=61]
ISEL: Match complete!
===== Instruction selection ends:
Selected selection DAG: BB#8 'main:ENDIF39'
SelectionDAG has 17 nodes:
0x2612150: ch = EntryToken [ORD=59]
0x2654260: <multiple use>
0x2652d50: i32 = MOV_IMM_I32 0x2654260 [ORD=61]
0x2612150: <multiple use>
0x2652c50: f32 = Register %vreg0 [ORD=60]
0x2653450: f32,ch = CopyFromReg 0x2612150, 0x2652c50 [ORD=60]
0x2653450: <multiple use>
0x2654360: i32 = bitcast 0x2653450 [ORD=60] [ID=7]
0x2652950: i32 = TargetConstant<0>
0x2654260: i32 = TargetConstant<1>
0x2653f60: i32 = TargetConstant<-1>
0x2653660: ch = BasicBlock<Flow 0x2646098>
0x2612150: <multiple use>
0x2654460: f32 = Register %vreg15
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2654260: <multiple use>
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2653450: <multiple use>
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2653f60: <multiple use>
0x2653a60: i32 = Register %ONE_INT
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2652950: <multiple use>
0x2653f60: <multiple use>
0x2654260: <multiple use>
0x2654060: i32 = Register %PRED_SEL_OFF
0x2652950: <multiple use>
0x264f6a0: i32 = ADD_INT 0x2652950, 0x2652950, 0x2654260, 0x2652950, 0x2652950, 0x2652950, 0x2653450, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2653a60, 0x2652950, 0x2652950, 0x2652950, 0x2653f60, 0x2654260, 0x2654060, 0x2652950 [ORD=61]
0x2652f50: ch = CopyToReg 0x2612150, 0x2654460, 0x264f6a0
0x2612150: <multiple use>
0x264fba0: ch = R600_ENDIF 0x2612150
0x264f8a0: ch = TokenFactor 0x2652f50, 0x264fba0
0x2653e60: ch = BRANCH 0x2653660, 0x264f8a0
//===----------------------------------------------------------------------===//
// Final Machine Code
//
// The ADD_INT instruction has been pulled inside the branch.
//===----------------------------------------------------------------------===//
BB#8: derived from LLVM BB %ENDIF39
Predecessors according to CFG: BB#6 BB#7
%vreg14<def> = PHI %vreg11, <BB#6>, %vreg13, <BB#7>; R600_Reg32:%vreg14,%vreg11,%vreg13
%vreg15<def> = ADD_INT 0, 0, 1, 0, 0, 0, %vreg0, 0, 0, 0, -1, %ONE_INT, 0, 0, 0, -1, 1, pred:%PRED_SEL_OFF, 0; R600_Reg32:%vreg15,%vreg0
R600_ENDIF
BRANCH <BB#5>
Successors according to CFG: BB#5
More information about the mesa-dev
mailing list