[Mesa-dev] Mesa (master): Revert "radeon/llvm: Use alloca instructions for larger arrays"
Michel Dänzer
michel at daenzer.net
Thu Jul 21 08:03:16 UTC 2016
On 21.07.2016 00:04, Michel Dänzer wrote:
> On 15.07.2016 05:15, Marek Olšák wrote:
>> Module: Mesa
>> Branch: master
>> Commit: f84e9d749fbb6da73a60fb70e6725db773c9b8f8
>> URL: http://cgit.freedesktop.org/mesa/mesa/commit/?id=f84e9d749fbb6da73a60fb70e6725db773c9b8f8
>>
>> Author: Marek Olšák <marek.olsak at amd.com>
>> Date: Thu Jul 14 22:07:46 2016 +0200
>>
>> Revert "radeon/llvm: Use alloca instructions for larger arrays"
>>
>> This reverts commit 513fccdfb68e6a71180e21827f071617c93fd09b.
>>
>> Bioshock Infinite hangs with that.
>
> Unfortunately, this change caused the piglit test
> shaders@glsl-fs-vec4-indexing-temp-dst-in-loop (and possibly others) to
> hang my Kaveri. Any ideas for how we can get out of this conundrum?
The hang was introduced by LLVM SVN r275934 ("AMDGPU: Expand register
indexing pseudos in custom inserter"). The good/bad (without/with
r275934) shader dumps and the GALLIUM_DDEBUG=800 dump corresponding to
the hang are attached.
BTW, even with Marek's change above reverted, I still see some piglit
regressions compared to last week, but I'm not sure if those are all
related to the same LLVM change.
--
Earthling Michel Dänzer | http://www.amd.com
Libre software enthusiast | Mesa and X developer
-------------- next part --------------
Gallium debugger active. The hang detection timeout is 800 ms.
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[0], TEMP[0]
5: END
radeonsi: Compiling shader 1
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) {
main_body:
%14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0
%16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0)
%17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4)
%18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8)
%19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12)
%20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16)
%21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20)
%22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24)
%23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28)
%24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32)
%25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36)
%26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40)
%27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44)
%28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48)
%29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52)
%30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56)
%31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60)
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
%39 = fmul float %16, %35
%40 = fmul float %17, %35
%41 = fmul float %18, %35
%42 = fmul float %19, %35
%43 = fmul float %20, %36
%44 = fadd float %43, %39
%45 = fmul float %21, %36
%46 = fadd float %45, %40
%47 = fmul float %22, %36
%48 = fadd float %47, %41
%49 = fmul float %23, %36
%50 = fadd float %49, %42
%51 = fmul float %24, %37
%52 = fadd float %51, %44
%53 = fmul float %25, %37
%54 = fadd float %53, %46
%55 = fmul float %26, %37
%56 = fadd float %55, %48
%57 = fmul float %27, %37
%58 = fadd float %57, %50
%59 = fmul float %28, %38
%60 = fadd float %59, %52
%61 = fmul float %29, %38
%62 = fadd float %61, %54
%63 = fmul float %30, %38
%64 = fadd float %63, %56
%65 = fmul float %31, %38
%66 = fadd float %65, %58
%67 = bitcast i32 %11 to float
%68 = insertvalue <{ float, float, float }> undef, float %67, 2
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %62, float %64, float %66)
ret <{ float, float, float }> %68
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{}
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..2]
DCL TEMP[0..7], ARRAY(1), LOCAL
DCL TEMP[8..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 { 0.2000, 0.0000, 0.0000, 0.0000}
IMM[1] INT32 {0, 1, 0, 0}
0: MUL TEMP[0], IMM[0].xyxy, CONST[0].xxxx
1: MUL TEMP[1], IMM[0].yxyy, CONST[0].xxxx
2: MUL TEMP[2], IMM[0].yyxy, CONST[0].xxxx
3: MUL TEMP[3], IMM[0].xxyy, CONST[0].xxxx
4: MUL TEMP[4], IMM[0].yxxy, CONST[0].xxxx
5: MUL TEMP[5], IMM[0].xyxy, CONST[0].xxxx
6: MOV TEMP[6], IMM[0].yyyy
7: MUL TEMP[7], IMM[0].xxxy, CONST[0].xxxx
8: MOV TEMP[8].x, IMM[1].xxxx
9: BGNLOOP :0
10: ISGE TEMP[9].x, TEMP[8].xxxx, CONST[1].xxxx
11: UIF TEMP[9].xxxx :0
12: BRK
13: ENDIF
14: UARL ADDR[0].x, TEMP[8].xxxx
15: MOV TEMP[ADDR[0].x](1), CONST[2]
16: UADD TEMP[8].x, TEMP[8].xxxx, IMM[1].yyyy
17: ENDLOOP :0
18: ADD TEMP[8], TEMP[0], TEMP[1]
19: ADD TEMP[8], TEMP[8], TEMP[2]
20: ADD TEMP[8], TEMP[8], TEMP[3]
21: ADD TEMP[8], TEMP[8], TEMP[4]
22: ADD TEMP[8], TEMP[8], TEMP[5]
23: ADD TEMP[8], TEMP[8], TEMP[6]
24: ADD TEMP[8], TEMP[8], TEMP[7]
25: MOV OUT[0], TEMP[8]
26: END
radeonsi: Compiling shader 2
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
%25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
%26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
%29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
%30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
%31 = fmul float %25, 0x3FC99999A0000000
%32 = fmul float %25, 0.000000e+00
%33 = fmul float %25, 0x3FC99999A0000000
%34 = fmul float %25, 0.000000e+00
%35 = fmul float %25, 0.000000e+00
%36 = fmul float %25, 0x3FC99999A0000000
%37 = fmul float %25, 0.000000e+00
%38 = fmul float %25, 0.000000e+00
%39 = fmul float %25, 0.000000e+00
%40 = fmul float %25, 0.000000e+00
%41 = fmul float %25, 0x3FC99999A0000000
%42 = fmul float %25, 0.000000e+00
%43 = fmul float %25, 0x3FC99999A0000000
%44 = fmul float %25, 0x3FC99999A0000000
%45 = fmul float %25, 0.000000e+00
%46 = fmul float %25, 0.000000e+00
%47 = fmul float %25, 0.000000e+00
%48 = fmul float %25, 0x3FC99999A0000000
%49 = fmul float %25, 0x3FC99999A0000000
%50 = fmul float %25, 0.000000e+00
%51 = fmul float %25, 0x3FC99999A0000000
%52 = fmul float %25, 0.000000e+00
%53 = fmul float %25, 0x3FC99999A0000000
%54 = fmul float %25, 0.000000e+00
%55 = fmul float %25, 0x3FC99999A0000000
%56 = fmul float %25, 0x3FC99999A0000000
%57 = fmul float %25, 0x3FC99999A0000000
%58 = fmul float %25, 0.000000e+00
%59 = bitcast float %26 to i32
br label %LOOP
LOOP: ; preds = %ENDIF, %main_body
%temp2.0 = phi float [ %33, %main_body ], [ %141, %ENDIF ]
%temp3.0 = phi float [ %34, %main_body ], [ %158, %ENDIF ]
%temp4.0 = phi float [ %35, %main_body ], [ %108, %ENDIF ]
%temp5.0 = phi float [ %36, %main_body ], [ %125, %ENDIF ]
%temp6.0 = phi float [ %37, %main_body ], [ %142, %ENDIF ]
%temp7.0 = phi float [ %38, %main_body ], [ %159, %ENDIF ]
%temp8.0 = phi float [ %39, %main_body ], [ %109, %ENDIF ]
%temp9.0 = phi float [ %40, %main_body ], [ %126, %ENDIF ]
%temp10.0 = phi float [ %41, %main_body ], [ %143, %ENDIF ]
%temp11.0 = phi float [ %42, %main_body ], [ %160, %ENDIF ]
%temp12.0 = phi float [ %43, %main_body ], [ %110, %ENDIF ]
%temp13.0 = phi float [ %44, %main_body ], [ %127, %ENDIF ]
%temp14.0 = phi float [ %45, %main_body ], [ %144, %ENDIF ]
%temp15.0 = phi float [ %46, %main_body ], [ %161, %ENDIF ]
%temp16.0 = phi float [ %47, %main_body ], [ %111, %ENDIF ]
%temp17.0 = phi float [ %48, %main_body ], [ %128, %ENDIF ]
%temp18.0 = phi float [ %49, %main_body ], [ %145, %ENDIF ]
%temp19.0 = phi float [ %50, %main_body ], [ %162, %ENDIF ]
%temp20.0 = phi float [ %51, %main_body ], [ %112, %ENDIF ]
%temp21.0 = phi float [ %52, %main_body ], [ %129, %ENDIF ]
%temp22.0 = phi float [ %53, %main_body ], [ %146, %ENDIF ]
%temp23.0 = phi float [ %54, %main_body ], [ %163, %ENDIF ]
%temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %113, %ENDIF ]
%temp25.0 = phi float [ 0.000000e+00, %main_body ], [ %130, %ENDIF ]
%temp26.0 = phi float [ 0.000000e+00, %main_body ], [ %147, %ENDIF ]
%temp27.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
%temp28.0 = phi float [ %55, %main_body ], [ %114, %ENDIF ]
%temp29.0 = phi float [ %56, %main_body ], [ %131, %ENDIF ]
%temp30.0 = phi float [ %57, %main_body ], [ %148, %ENDIF ]
%temp31.0 = phi float [ %58, %main_body ], [ %165, %ENDIF ]
%temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
%temp1.0 = phi float [ %32, %main_body ], [ %124, %ENDIF ]
%temp.0 = phi float [ %31, %main_body ], [ %107, %ENDIF ]
%60 = bitcast float %temp32.0 to i32
%61 = icmp slt i32 %60, %59
br i1 %61, label %ENDIF, label %IF
IF: ; preds = %LOOP
%62 = fadd float %temp.0, %temp4.0
%63 = fadd float %temp1.0, %temp5.0
%64 = fadd float %temp2.0, %temp6.0
%65 = fadd float %temp3.0, %temp7.0
%66 = fadd float %62, %temp8.0
%67 = fadd float %63, %temp9.0
%68 = fadd float %64, %temp10.0
%69 = fadd float %65, %temp11.0
%70 = fadd float %66, %temp12.0
%71 = fadd float %67, %temp13.0
%72 = fadd float %68, %temp14.0
%73 = fadd float %69, %temp15.0
%74 = fadd float %70, %temp16.0
%75 = fadd float %71, %temp17.0
%76 = fadd float %72, %temp18.0
%77 = fadd float %73, %temp19.0
%78 = fadd float %74, %temp20.0
%79 = fadd float %75, %temp21.0
%80 = fadd float %76, %temp22.0
%81 = fadd float %77, %temp23.0
%82 = fadd float %78, %temp24.0
%83 = fadd float %79, %temp25.0
%84 = fadd float %80, %temp26.0
%85 = fadd float %81, %temp27.0
%86 = fadd float %82, %temp28.0
%87 = fadd float %83, %temp29.0
%88 = fadd float %84, %temp30.0
%89 = fadd float %85, %temp31.0
%90 = bitcast float %5 to i32
%91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %90, 10
%92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 11
%93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 12
%94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 13
%95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %89, 14
%96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96
ENDIF: ; preds = %LOOP
%97 = bitcast float %temp32.0 to i32
%98 = insertelement <8 x float> undef, float %temp.0, i32 0
%99 = insertelement <8 x float> %98, float %temp4.0, i32 1
%100 = insertelement <8 x float> %99, float %temp8.0, i32 2
%101 = insertelement <8 x float> %100, float %temp12.0, i32 3
%102 = insertelement <8 x float> %101, float %temp16.0, i32 4
%103 = insertelement <8 x float> %102, float %temp20.0, i32 5
%104 = insertelement <8 x float> %103, float %temp24.0, i32 6
%105 = insertelement <8 x float> %104, float %temp28.0, i32 7
%106 = insertelement <8 x float> %105, float %27, i32 %97
%107 = extractelement <8 x float> %106, i32 0
%108 = extractelement <8 x float> %106, i32 1
%109 = extractelement <8 x float> %106, i32 2
%110 = extractelement <8 x float> %106, i32 3
%111 = extractelement <8 x float> %106, i32 4
%112 = extractelement <8 x float> %106, i32 5
%113 = extractelement <8 x float> %106, i32 6
%114 = extractelement <8 x float> %106, i32 7
%115 = insertelement <8 x float> undef, float %temp1.0, i32 0
%116 = insertelement <8 x float> %115, float %temp5.0, i32 1
%117 = insertelement <8 x float> %116, float %temp9.0, i32 2
%118 = insertelement <8 x float> %117, float %temp13.0, i32 3
%119 = insertelement <8 x float> %118, float %temp17.0, i32 4
%120 = insertelement <8 x float> %119, float %temp21.0, i32 5
%121 = insertelement <8 x float> %120, float %temp25.0, i32 6
%122 = insertelement <8 x float> %121, float %temp29.0, i32 7
%123 = insertelement <8 x float> %122, float %28, i32 %97
%124 = extractelement <8 x float> %123, i32 0
%125 = extractelement <8 x float> %123, i32 1
%126 = extractelement <8 x float> %123, i32 2
%127 = extractelement <8 x float> %123, i32 3
%128 = extractelement <8 x float> %123, i32 4
%129 = extractelement <8 x float> %123, i32 5
%130 = extractelement <8 x float> %123, i32 6
%131 = extractelement <8 x float> %123, i32 7
%132 = insertelement <8 x float> undef, float %temp2.0, i32 0
%133 = insertelement <8 x float> %132, float %temp6.0, i32 1
%134 = insertelement <8 x float> %133, float %temp10.0, i32 2
%135 = insertelement <8 x float> %134, float %temp14.0, i32 3
%136 = insertelement <8 x float> %135, float %temp18.0, i32 4
%137 = insertelement <8 x float> %136, float %temp22.0, i32 5
%138 = insertelement <8 x float> %137, float %temp26.0, i32 6
%139 = insertelement <8 x float> %138, float %temp30.0, i32 7
%140 = insertelement <8 x float> %139, float %29, i32 %97
%141 = extractelement <8 x float> %140, i32 0
%142 = extractelement <8 x float> %140, i32 1
%143 = extractelement <8 x float> %140, i32 2
%144 = extractelement <8 x float> %140, i32 3
%145 = extractelement <8 x float> %140, i32 4
%146 = extractelement <8 x float> %140, i32 5
%147 = extractelement <8 x float> %140, i32 6
%148 = extractelement <8 x float> %140, i32 7
%149 = insertelement <8 x float> undef, float %temp3.0, i32 0
%150 = insertelement <8 x float> %149, float %temp7.0, i32 1
%151 = insertelement <8 x float> %150, float %temp11.0, i32 2
%152 = insertelement <8 x float> %151, float %temp15.0, i32 3
%153 = insertelement <8 x float> %152, float %temp19.0, i32 4
%154 = insertelement <8 x float> %153, float %temp23.0, i32 5
%155 = insertelement <8 x float> %154, float %temp27.0, i32 6
%156 = insertelement <8 x float> %155, float %temp31.0, i32 7
%157 = insertelement <8 x float> %156, float %30, i32 %97
%158 = extractelement <8 x float> %157, i32 0
%159 = extractelement <8 x float> %157, i32 1
%160 = extractelement <8 x float> %157, i32 2
%161 = extractelement <8 x float> %157, i32 3
%162 = extractelement <8 x float> %157, i32 4
%163 = extractelement <8 x float> %157, i32 5
%164 = extractelement <8 x float> %157, i32 6
%165 = extractelement <8 x float> %157, i32 7
%166 = bitcast float %temp32.0 to i32
%167 = add i32 %166, 1
%168 = bitcast i32 %167 to float
br label %LOOP
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
!0 = !{}
radeonsi: Compiling shader 3
Vertex Shader Prolog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
%19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0
%20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %19, i32 %1, 1
%21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %2, 2
%22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, i32 %3, 3
%23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %4, 4
%24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %5, 5
%25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %6, 6
%26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %7, 7
%27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %8, 8
%28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %9, 9
%29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %28, i32 %10, 10
%30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %11, 11
%31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %12, 12
%32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %13, 13
%33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %14, 14
%34 = bitcast i32 %15 to float
%35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, float %34, 15
%36 = bitcast i32 %16 to float
%37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16
%38 = bitcast i32 %17 to float
%39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17
%40 = bitcast i32 %18 to float
%41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18
%42 = add i32 %15, %12
%43 = bitcast i32 %42 to float
%44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %43, 19
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %44
}
radeonsi: Compiling shader 4
Vertex Shader Epilog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs void @main() {
main_body:
ret void
}
Vertex Shader as VS:
Shader prolog disassembly:
v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s4, v3 ; 10000604
v_mul_f32_e32 v1, s5, v3 ; 10020605
v_mul_f32_e32 v7, s6, v3 ; 100E0606
v_mul_f32_e32 v3, s7, v3 ; 10060607
v_mac_f32_e32 v0, s8, v4 ; 3E000808
v_mac_f32_e32 v1, s9, v4 ; 3E020809
v_mac_f32_e32 v7, s10, v4 ; 3E0E080A
v_mac_f32_e32 v3, s11, v4 ; 3E06080B
v_mac_f32_e32 v0, s12, v5 ; 3E000A0C
v_mac_f32_e32 v1, s13, v5 ; 3E020A0D
v_mac_f32_e32 v7, s14, v5 ; 3E0E0A0E
v_mac_f32_e32 v3, s15, v5 ; 3E060A0F
v_mac_f32_e32 v0, s16, v6 ; 3E000C10
v_mac_f32_e32 v1, s17, v6 ; 3E020C11
v_mac_f32_e32 v7, s18, v6 ; 3E0E0C12
v_mac_f32_e32 v3, s0, v6 ; 3E060C00
exp 15, 12, 0, 1, 0, v0, v1, v7, v3 ; F80008CF 03070100
s_waitcnt expcnt(0) ; BF8C0F0F
Shader epilog disassembly:
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
radeonsi: Compiling shader 5
Fragment Shader Epilog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = call i32 @llvm.SI.packf16(float %6, float %7)
%21 = bitcast i32 %20 to float
%22 = call i32 @llvm.SI.packf16(float %8, float %9)
%23 = bitcast i32 %22 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
attributes #0 = { "InitialPSInputAddr"="16777215" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
Pixel Shader:
Shader main disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v4, 0x3e4ccccd ; 7E0802FF 3E4CCCCD
v_mov_b32_e32 v28, 0 ; 7E380280
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s4 ; 7E000204
v_mov_b32_e32 v1, s5 ; 7E020205
v_mov_b32_e32 v2, s6 ; 7E040206
v_mov_b32_e32 v3, s7 ; 7E060207
v_mul_f32_e32 v22, s8, v4 ; 102C0808
v_mul_f32_e64 v23, 0, s8 ; D2100017 00001080
s_branch BB0_1 ; BF820000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_cmp_gt_i32_e32 vcc, s0, v28 ; 7D083800
s_and_b64 vcc, exec, vcc ; 87EA6A7E
s_cbranch_vccz BB0_11 ; BF860000
v_mov_b32_e32 v24, v23 ; 7E300317
v_mov_b32_e32 v25, v22 ; 7E320316
v_mov_b32_e32 v26, v23 ; 7E340317
v_mov_b32_e32 v27, v22 ; 7E360316
v_mov_b32_e32 v29, v22 ; 7E3A0316
v_mov_b32_e32 v4, v22 ; 7E080316
s_mov_b64 s[2:3], exec ; BE82047E
v_mov_b32_e32 v5, v23 ; 7E0A0317
v_mov_b32_e32 v6, v24 ; 7E0C0318
v_mov_b32_e32 v7, v25 ; 7E0E0319
v_mov_b32_e32 v8, v26 ; 7E10031A
v_mov_b32_e32 v9, v27 ; 7E12031B
v_mov_b32_e32 v10, v28 ; 7E14031C
v_mov_b32_e32 v11, v29 ; 7E16031D
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v0 ; 7E088500
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_3 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_mov_b32_e32 v15, v23 ; 7E1E0317
v_mov_b32_e32 v16, v22 ; 7E200316
v_mov_b32_e32 v17, v23 ; 7E220317
v_mov_b32_e32 v18, v22 ; 7E240316
v_mov_b32_e32 v19, v22 ; 7E260316
v_mov_b32_e32 v20, v23 ; 7E280317
v_mov_b32_e32 v21, v28 ; 7E2A031C
v_mov_b32_e32 v4, v15 ; 7E08030F
s_mov_b64 s[2:3], exec ; BE82047E
v_mov_b32_e32 v5, v16 ; 7E0A0310
v_mov_b32_e32 v6, v17 ; 7E0C0311
v_mov_b32_e32 v7, v18 ; 7E0E0312
v_mov_b32_e32 v8, v19 ; 7E100313
v_mov_b32_e32 v9, v20 ; 7E120314
v_mov_b32_e32 v10, v21 ; 7E140315
v_mov_b32_e32 v11, v22 ; 7E160316
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v1 ; 7E088501
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_5 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_mov_b32_e32 v4, v22 ; 7E080316
v_mov_b32_e32 v5, v23 ; 7E0A0317
v_mov_b32_e32 v6, v22 ; 7E0C0316
v_mov_b32_e32 v7, v23 ; 7E0E0317
v_mov_b32_e32 v8, v22 ; 7E100316
v_mov_b32_e32 v9, v22 ; 7E120316
v_mov_b32_e32 v10, v28 ; 7E14031C
v_mov_b32_e32 v11, v22 ; 7E160316
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v2 ; 7E088502
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_7 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_mov_b32_e32 v4, v23 ; 7E080317
v_mov_b32_e32 v5, v23 ; 7E0A0317
v_mov_b32_e32 v6, v23 ; 7E0C0317
v_mov_b32_e32 v7, v23 ; 7E0E0317
v_mov_b32_e32 v8, v23 ; 7E100317
v_mov_b32_e32 v9, v23 ; 7E120317
v_mov_b32_e32 v10, v28 ; 7E14031C
v_mov_b32_e32 v11, v23 ; 7E160317
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v3 ; 7E088503
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_9 ; BF890000
s_branch BB0_10 ; BF820000
v_add_f32_e32 v0, v23, v22 ; 06002D17
v_add_f32_e32 v1, v22, v23 ; 06022F16
v_add_f32_e32 v2, v23, v22 ; 06042D17
v_add_f32_e32 v3, v23, v23 ; 06062F17
v_add_f32_e32 v0, v23, v0 ; 06000117
v_add_f32_e32 v1, v23, v1 ; 06020317
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v22, v1 ; 06020316
v_add_f32_e32 v2, v23, v2 ; 06040517
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v23, v0 ; 06000117
v_add_f32_e32 v1, v22, v1 ; 06020316
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v23, v1 ; 06020317
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v28, v0 ; 0600011C
v_add_f32_e32 v1, v28, v1 ; 0602031C
v_add_f32_e32 v2, v28, v2 ; 0604051C
v_add_f32_e32 v3, v28, v3 ; 0606071C
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v22, v1 ; 06020316
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 32
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 548 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
0: MOV OUT[0], IN[0]
1: END
radeonsi: Compiling shader 6
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6)
%24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6)
%25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6)
%26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6)
%27 = bitcast float %5 to i32
%28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10
%29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11
%30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12
%31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13
%32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14
%33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
0: MOV OUT[0], IN[0]
1: MOV OUT[1], IN[1]
2: END
radeonsi: Compiling shader 7
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) {
main_body:
%15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
%16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
%17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13)
%18 = extractelement <4 x float> %17, i32 0
%19 = extractelement <4 x float> %17, i32 1
%20 = extractelement <4 x float> %17, i32 2
%21 = extractelement <4 x float> %17, i32 3
%22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0
%24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14)
%25 = extractelement <4 x float> %24, i32 0
%26 = extractelement <4 x float> %24, i32 1
%27 = extractelement <4 x float> %24, i32 2
%28 = extractelement <4 x float> %24, i32 3
%29 = bitcast i32 %11 to float
%30 = insertvalue <{ float, float, float }> undef, float %29, 2
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21)
ret <{ float, float, float }> %30
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{}
radeonsi: Compiling shader 8
Vertex Shader Prolog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
%19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0
%20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1
%21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2
%22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %3, 3
%23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4
%24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %5, 5
%25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6
%26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7
%27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8
%28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9
%29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10
%30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11
%31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12
%32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13
%33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14
%34 = bitcast i32 %15 to float
%35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15
%36 = bitcast i32 %16 to float
%37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16
%38 = bitcast i32 %17 to float
%39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17
%40 = bitcast i32 %18 to float
%41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18
%42 = add i32 %15, %12
%43 = bitcast i32 %42 to float
%44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19
%45 = add i32 %15, %12
%46 = bitcast i32 %45 to float
%47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47
}
Vertex Shader as VS:
Shader prolog disassembly:
v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
v_mov_b32_e32 v5, v4 ; 7E0A0304
Shader main disassembly:
s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00
s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604
buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05
s_waitcnt vmcnt(0) ; BF8C0F70
exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A
exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706
s_waitcnt expcnt(0) ; BF8C0F0F
Shader epilog disassembly:
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 64 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
Pixel Shader:
Shader main disassembly:
s_mov_b32 m0, s11 ; BEFC030B
v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202
v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
0: TEX OUT[0], IN[0], SAMP[0], 2D
1: END
radeonsi: Compiling shader 9
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0
%24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0
%25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
%26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0
%27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0
%28 = extractelement <8 x i32> %24, i32 7
%29 = extractelement <4 x i32> %27, i32 0
%30 = and i32 %29, %28
%31 = insertelement <4 x i32> %27, i32 %30, i32 0
%32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12)
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
%43 = bitcast float %5 to i32
%44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10
%45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11
%46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12
%47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13
%48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14
%49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
!0 = !{}
Pixel Shader:
Shader main disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500
s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C
s_mov_b32 m0, s11 ; BEFC030B
v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008
v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009
v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_b32 s0, s0, s19 ; 87001300
v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109
image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000
s_waitcnt vmcnt(0) ; BF8C0F70
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 72 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
dd: GPU hang detected!
dd: Aborting the process...
-------------- next part --------------
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[0], TEMP[0]
5: END
radeonsi: Compiling shader 1
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) {
main_body:
%14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0
%16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0)
%17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4)
%18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8)
%19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12)
%20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16)
%21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20)
%22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24)
%23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28)
%24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32)
%25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36)
%26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40)
%27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44)
%28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48)
%29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52)
%30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56)
%31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60)
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
%39 = fmul float %16, %35
%40 = fmul float %17, %35
%41 = fmul float %18, %35
%42 = fmul float %19, %35
%43 = fmul float %20, %36
%44 = fadd float %43, %39
%45 = fmul float %21, %36
%46 = fadd float %45, %40
%47 = fmul float %22, %36
%48 = fadd float %47, %41
%49 = fmul float %23, %36
%50 = fadd float %49, %42
%51 = fmul float %24, %37
%52 = fadd float %51, %44
%53 = fmul float %25, %37
%54 = fadd float %53, %46
%55 = fmul float %26, %37
%56 = fadd float %55, %48
%57 = fmul float %27, %37
%58 = fadd float %57, %50
%59 = fmul float %28, %38
%60 = fadd float %59, %52
%61 = fmul float %29, %38
%62 = fadd float %61, %54
%63 = fmul float %30, %38
%64 = fadd float %63, %56
%65 = fmul float %31, %38
%66 = fadd float %65, %58
%67 = bitcast i32 %11 to float
%68 = insertvalue <{ float, float, float }> undef, float %67, 2
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %62, float %64, float %66)
ret <{ float, float, float }> %68
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{}
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..2]
DCL TEMP[0..7], ARRAY(1), LOCAL
DCL TEMP[8..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 { 0.2000, 0.0000, 0.0000, 0.0000}
IMM[1] INT32 {0, 1, 0, 0}
0: MUL TEMP[0], IMM[0].xyxy, CONST[0].xxxx
1: MUL TEMP[1], IMM[0].yxyy, CONST[0].xxxx
2: MUL TEMP[2], IMM[0].yyxy, CONST[0].xxxx
3: MUL TEMP[3], IMM[0].xxyy, CONST[0].xxxx
4: MUL TEMP[4], IMM[0].yxxy, CONST[0].xxxx
5: MUL TEMP[5], IMM[0].xyxy, CONST[0].xxxx
6: MOV TEMP[6], IMM[0].yyyy
7: MUL TEMP[7], IMM[0].xxxy, CONST[0].xxxx
8: MOV TEMP[8].x, IMM[1].xxxx
9: BGNLOOP :0
10: ISGE TEMP[9].x, TEMP[8].xxxx, CONST[1].xxxx
11: UIF TEMP[9].xxxx :0
12: BRK
13: ENDIF
14: UARL ADDR[0].x, TEMP[8].xxxx
15: MOV TEMP[ADDR[0].x](1), CONST[2]
16: UADD TEMP[8].x, TEMP[8].xxxx, IMM[1].yyyy
17: ENDLOOP :0
18: ADD TEMP[8], TEMP[0], TEMP[1]
19: ADD TEMP[8], TEMP[8], TEMP[2]
20: ADD TEMP[8], TEMP[8], TEMP[3]
21: ADD TEMP[8], TEMP[8], TEMP[4]
22: ADD TEMP[8], TEMP[8], TEMP[5]
23: ADD TEMP[8], TEMP[8], TEMP[6]
24: ADD TEMP[8], TEMP[8], TEMP[7]
25: MOV OUT[0], TEMP[8]
26: END
radeonsi: Compiling shader 2
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
%25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
%26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
%29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
%30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
%31 = fmul float %25, 0x3FC99999A0000000
%32 = fmul float %25, 0.000000e+00
%33 = fmul float %25, 0x3FC99999A0000000
%34 = fmul float %25, 0.000000e+00
%35 = fmul float %25, 0.000000e+00
%36 = fmul float %25, 0x3FC99999A0000000
%37 = fmul float %25, 0.000000e+00
%38 = fmul float %25, 0.000000e+00
%39 = fmul float %25, 0.000000e+00
%40 = fmul float %25, 0.000000e+00
%41 = fmul float %25, 0x3FC99999A0000000
%42 = fmul float %25, 0.000000e+00
%43 = fmul float %25, 0x3FC99999A0000000
%44 = fmul float %25, 0x3FC99999A0000000
%45 = fmul float %25, 0.000000e+00
%46 = fmul float %25, 0.000000e+00
%47 = fmul float %25, 0.000000e+00
%48 = fmul float %25, 0x3FC99999A0000000
%49 = fmul float %25, 0x3FC99999A0000000
%50 = fmul float %25, 0.000000e+00
%51 = fmul float %25, 0x3FC99999A0000000
%52 = fmul float %25, 0.000000e+00
%53 = fmul float %25, 0x3FC99999A0000000
%54 = fmul float %25, 0.000000e+00
%55 = fmul float %25, 0x3FC99999A0000000
%56 = fmul float %25, 0x3FC99999A0000000
%57 = fmul float %25, 0x3FC99999A0000000
%58 = fmul float %25, 0.000000e+00
%59 = bitcast float %26 to i32
br label %LOOP
LOOP: ; preds = %ENDIF, %main_body
%temp2.0 = phi float [ %33, %main_body ], [ %141, %ENDIF ]
%temp3.0 = phi float [ %34, %main_body ], [ %158, %ENDIF ]
%temp4.0 = phi float [ %35, %main_body ], [ %108, %ENDIF ]
%temp5.0 = phi float [ %36, %main_body ], [ %125, %ENDIF ]
%temp6.0 = phi float [ %37, %main_body ], [ %142, %ENDIF ]
%temp7.0 = phi float [ %38, %main_body ], [ %159, %ENDIF ]
%temp8.0 = phi float [ %39, %main_body ], [ %109, %ENDIF ]
%temp9.0 = phi float [ %40, %main_body ], [ %126, %ENDIF ]
%temp10.0 = phi float [ %41, %main_body ], [ %143, %ENDIF ]
%temp11.0 = phi float [ %42, %main_body ], [ %160, %ENDIF ]
%temp12.0 = phi float [ %43, %main_body ], [ %110, %ENDIF ]
%temp13.0 = phi float [ %44, %main_body ], [ %127, %ENDIF ]
%temp14.0 = phi float [ %45, %main_body ], [ %144, %ENDIF ]
%temp15.0 = phi float [ %46, %main_body ], [ %161, %ENDIF ]
%temp16.0 = phi float [ %47, %main_body ], [ %111, %ENDIF ]
%temp17.0 = phi float [ %48, %main_body ], [ %128, %ENDIF ]
%temp18.0 = phi float [ %49, %main_body ], [ %145, %ENDIF ]
%temp19.0 = phi float [ %50, %main_body ], [ %162, %ENDIF ]
%temp20.0 = phi float [ %51, %main_body ], [ %112, %ENDIF ]
%temp21.0 = phi float [ %52, %main_body ], [ %129, %ENDIF ]
%temp22.0 = phi float [ %53, %main_body ], [ %146, %ENDIF ]
%temp23.0 = phi float [ %54, %main_body ], [ %163, %ENDIF ]
%temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %113, %ENDIF ]
%temp25.0 = phi float [ 0.000000e+00, %main_body ], [ %130, %ENDIF ]
%temp26.0 = phi float [ 0.000000e+00, %main_body ], [ %147, %ENDIF ]
%temp27.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
%temp28.0 = phi float [ %55, %main_body ], [ %114, %ENDIF ]
%temp29.0 = phi float [ %56, %main_body ], [ %131, %ENDIF ]
%temp30.0 = phi float [ %57, %main_body ], [ %148, %ENDIF ]
%temp31.0 = phi float [ %58, %main_body ], [ %165, %ENDIF ]
%temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
%temp1.0 = phi float [ %32, %main_body ], [ %124, %ENDIF ]
%temp.0 = phi float [ %31, %main_body ], [ %107, %ENDIF ]
%60 = bitcast float %temp32.0 to i32
%61 = icmp slt i32 %60, %59
br i1 %61, label %ENDIF, label %IF
IF: ; preds = %LOOP
%62 = fadd float %temp.0, %temp4.0
%63 = fadd float %temp1.0, %temp5.0
%64 = fadd float %temp2.0, %temp6.0
%65 = fadd float %temp3.0, %temp7.0
%66 = fadd float %62, %temp8.0
%67 = fadd float %63, %temp9.0
%68 = fadd float %64, %temp10.0
%69 = fadd float %65, %temp11.0
%70 = fadd float %66, %temp12.0
%71 = fadd float %67, %temp13.0
%72 = fadd float %68, %temp14.0
%73 = fadd float %69, %temp15.0
%74 = fadd float %70, %temp16.0
%75 = fadd float %71, %temp17.0
%76 = fadd float %72, %temp18.0
%77 = fadd float %73, %temp19.0
%78 = fadd float %74, %temp20.0
%79 = fadd float %75, %temp21.0
%80 = fadd float %76, %temp22.0
%81 = fadd float %77, %temp23.0
%82 = fadd float %78, %temp24.0
%83 = fadd float %79, %temp25.0
%84 = fadd float %80, %temp26.0
%85 = fadd float %81, %temp27.0
%86 = fadd float %82, %temp28.0
%87 = fadd float %83, %temp29.0
%88 = fadd float %84, %temp30.0
%89 = fadd float %85, %temp31.0
%90 = bitcast float %5 to i32
%91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %90, 10
%92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 11
%93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 12
%94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 13
%95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %89, 14
%96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96
ENDIF: ; preds = %LOOP
%97 = bitcast float %temp32.0 to i32
%98 = insertelement <8 x float> undef, float %temp.0, i32 0
%99 = insertelement <8 x float> %98, float %temp4.0, i32 1
%100 = insertelement <8 x float> %99, float %temp8.0, i32 2
%101 = insertelement <8 x float> %100, float %temp12.0, i32 3
%102 = insertelement <8 x float> %101, float %temp16.0, i32 4
%103 = insertelement <8 x float> %102, float %temp20.0, i32 5
%104 = insertelement <8 x float> %103, float %temp24.0, i32 6
%105 = insertelement <8 x float> %104, float %temp28.0, i32 7
%106 = insertelement <8 x float> %105, float %27, i32 %97
%107 = extractelement <8 x float> %106, i32 0
%108 = extractelement <8 x float> %106, i32 1
%109 = extractelement <8 x float> %106, i32 2
%110 = extractelement <8 x float> %106, i32 3
%111 = extractelement <8 x float> %106, i32 4
%112 = extractelement <8 x float> %106, i32 5
%113 = extractelement <8 x float> %106, i32 6
%114 = extractelement <8 x float> %106, i32 7
%115 = insertelement <8 x float> undef, float %temp1.0, i32 0
%116 = insertelement <8 x float> %115, float %temp5.0, i32 1
%117 = insertelement <8 x float> %116, float %temp9.0, i32 2
%118 = insertelement <8 x float> %117, float %temp13.0, i32 3
%119 = insertelement <8 x float> %118, float %temp17.0, i32 4
%120 = insertelement <8 x float> %119, float %temp21.0, i32 5
%121 = insertelement <8 x float> %120, float %temp25.0, i32 6
%122 = insertelement <8 x float> %121, float %temp29.0, i32 7
%123 = insertelement <8 x float> %122, float %28, i32 %97
%124 = extractelement <8 x float> %123, i32 0
%125 = extractelement <8 x float> %123, i32 1
%126 = extractelement <8 x float> %123, i32 2
%127 = extractelement <8 x float> %123, i32 3
%128 = extractelement <8 x float> %123, i32 4
%129 = extractelement <8 x float> %123, i32 5
%130 = extractelement <8 x float> %123, i32 6
%131 = extractelement <8 x float> %123, i32 7
%132 = insertelement <8 x float> undef, float %temp2.0, i32 0
%133 = insertelement <8 x float> %132, float %temp6.0, i32 1
%134 = insertelement <8 x float> %133, float %temp10.0, i32 2
%135 = insertelement <8 x float> %134, float %temp14.0, i32 3
%136 = insertelement <8 x float> %135, float %temp18.0, i32 4
%137 = insertelement <8 x float> %136, float %temp22.0, i32 5
%138 = insertelement <8 x float> %137, float %temp26.0, i32 6
%139 = insertelement <8 x float> %138, float %temp30.0, i32 7
%140 = insertelement <8 x float> %139, float %29, i32 %97
%141 = extractelement <8 x float> %140, i32 0
%142 = extractelement <8 x float> %140, i32 1
%143 = extractelement <8 x float> %140, i32 2
%144 = extractelement <8 x float> %140, i32 3
%145 = extractelement <8 x float> %140, i32 4
%146 = extractelement <8 x float> %140, i32 5
%147 = extractelement <8 x float> %140, i32 6
%148 = extractelement <8 x float> %140, i32 7
%149 = insertelement <8 x float> undef, float %temp3.0, i32 0
%150 = insertelement <8 x float> %149, float %temp7.0, i32 1
%151 = insertelement <8 x float> %150, float %temp11.0, i32 2
%152 = insertelement <8 x float> %151, float %temp15.0, i32 3
%153 = insertelement <8 x float> %152, float %temp19.0, i32 4
%154 = insertelement <8 x float> %153, float %temp23.0, i32 5
%155 = insertelement <8 x float> %154, float %temp27.0, i32 6
%156 = insertelement <8 x float> %155, float %temp31.0, i32 7
%157 = insertelement <8 x float> %156, float %30, i32 %97
%158 = extractelement <8 x float> %157, i32 0
%159 = extractelement <8 x float> %157, i32 1
%160 = extractelement <8 x float> %157, i32 2
%161 = extractelement <8 x float> %157, i32 3
%162 = extractelement <8 x float> %157, i32 4
%163 = extractelement <8 x float> %157, i32 5
%164 = extractelement <8 x float> %157, i32 6
%165 = extractelement <8 x float> %157, i32 7
%166 = bitcast float %temp32.0 to i32
%167 = add i32 %166, 1
%168 = bitcast i32 %167 to float
br label %LOOP
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
!0 = !{}
radeonsi: Compiling shader 3
Vertex Shader Prolog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
%19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0
%20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %19, i32 %1, 1
%21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %2, 2
%22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, i32 %3, 3
%23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %4, 4
%24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %5, 5
%25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %6, 6
%26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %7, 7
%27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %8, 8
%28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %9, 9
%29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %28, i32 %10, 10
%30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %11, 11
%31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %12, 12
%32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %13, 13
%33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %14, 14
%34 = bitcast i32 %15 to float
%35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, float %34, 15
%36 = bitcast i32 %16 to float
%37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16
%38 = bitcast i32 %17 to float
%39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17
%40 = bitcast i32 %18 to float
%41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18
%42 = add i32 %15, %12
%43 = bitcast i32 %42 to float
%44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %43, 19
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %44
}
radeonsi: Compiling shader 4
Vertex Shader Epilog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs void @main() {
main_body:
ret void
}
Vertex Shader as VS:
Shader prolog disassembly:
v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s4, v3 ; 10000604
v_mul_f32_e32 v1, s5, v3 ; 10020605
v_mul_f32_e32 v7, s6, v3 ; 100E0606
v_mul_f32_e32 v3, s7, v3 ; 10060607
v_mac_f32_e32 v0, s8, v4 ; 3E000808
v_mac_f32_e32 v1, s9, v4 ; 3E020809
v_mac_f32_e32 v7, s10, v4 ; 3E0E080A
v_mac_f32_e32 v3, s11, v4 ; 3E06080B
v_mac_f32_e32 v0, s12, v5 ; 3E000A0C
v_mac_f32_e32 v1, s13, v5 ; 3E020A0D
v_mac_f32_e32 v7, s14, v5 ; 3E0E0A0E
v_mac_f32_e32 v3, s15, v5 ; 3E060A0F
v_mac_f32_e32 v0, s16, v6 ; 3E000C10
v_mac_f32_e32 v1, s17, v6 ; 3E020C11
v_mac_f32_e32 v7, s18, v6 ; 3E0E0C12
v_mac_f32_e32 v3, s0, v6 ; 3E060C00
exp 15, 12, 0, 1, 0, v0, v1, v7, v3 ; F80008CF 03070100
s_waitcnt expcnt(0) ; BF8C0F0F
Shader epilog disassembly:
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
radeonsi: Compiling shader 5
Fragment Shader Epilog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 {
main_body:
%20 = call i32 @llvm.SI.packf16(float %6, float %7)
%21 = bitcast i32 %20 to float
%22 = call i32 @llvm.SI.packf16(float %8, float %9)
%23 = bitcast i32 %22 to float
call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef)
ret void
}
; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2
attributes #0 = { "InitialPSInputAddr"="16777215" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }
Pixel Shader:
Shader main disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v3, 0x3e4ccccd ; 7E0602FF 3E4CCCCD
v_mov_b32_e32 v36, 0 ; 7E480280
v_mov_b32_e32 v12, 0 ; 7E180280
v_mov_b32_e32 v28, v36 ; 7E380324
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mul_f32_e32 v14, s8, v3 ; 101C0608
v_mul_f32_e64 v3, 0, s8 ; D2100003 00001080
v_mov_b32_e32 v0, s4 ; 7E000204
v_mov_b32_e32 v1, s5 ; 7E020205
v_mov_b32_e32 v2, s6 ; 7E040206
v_mov_b32_e32 v11, s7 ; 7E160207
v_mov_b32_e32 v4, v3 ; 7E080303
v_mov_b32_e32 v16, v14 ; 7E20030E
v_mov_b32_e32 v5, v3 ; 7E0A0303
v_mov_b32_e32 v6, v3 ; 7E0C0303
v_mov_b32_e32 v18, v14 ; 7E24030E
v_mov_b32_e32 v7, v3 ; 7E0E0303
v_mov_b32_e32 v19, v14 ; 7E26030E
v_mov_b32_e32 v8, v3 ; 7E100303
v_mov_b32_e32 v21, v14 ; 7E2A030E
v_mov_b32_e32 v10, v3 ; 7E140303
v_mov_b32_e32 v31, v3 ; 7E3E0303
v_mov_b32_e32 v23, v14 ; 7E2E030E
v_mov_b32_e32 v15, v3 ; 7E1E0303
v_mov_b32_e32 v32, v3 ; 7E400303
v_mov_b32_e32 v24, v3 ; 7E300303
v_mov_b32_e32 v33, v14 ; 7E42030E
v_mov_b32_e32 v25, v14 ; 7E32030E
v_mov_b32_e32 v17, v3 ; 7E220303
v_mov_b32_e32 v34, v3 ; 7E440303
v_mov_b32_e32 v26, v14 ; 7E34030E
v_mov_b32_e32 v35, v14 ; 7E46030E
v_mov_b32_e32 v27, v3 ; 7E360303
v_mov_b32_e32 v20, v36 ; 7E280324
v_mov_b32_e32 v9, v36 ; 7E120324
v_mov_b32_e32 v37, v14 ; 7E4A030E
v_mov_b32_e32 v29, v14 ; 7E3A030E
v_mov_b32_e32 v22, v3 ; 7E2C0303
v_mov_b32_e32 v30, v14 ; 7E3C030E
s_branch BB0_1 ; BF820000
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 vcc_lo, v12 ; 7ED4050C
s_mov_b32 m0, vcc_lo ; BEFC036A
v_cmp_eq_u32_e32 vcc, m0, v12 ; 7D84187C
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v30, v0 ; 7E3C8500
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_4 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 vcc_lo, v12 ; 7ED4050C
s_mov_b32 m0, vcc_lo ; BEFC036A
v_cmp_eq_u32_e32 vcc, m0, v12 ; 7D84187C
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v22, v1 ; 7E2C8501
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_6 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 vcc_lo, v12 ; 7ED4050C
s_mov_b32 m0, vcc_lo ; BEFC036A
v_cmp_eq_u32_e32 vcc, m0, v12 ; 7D84187C
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v14, v2 ; 7E1C8502
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_8 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 vcc_lo, v12 ; 7ED4050C
s_mov_b32 m0, vcc_lo ; BEFC036A
v_cmp_eq_u32_e32 vcc, m0, v12 ; 7D84187C
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v3, v11 ; 7E06850B
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_10 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_add_i32_e32 v12, vcc, 1, v12 ; 4A181881
v_cmp_gt_i32_e32 vcc, s0, v12 ; 7D081800
s_and_b64 vcc, exec, vcc ; 87EA6A7E
s_cbranch_vccnz BB0_3 ; BF870000
v_add_f32_e32 v0, v31, v30 ; 06003D1F
v_add_f32_e32 v1, v23, v22 ; 06022D17
v_add_f32_e32 v2, v15, v14 ; 06041D0F
v_add_f32_e32 v3, v4, v3 ; 06060704
v_add_f32_e32 v0, v32, v0 ; 06000120
v_add_f32_e32 v1, v24, v1 ; 06020318
v_add_f32_e32 v2, v16, v2 ; 06040510
v_add_f32_e32 v3, v5, v3 ; 06060705
v_add_f32_e32 v0, v33, v0 ; 06000121
v_add_f32_e32 v1, v25, v1 ; 06020319
v_add_f32_e32 v2, v17, v2 ; 06040511
v_add_f32_e32 v3, v6, v3 ; 06060706
v_add_f32_e32 v0, v34, v0 ; 06000122
v_add_f32_e32 v1, v26, v1 ; 0602031A
v_add_f32_e32 v2, v18, v2 ; 06040512
v_add_f32_e32 v3, v7, v3 ; 06060707
v_add_f32_e32 v0, v35, v0 ; 06000123
v_add_f32_e32 v1, v27, v1 ; 0602031B
v_add_f32_e32 v2, v19, v2 ; 06040513
v_add_f32_e32 v3, v8, v3 ; 06060708
v_add_f32_e32 v0, v36, v0 ; 06000124
v_add_f32_e32 v1, v28, v1 ; 0602031C
v_add_f32_e32 v2, v20, v2 ; 06040514
v_add_f32_e32 v3, v9, v3 ; 06060709
v_add_f32_e32 v0, v37, v0 ; 06000125
v_add_f32_e32 v1, v29, v1 ; 0602031D
v_add_f32_e32 v2, v21, v2 ; 06040515
v_add_f32_e32 v3, v10, v3 ; 0606070A
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 40
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 492 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 6
********************
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
0: MOV OUT[0], IN[0]
1: END
radeonsi: Compiling shader 6
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6)
%24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6)
%25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6)
%26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6)
%27 = bitcast float %5 to i32
%28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10
%29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11
%30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12
%31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13
%32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14
%33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
0: MOV OUT[0], IN[0]
1: MOV OUT[1], IN[1]
2: END
radeonsi: Compiling shader 7
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) {
main_body:
%15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
%16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
%17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13)
%18 = extractelement <4 x float> %17, i32 0
%19 = extractelement <4 x float> %17, i32 1
%20 = extractelement <4 x float> %17, i32 2
%21 = extractelement <4 x float> %17, i32 3
%22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0
%23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0
%24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14)
%25 = extractelement <4 x float> %24, i32 0
%26 = extractelement <4 x float> %24, i32 1
%27 = extractelement <4 x float> %24, i32 2
%28 = extractelement <4 x float> %24, i32 3
%29 = bitcast i32 %11 to float
%30 = insertvalue <{ float, float, float }> undef, float %29, 2
call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28)
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21)
ret <{ float, float, float }> %30
}
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{}
radeonsi: Compiling shader 8
Vertex Shader Prolog LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
%19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0
%20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1
%21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2
%22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %3, 3
%23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4
%24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %5, 5
%25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6
%26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7
%27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8
%28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9
%29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10
%30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11
%31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12
%32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13
%33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14
%34 = bitcast i32 %15 to float
%35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15
%36 = bitcast i32 %16 to float
%37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16
%38 = bitcast i32 %17 to float
%39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17
%40 = bitcast i32 %18 to float
%41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18
%42 = add i32 %15, %12
%43 = bitcast i32 %42 to float
%44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19
%45 = add i32 %15, %12
%46 = bitcast i32 %45 to float
%47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47
}
Vertex Shader as VS:
Shader prolog disassembly:
v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
v_mov_b32_e32 v5, v4 ; 7E0A0304
Shader main disassembly:
s_load_dwordx4 s[0:3], s[10:11], 0x0 ; C0800B00
s_load_dwordx4 s[4:7], s[10:11], 0x4 ; C0820B04
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen ; E00C2000 80000604
buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05
s_waitcnt vmcnt(0) ; BF8C0F70
exp 15, 32, 0, 0, 0, v10, v11, v12, v13 ; F800020F 0D0C0B0A
exp 15, 12, 0, 1, 0, v6, v7, v8, v9 ; F80008CF 09080706
s_waitcnt expcnt(0) ; BF8C0F0F
Shader epilog disassembly:
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 64 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
Pixel Shader:
Shader main disassembly:
s_mov_b32 m0, s11 ; BEFC030B
v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202
v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
0: TEX OUT[0], IN[0], SAMP[0], 2D
1: END
radeonsi: Compiling shader 9
TGSI shader LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0
%24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0
%25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
%26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0
%27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0
%28 = extractelement <8 x i32> %24, i32 7
%29 = extractelement <4 x i32> %27, i32 0
%30 = and i32 %29, %28
%31 = insertelement <4 x i32> %27, i32 %30, i32 0
%32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12)
%33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12)
%34 = bitcast float %32 to i32
%35 = bitcast float %33 to i32
%36 = insertelement <2 x i32> undef, i32 %34, i32 0
%37 = insertelement <2 x i32> %36, i32 %35, i32 1
%38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
%39 = extractelement <4 x float> %38, i32 0
%40 = extractelement <4 x float> %38, i32 1
%41 = extractelement <4 x float> %38, i32 2
%42 = extractelement <4 x float> %38, i32 3
%43 = bitcast float %5 to i32
%44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10
%45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11
%46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12
%47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13
%48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14
%49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
!0 = !{}
Pixel Shader:
Shader main disassembly:
s_wqm_b64 exec, exec ; BEFE0A7E
s_load_dwordx8 s[12:19], s[4:5], 0x0 ; C0C60500
s_load_dwordx4 s[0:3], s[4:5], 0xc ; C080050C
s_mov_b32 m0, s11 ; BEFC030B
v_interp_p1_f32 v0, v8, 0, 0, [m0] ; C8000008
v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0] ; C8010009
v_interp_p1_f32 v1, v8, 1, 0, [m0] ; C8040108
s_waitcnt lgkmcnt(0) ; BF8C007F
s_and_b32 s0, s0, s19 ; 87001300
v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0] ; C8050109
image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000
s_waitcnt vmcnt(0) ; BF8C0F70
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 72 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
-------------- next part --------------
Driver vendor: X.Org
Device vendor: AMD
Device name: AMD KAVERI (DRM 2.45.0 / 4.6.2+, LLVM 4.0.0)
[1;33mdraw_info: [0m{indexed = 0, mode = triangle_strip, start = 0, count = 4, start_instance = 0, instance_count = 1, vertices_per_patch = 3, index_bias = 0, min_index = 0, max_index = 3, primitive_restart = 0, restart_index = 0, count_from_stream_output = NULL, indirect = NULL, indirect_offset = 0, }
[1;33mvertex_buffer 0: [0m{stride = 16, buffer_offset = 64, buffer = 0x02475fe0, user_buffer = NULL, }
buffer: {target = buffer, format = PIPE_FORMAT_R8_UNORM, width0 = 1048576, height0 = 1, depth0 = 1, array_size = 1, last_level = 0, nr_samples = 0, usage = 3, bind = 16, flags = 3, }
[1;33mnum vertex elements[0m = 1
[1;33mvertex_element 0: [0m{src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
[1;33mnum stream output targets[0m = 0
[1;32mbegin shader: VERTEX[0m
[1;33mshader_state: [0m{tokens = "
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
0: MUL TEMP[0], CONST[0], IN[0].xxxx
1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
4: MOV OUT[0], TEMP[0]
5: END
", }
[1;33mconstant_buffer 0: [0m{buffer = NULL, buffer_offset = 0, buffer_size = 64, user_buffer = 0x02473bd0, }
[1;32mend shader: VERTEX[0m
[1;33mviewport_state 0: [0m{scale = {125, -125, 0.5, }, translate = {125, 125, 0.5, }, }
[1;33mrasterizer_state: [0m{flatshade = 0, light_twoside = 0, clamp_vertex_color = 1, clamp_fragment_color = 0, front_ccw = 1, cull_face = 0, fill_front = 0, fill_back = 0, offset_point = 0, offset_line = 0, offset_tri = 0, scissor = 0, poly_smooth = 0, poly_stipple_enable = 0, point_smooth = 0, sprite_coord_enable = 0, sprite_coord_mode = 0, point_quad_rasterization = 0, point_tri_clip = 0, point_size_per_vertex = 0, multisample = 0, line_smooth = 0, line_stipple_enable = 0, line_stipple_factor = 0, line_stipple_pattern = 65535, line_last_pixel = 0, flatshade_first = 0, half_pixel_center = 1, bottom_edge_rule = 1, rasterizer_discard = 0, depth_clip = 1, clip_halfz = 0, clip_plane_enable = 0, line_width = 1, point_size = 1, offset_units = 0, offset_scale = 0, offset_clamp = 0, }
[1;32mbegin shader: FRAGMENT[0m
[1;33mshader_state: [0m{tokens = "
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..2]
DCL TEMP[0..7], ARRAY(1), LOCAL
DCL TEMP[8..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 { 0.2000, 0.0000, 0.0000, 0.0000}
IMM[1] INT32 {0, 1, 0, 0}
0: MUL TEMP[0], IMM[0].xyxy, CONST[0].xxxx
1: MUL TEMP[1], IMM[0].yxyy, CONST[0].xxxx
2: MUL TEMP[2], IMM[0].yyxy, CONST[0].xxxx
3: MUL TEMP[3], IMM[0].xxyy, CONST[0].xxxx
4: MUL TEMP[4], IMM[0].yxxy, CONST[0].xxxx
5: MUL TEMP[5], IMM[0].xyxy, CONST[0].xxxx
6: MOV TEMP[6], IMM[0].yyyy
7: MUL TEMP[7], IMM[0].xxxy, CONST[0].xxxx
8: MOV TEMP[8].x, IMM[1].xxxx
9: BGNLOOP :0
10: ISGE TEMP[9].x, TEMP[8].xxxx, CONST[1].xxxx
11: UIF TEMP[9].xxxx :0
12: BRK
13: ENDIF
14: UARL ADDR[0].x, TEMP[8].xxxx
15: MOV TEMP[ADDR[0].x](1), CONST[2]
16: UADD TEMP[8].x, TEMP[8].xxxx, IMM[1].yyyy
17: ENDLOOP :0
18: ADD TEMP[8], TEMP[0], TEMP[1]
19: ADD TEMP[8], TEMP[8], TEMP[2]
20: ADD TEMP[8], TEMP[8], TEMP[3]
21: ADD TEMP[8], TEMP[8], TEMP[4]
22: ADD TEMP[8], TEMP[8], TEMP[5]
23: ADD TEMP[8], TEMP[8], TEMP[6]
24: ADD TEMP[8], TEMP[8], TEMP[7]
25: MOV OUT[0], TEMP[8]
26: END
", }
[1;33mconstant_buffer 0: [0m{buffer = NULL, buffer_offset = 0, buffer_size = 48, user_buffer = 0x024ae1b0, }
[1;32mend shader: FRAGMENT[0m
[1;33mdepth_stencil_alpha_state: [0m{depth = {enabled = 0, }, stencil = {{enabled = 0, }, {enabled = 0, }, }, alpha = {enabled = 0, }, }
[1;33mstencil_ref: [0m{ref_value = {0, 0, }, }
[1;33mblend_state: [0m{dither = 1, alpha_to_coverage = 0, alpha_to_one = 0, logicop_enable = 0, independent_blend_enable = 0, rt = {{blend_enable = 0, colormask = 15, }, }, }
[1;33mblend_color: [0m{color = {0, 0, 0, 0, }, }
[1;33mmin_samples[0m = 1
[1;33msample_mask[0m = 0xffffffff
[1;33mframebuffer_state: [0m{width = 250, height = 250, samples = 0, layers = 0, nr_cbufs = 1, cbufs = {0x0246f280, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, zsbuf = NULL, }
[1;33mcbufs[0]:[0m
[1;33msurface: [0m{format = PIPE_FORMAT_B8G8R8A8_UNORM, width = 250, height = 250, texture = 0x0246dc30, u.tex.level = 0, u.tex.first_layer = 0, u.tex.last_layer = 0, }
[1;33mresource: [0m{target = 2d, format = PIPE_FORMAT_B8G8R8A8_UNORM, width0 = 250, height0 = 250, depth0 = 1, array_size = 1, last_level = 0, nr_samples = 0, usage = 0, bind = 1572874, flags = 0, }
*****************************************************************************
Driver-specific state:
Memory-mapped registers:
[1;33mGRBM_STATUS[0m <- ME0PIPE0_CMDFIFO_AVAIL = 8
SRBM_RQ_PENDING = 1
ME0PIPE0_CF_RQ_PENDING = 0
ME0PIPE0_PF_RQ_PENDING = 0
GDS_DMA_RQ_PENDING = 0
DB_CLEAN = 1
CB_CLEAN = 0
TA_BUSY = 0
GDS_BUSY = 0
WD_BUSY_NO_DMA = 0
VGT_BUSY = 0
IA_BUSY_NO_DMA = 0
IA_BUSY = 0
SX_BUSY = 0
WD_BUSY = 0
SPI_BUSY = 1
BCI_BUSY = 0
SC_BUSY = 0
PA_BUSY = 0
DB_BUSY = 1
CP_COHERENCY_BUSY = 0
CP_BUSY = 1
CB_BUSY = 1
GUI_ACTIVE = 1
[1;33mColor buffer 0:[0m
Info: npix_x=250, npix_y=250, npix_z=1, blk_w=1, blk_h=1, blk_d=1, array_size=1, last_level=0, bpe=4, nsamples=1, flags=0x110301, b8g8r8a8_unorm
Layout: size=262144, alignment=16384, bankw=1, bankh=1, nbanks=0, mtilea=2, tilesplit=512, pipeconfig=0, scanout=1
CMask: offset=0, size=1024, alignment=1024, pitch=256, height=256, xalign=256, yalign=256, slice_tile_max=3
Level[0]: offset=0, slice_size=262144, npix_x=250, npix_y=250, npix_z=1, nblk_x=256, nblk_y=256, nblk_z=1, pitch_bytes=1024, mode=3
SHADER KEY
instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
as_es = 0
as_ls = 0
export_prim_id = 0
Vertex Shader as VS - main shader part - LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) {
main_body:
%14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0
%16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0)
%17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4)
%18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8)
%19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12)
%20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16)
%21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20)
%22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24)
%23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28)
%24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32)
%25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36)
%26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40)
%27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44)
%28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48)
%29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52)
%30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56)
%31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60)
%32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
%33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
%34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13)
%35 = extractelement <4 x float> %34, i32 0
%36 = extractelement <4 x float> %34, i32 1
%37 = extractelement <4 x float> %34, i32 2
%38 = extractelement <4 x float> %34, i32 3
%39 = fmul float %16, %35
%40 = fmul float %17, %35
%41 = fmul float %18, %35
%42 = fmul float %19, %35
%43 = fmul float %20, %36
%44 = fadd float %43, %39
%45 = fmul float %21, %36
%46 = fadd float %45, %40
%47 = fmul float %22, %36
%48 = fadd float %47, %41
%49 = fmul float %23, %36
%50 = fadd float %49, %42
%51 = fmul float %24, %37
%52 = fadd float %51, %44
%53 = fmul float %25, %37
%54 = fadd float %53, %46
%55 = fmul float %26, %37
%56 = fadd float %55, %48
%57 = fmul float %27, %37
%58 = fadd float %57, %50
%59 = fmul float %28, %38
%60 = fadd float %59, %52
%61 = fmul float %29, %38
%62 = fadd float %61, %54
%63 = fmul float %30, %38
%64 = fadd float %63, %56
%65 = fmul float %31, %38
%66 = fadd float %65, %58
%67 = bitcast i32 %11 to float
%68 = insertvalue <{ float, float, float }> undef, float %67, 2
call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %62, float %64, float %66)
ret <{ float, float, float }> %68
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0
; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0
; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }
!0 = !{}
Vertex Shader as VS:
Shader prolog disassembly:
v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
s_load_dwordx4 s[4:7], s[10:11], 0x0 ; C0820B00
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
s_waitcnt lgkmcnt(0) ; BF8C007F
buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
s_buffer_load_dword s4, s[0:3], 0x0 ; C2020100
s_buffer_load_dword s5, s[0:3], 0x1 ; C2028101
s_buffer_load_dword s6, s[0:3], 0x2 ; C2030102
s_buffer_load_dword s7, s[0:3], 0x3 ; C2038103
s_buffer_load_dword s8, s[0:3], 0x4 ; C2040104
s_buffer_load_dword s9, s[0:3], 0x5 ; C2048105
s_buffer_load_dword s10, s[0:3], 0x6 ; C2050106
s_buffer_load_dword s11, s[0:3], 0x7 ; C2058107
s_buffer_load_dword s12, s[0:3], 0x8 ; C2060108
s_buffer_load_dword s13, s[0:3], 0x9 ; C2068109
s_buffer_load_dword s14, s[0:3], 0xa ; C207010A
s_buffer_load_dword s15, s[0:3], 0xb ; C207810B
s_buffer_load_dword s16, s[0:3], 0xc ; C208010C
s_buffer_load_dword s17, s[0:3], 0xd ; C208810D
s_buffer_load_dword s18, s[0:3], 0xe ; C209010E
s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
s_waitcnt vmcnt(0) lgkmcnt(0) ; BF8C0070
v_mul_f32_e32 v0, s4, v3 ; 10000604
v_mul_f32_e32 v1, s5, v3 ; 10020605
v_mul_f32_e32 v7, s6, v3 ; 100E0606
v_mul_f32_e32 v3, s7, v3 ; 10060607
v_mac_f32_e32 v0, s8, v4 ; 3E000808
v_mac_f32_e32 v1, s9, v4 ; 3E020809
v_mac_f32_e32 v7, s10, v4 ; 3E0E080A
v_mac_f32_e32 v3, s11, v4 ; 3E06080B
v_mac_f32_e32 v0, s12, v5 ; 3E000A0C
v_mac_f32_e32 v1, s13, v5 ; 3E020A0D
v_mac_f32_e32 v7, s14, v5 ; 3E0E0A0E
v_mac_f32_e32 v3, s15, v5 ; 3E060A0F
v_mac_f32_e32 v0, s16, v6 ; 3E000C10
v_mac_f32_e32 v1, s17, v6 ; 3E020C11
v_mac_f32_e32 v7, s18, v6 ; 3E0E0C12
v_mac_f32_e32 v3, s0, v6 ; 3E060C00
exp 15, 12, 0, 1, 0, v0, v1, v7, v3 ; F80008CF 03070100
s_waitcnt expcnt(0) ; BF8C0F0F
Shader epilog disassembly:
s_endpgm ; BF810000
*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
SHADER KEY
prolog.color_two_side = 0
prolog.flatshade_colors = 0
prolog.poly_stipple = 0
prolog.force_persp_sample_interp = 0
prolog.force_linear_sample_interp = 0
prolog.force_persp_center_interp = 0
prolog.force_linear_center_interp = 0
prolog.bc_optimize_for_persp = 0
prolog.bc_optimize_for_linear = 0
epilog.spi_shader_col_format = 0x4
epilog.color_is_int8 = 0x0
epilog.last_cbuf = 0
epilog.alpha_func = 7
epilog.alpha_to_one = 0
epilog.poly_line_smoothing = 0
epilog.clamp_color = 0
Pixel Shader - main shader part - LLVM IR:
; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"
define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
%23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
%24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
%25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
%26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
%27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
%28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
%29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
%30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
%31 = fmul float %25, 0x3FC99999A0000000
%32 = fmul float %25, 0.000000e+00
%33 = fmul float %25, 0x3FC99999A0000000
%34 = fmul float %25, 0.000000e+00
%35 = fmul float %25, 0.000000e+00
%36 = fmul float %25, 0x3FC99999A0000000
%37 = fmul float %25, 0.000000e+00
%38 = fmul float %25, 0.000000e+00
%39 = fmul float %25, 0.000000e+00
%40 = fmul float %25, 0.000000e+00
%41 = fmul float %25, 0x3FC99999A0000000
%42 = fmul float %25, 0.000000e+00
%43 = fmul float %25, 0x3FC99999A0000000
%44 = fmul float %25, 0x3FC99999A0000000
%45 = fmul float %25, 0.000000e+00
%46 = fmul float %25, 0.000000e+00
%47 = fmul float %25, 0.000000e+00
%48 = fmul float %25, 0x3FC99999A0000000
%49 = fmul float %25, 0x3FC99999A0000000
%50 = fmul float %25, 0.000000e+00
%51 = fmul float %25, 0x3FC99999A0000000
%52 = fmul float %25, 0.000000e+00
%53 = fmul float %25, 0x3FC99999A0000000
%54 = fmul float %25, 0.000000e+00
%55 = fmul float %25, 0x3FC99999A0000000
%56 = fmul float %25, 0x3FC99999A0000000
%57 = fmul float %25, 0x3FC99999A0000000
%58 = fmul float %25, 0.000000e+00
%59 = bitcast float %26 to i32
br label %LOOP
LOOP: ; preds = %ENDIF, %main_body
%temp2.0 = phi float [ %33, %main_body ], [ %141, %ENDIF ]
%temp3.0 = phi float [ %34, %main_body ], [ %158, %ENDIF ]
%temp4.0 = phi float [ %35, %main_body ], [ %108, %ENDIF ]
%temp5.0 = phi float [ %36, %main_body ], [ %125, %ENDIF ]
%temp6.0 = phi float [ %37, %main_body ], [ %142, %ENDIF ]
%temp7.0 = phi float [ %38, %main_body ], [ %159, %ENDIF ]
%temp8.0 = phi float [ %39, %main_body ], [ %109, %ENDIF ]
%temp9.0 = phi float [ %40, %main_body ], [ %126, %ENDIF ]
%temp10.0 = phi float [ %41, %main_body ], [ %143, %ENDIF ]
%temp11.0 = phi float [ %42, %main_body ], [ %160, %ENDIF ]
%temp12.0 = phi float [ %43, %main_body ], [ %110, %ENDIF ]
%temp13.0 = phi float [ %44, %main_body ], [ %127, %ENDIF ]
%temp14.0 = phi float [ %45, %main_body ], [ %144, %ENDIF ]
%temp15.0 = phi float [ %46, %main_body ], [ %161, %ENDIF ]
%temp16.0 = phi float [ %47, %main_body ], [ %111, %ENDIF ]
%temp17.0 = phi float [ %48, %main_body ], [ %128, %ENDIF ]
%temp18.0 = phi float [ %49, %main_body ], [ %145, %ENDIF ]
%temp19.0 = phi float [ %50, %main_body ], [ %162, %ENDIF ]
%temp20.0 = phi float [ %51, %main_body ], [ %112, %ENDIF ]
%temp21.0 = phi float [ %52, %main_body ], [ %129, %ENDIF ]
%temp22.0 = phi float [ %53, %main_body ], [ %146, %ENDIF ]
%temp23.0 = phi float [ %54, %main_body ], [ %163, %ENDIF ]
%temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %113, %ENDIF ]
%temp25.0 = phi float [ 0.000000e+00, %main_body ], [ %130, %ENDIF ]
%temp26.0 = phi float [ 0.000000e+00, %main_body ], [ %147, %ENDIF ]
%temp27.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
%temp28.0 = phi float [ %55, %main_body ], [ %114, %ENDIF ]
%temp29.0 = phi float [ %56, %main_body ], [ %131, %ENDIF ]
%temp30.0 = phi float [ %57, %main_body ], [ %148, %ENDIF ]
%temp31.0 = phi float [ %58, %main_body ], [ %165, %ENDIF ]
%temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
%temp1.0 = phi float [ %32, %main_body ], [ %124, %ENDIF ]
%temp.0 = phi float [ %31, %main_body ], [ %107, %ENDIF ]
%60 = bitcast float %temp32.0 to i32
%61 = icmp slt i32 %60, %59
br i1 %61, label %ENDIF, label %IF
IF: ; preds = %LOOP
%62 = fadd float %temp.0, %temp4.0
%63 = fadd float %temp1.0, %temp5.0
%64 = fadd float %temp2.0, %temp6.0
%65 = fadd float %temp3.0, %temp7.0
%66 = fadd float %62, %temp8.0
%67 = fadd float %63, %temp9.0
%68 = fadd float %64, %temp10.0
%69 = fadd float %65, %temp11.0
%70 = fadd float %66, %temp12.0
%71 = fadd float %67, %temp13.0
%72 = fadd float %68, %temp14.0
%73 = fadd float %69, %temp15.0
%74 = fadd float %70, %temp16.0
%75 = fadd float %71, %temp17.0
%76 = fadd float %72, %temp18.0
%77 = fadd float %73, %temp19.0
%78 = fadd float %74, %temp20.0
%79 = fadd float %75, %temp21.0
%80 = fadd float %76, %temp22.0
%81 = fadd float %77, %temp23.0
%82 = fadd float %78, %temp24.0
%83 = fadd float %79, %temp25.0
%84 = fadd float %80, %temp26.0
%85 = fadd float %81, %temp27.0
%86 = fadd float %82, %temp28.0
%87 = fadd float %83, %temp29.0
%88 = fadd float %84, %temp30.0
%89 = fadd float %85, %temp31.0
%90 = bitcast float %5 to i32
%91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %90, 10
%92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 11
%93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 12
%94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 13
%95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %89, 14
%96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %21, 24
ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96
ENDIF: ; preds = %LOOP
%97 = bitcast float %temp32.0 to i32
%98 = insertelement <8 x float> undef, float %temp.0, i32 0
%99 = insertelement <8 x float> %98, float %temp4.0, i32 1
%100 = insertelement <8 x float> %99, float %temp8.0, i32 2
%101 = insertelement <8 x float> %100, float %temp12.0, i32 3
%102 = insertelement <8 x float> %101, float %temp16.0, i32 4
%103 = insertelement <8 x float> %102, float %temp20.0, i32 5
%104 = insertelement <8 x float> %103, float %temp24.0, i32 6
%105 = insertelement <8 x float> %104, float %temp28.0, i32 7
%106 = insertelement <8 x float> %105, float %27, i32 %97
%107 = extractelement <8 x float> %106, i32 0
%108 = extractelement <8 x float> %106, i32 1
%109 = extractelement <8 x float> %106, i32 2
%110 = extractelement <8 x float> %106, i32 3
%111 = extractelement <8 x float> %106, i32 4
%112 = extractelement <8 x float> %106, i32 5
%113 = extractelement <8 x float> %106, i32 6
%114 = extractelement <8 x float> %106, i32 7
%115 = insertelement <8 x float> undef, float %temp1.0, i32 0
%116 = insertelement <8 x float> %115, float %temp5.0, i32 1
%117 = insertelement <8 x float> %116, float %temp9.0, i32 2
%118 = insertelement <8 x float> %117, float %temp13.0, i32 3
%119 = insertelement <8 x float> %118, float %temp17.0, i32 4
%120 = insertelement <8 x float> %119, float %temp21.0, i32 5
%121 = insertelement <8 x float> %120, float %temp25.0, i32 6
%122 = insertelement <8 x float> %121, float %temp29.0, i32 7
%123 = insertelement <8 x float> %122, float %28, i32 %97
%124 = extractelement <8 x float> %123, i32 0
%125 = extractelement <8 x float> %123, i32 1
%126 = extractelement <8 x float> %123, i32 2
%127 = extractelement <8 x float> %123, i32 3
%128 = extractelement <8 x float> %123, i32 4
%129 = extractelement <8 x float> %123, i32 5
%130 = extractelement <8 x float> %123, i32 6
%131 = extractelement <8 x float> %123, i32 7
%132 = insertelement <8 x float> undef, float %temp2.0, i32 0
%133 = insertelement <8 x float> %132, float %temp6.0, i32 1
%134 = insertelement <8 x float> %133, float %temp10.0, i32 2
%135 = insertelement <8 x float> %134, float %temp14.0, i32 3
%136 = insertelement <8 x float> %135, float %temp18.0, i32 4
%137 = insertelement <8 x float> %136, float %temp22.0, i32 5
%138 = insertelement <8 x float> %137, float %temp26.0, i32 6
%139 = insertelement <8 x float> %138, float %temp30.0, i32 7
%140 = insertelement <8 x float> %139, float %29, i32 %97
%141 = extractelement <8 x float> %140, i32 0
%142 = extractelement <8 x float> %140, i32 1
%143 = extractelement <8 x float> %140, i32 2
%144 = extractelement <8 x float> %140, i32 3
%145 = extractelement <8 x float> %140, i32 4
%146 = extractelement <8 x float> %140, i32 5
%147 = extractelement <8 x float> %140, i32 6
%148 = extractelement <8 x float> %140, i32 7
%149 = insertelement <8 x float> undef, float %temp3.0, i32 0
%150 = insertelement <8 x float> %149, float %temp7.0, i32 1
%151 = insertelement <8 x float> %150, float %temp11.0, i32 2
%152 = insertelement <8 x float> %151, float %temp15.0, i32 3
%153 = insertelement <8 x float> %152, float %temp19.0, i32 4
%154 = insertelement <8 x float> %153, float %temp23.0, i32 5
%155 = insertelement <8 x float> %154, float %temp27.0, i32 6
%156 = insertelement <8 x float> %155, float %temp31.0, i32 7
%157 = insertelement <8 x float> %156, float %30, i32 %97
%158 = extractelement <8 x float> %157, i32 0
%159 = extractelement <8 x float> %157, i32 1
%160 = extractelement <8 x float> %157, i32 2
%161 = extractelement <8 x float> %157, i32 3
%162 = extractelement <8 x float> %157, i32 4
%163 = extractelement <8 x float> %157, i32 5
%164 = extractelement <8 x float> %157, i32 6
%165 = extractelement <8 x float> %157, i32 7
%166 = bitcast float %temp32.0 to i32
%167 = add i32 %166, 1
%168 = bitcast i32 %167 to float
br label %LOOP
}
; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1
attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }
!0 = !{}
Pixel Shader:
Shader main disassembly:
s_load_dwordx4 s[0:3], s[2:3], 0x0 ; C0800300
v_mov_b32_e32 v4, 0x3e4ccccd ; 7E0802FF 3E4CCCCD
v_mov_b32_e32 v28, 0 ; 7E380280
s_waitcnt lgkmcnt(0) ; BF8C007F
s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
s_waitcnt lgkmcnt(0) ; BF8C007F
v_mov_b32_e32 v0, s4 ; 7E000204
v_mov_b32_e32 v1, s5 ; 7E020205
v_mov_b32_e32 v2, s6 ; 7E040206
v_mov_b32_e32 v3, s7 ; 7E060207
v_mul_f32_e32 v22, s8, v4 ; 102C0808
v_mul_f32_e64 v23, 0, s8 ; D2100017 00001080
s_branch BB0_1 ; BF820000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_cmp_gt_i32_e32 vcc, s0, v28 ; 7D083800
s_and_b64 vcc, exec, vcc ; 87EA6A7E
s_cbranch_vccz BB0_11 ; BF860000
v_mov_b32_e32 v24, v23 ; 7E300317
v_mov_b32_e32 v25, v22 ; 7E320316
v_mov_b32_e32 v26, v23 ; 7E340317
v_mov_b32_e32 v27, v22 ; 7E360316
v_mov_b32_e32 v29, v22 ; 7E3A0316
v_mov_b32_e32 v4, v22 ; 7E080316
s_mov_b64 s[2:3], exec ; BE82047E
v_mov_b32_e32 v5, v23 ; 7E0A0317
v_mov_b32_e32 v6, v24 ; 7E0C0318
v_mov_b32_e32 v7, v25 ; 7E0E0319
v_mov_b32_e32 v8, v26 ; 7E10031A
v_mov_b32_e32 v9, v27 ; 7E12031B
v_mov_b32_e32 v10, v28 ; 7E14031C
v_mov_b32_e32 v11, v29 ; 7E16031D
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v0 ; 7E088500
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_3 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_mov_b32_e32 v15, v23 ; 7E1E0317
v_mov_b32_e32 v16, v22 ; 7E200316
v_mov_b32_e32 v17, v23 ; 7E220317
v_mov_b32_e32 v18, v22 ; 7E240316
v_mov_b32_e32 v19, v22 ; 7E260316
v_mov_b32_e32 v20, v23 ; 7E280317
v_mov_b32_e32 v21, v28 ; 7E2A031C
v_mov_b32_e32 v4, v15 ; 7E08030F
s_mov_b64 s[2:3], exec ; BE82047E
v_mov_b32_e32 v5, v16 ; 7E0A0310
v_mov_b32_e32 v6, v17 ; 7E0C0311
v_mov_b32_e32 v7, v18 ; 7E0E0312
v_mov_b32_e32 v8, v19 ; 7E100313
v_mov_b32_e32 v9, v20 ; 7E120314
v_mov_b32_e32 v10, v21 ; 7E140315
v_mov_b32_e32 v11, v22 ; 7E160316
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v1 ; 7E088501
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_5 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_mov_b32_e32 v4, v22 ; 7E080316
v_mov_b32_e32 v5, v23 ; 7E0A0317
v_mov_b32_e32 v6, v22 ; 7E0C0316
v_mov_b32_e32 v7, v23 ; 7E0E0317
v_mov_b32_e32 v8, v22 ; 7E100316
v_mov_b32_e32 v9, v22 ; 7E120316
v_mov_b32_e32 v10, v28 ; 7E14031C
v_mov_b32_e32 v11, v22 ; 7E160316
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v2 ; 7E088502
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_7 ; BF890000
s_mov_b64 exec, s[2:3] ; BEFE0402
v_mov_b32_e32 v4, v23 ; 7E080317
v_mov_b32_e32 v5, v23 ; 7E0A0317
v_mov_b32_e32 v6, v23 ; 7E0C0317
v_mov_b32_e32 v7, v23 ; 7E0E0317
v_mov_b32_e32 v8, v23 ; 7E100317
v_mov_b32_e32 v9, v23 ; 7E120317
v_mov_b32_e32 v10, v28 ; 7E14031C
v_mov_b32_e32 v11, v23 ; 7E160317
s_mov_b64 s[2:3], exec ; BE82047E
v_readfirstlane_b32 s1, v28 ; 7E02051C
v_cmp_eq_u32_e32 vcc, s1, v28 ; 7D843801
s_mov_b32 m0, s1 ; BEFC0301
s_and_saveexec_b64 vcc, vcc ; BEEA246A
v_movreld_b32_e32 v4, v3 ; 7E088503
s_xor_b64 exec, exec, vcc ; 89FE6A7E
s_cbranch_execnz BB0_9 ; BF890000
s_branch BB0_10 ; BF820000
v_add_f32_e32 v0, v23, v22 ; 06002D17
v_add_f32_e32 v1, v22, v23 ; 06022F16
v_add_f32_e32 v2, v23, v22 ; 06042D17
v_add_f32_e32 v3, v23, v23 ; 06062F17
v_add_f32_e32 v0, v23, v0 ; 06000117
v_add_f32_e32 v1, v23, v1 ; 06020317
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v22, v1 ; 06020316
v_add_f32_e32 v2, v23, v2 ; 06040517
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v23, v0 ; 06000117
v_add_f32_e32 v1, v22, v1 ; 06020316
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v23, v1 ; 06020317
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
v_add_f32_e32 v0, v28, v0 ; 0600011C
v_add_f32_e32 v1, v28, v1 ; 0602031C
v_add_f32_e32 v2, v28, v2 ; 0604051C
v_add_f32_e32 v3, v28, v3 ; 0606071C
v_add_f32_e32 v0, v22, v0 ; 06000116
v_add_f32_e32 v1, v22, v1 ; 06020316
v_add_f32_e32 v2, v22, v2 ; 06040516
v_add_f32_e32 v3, v23, v3 ; 06060717
Shader epilog disassembly:
v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
s_endpgm ; BF810000
*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 32
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 548 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
Buffer list (in units of pages = 4kB):
[1;33m Size VM start page VM end page Usage[0m
16 0x0000000000812 0x0000000000822 BORDER_COLORS
1 0x0000000000822 0x0000000000823 IB2
1 -- hole --
1 0x0000000000824 0x0000000000825 CONST_BUFFER
17 -- hole --
1 0x0000000000836 0x0000000000837 TRACE
1 0x0000000000837 0x0000000000838 CMASK
64 0x0000000000838 0x0000000000878 COLOR_BUFFER
256 0x0000000000878 0x0000000000978 CONST_BUFFER, DESCRIPTORS, RINGS_STREAMOUT
256 0x0000000000978 0x0000000000a78 VERTEX_BUFFER
1 0x0000000000a78 0x0000000000a79 USER_SHADER
1 0x0000000000a79 0x0000000000a7a USER_SHADER
Note: The holes represent memory not used by the IB.
Other buffers can still be allocated there.
------------------ IB2: Init config begin ------------------
[1;32mCONTEXT_CONTROL[0m:
0x80000000
0x80000000
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_HOS_MAX_TESS_LEVEL[0m <- 64.0f (0x42800000)
[1;33mVGT_HOS_MIN_TESS_LEVEL[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_GS_PER_ES[0m <- GS_PER_ES = 128 (0x80)
[1;33mVGT_ES_PER_GS[0m <- ES_PER_GS = 64 (0x40)
[1;33mVGT_GS_PER_VS[0m <- GS_PER_VS = 2
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_PRIMITIVEID_RESET[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_STRMOUT_DRAW_OPAQUE_OFFSET[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_STRMOUT_BUFFER_CONFIG[0m <- STREAM_0_BUFFER_EN = 0
STREAM_1_BUFFER_EN = 0
STREAM_2_BUFFER_EN = 0
STREAM_3_BUFFER_EN = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_VTX_CNT_EN[0m <- VTX_CNT_EN = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_CENTROID_PRIORITY_0[0m <- DISTANCE_0 = 0
DISTANCE_1 = 1
DISTANCE_2 = 2
DISTANCE_3 = 3
DISTANCE_4 = 4
DISTANCE_5 = 5
DISTANCE_6 = 6
DISTANCE_7 = 7
[1;33mPA_SC_CENTROID_PRIORITY_1[0m <- DISTANCE_8 = 8
DISTANCE_9 = 9
DISTANCE_10 = 10 (0xa)
DISTANCE_11 = 11 (0xb)
DISTANCE_12 = 12 (0xc)
DISTANCE_13 = 13 (0xd)
DISTANCE_14 = 14 (0xe)
DISTANCE_15 = 15 (0xf)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SU_PRIM_FILTER_CNTL[0m <- TRIANGLE_FILTER_DISABLE = 0
LINE_FILTER_DISABLE = 0
POINT_FILTER_DISABLE = 0
RECTANGLE_FILTER_DISABLE = 0
TRIANGLE_EXPAND_ENA = 0
LINE_EXPAND_ENA = 0
POINT_EXPAND_ENA = 0
RECTANGLE_EXPAND_ENA = 0
PRIM_EXPAND_CONSTANT = 0
XMAX_RIGHT_EXCLUSION = 0
YMAX_BOTTOM_EXCLUSION = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_VPORT_ZMIN_0[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_0[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_1[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_1[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_2[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_2[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_3[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_3[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_4[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_4[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_5[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_5[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_6[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_6[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_7[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_7[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_8[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_8[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_9[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_9[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_10[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_10[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_11[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_11[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_12[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_12[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_13[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_13[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_14[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_14[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_VPORT_ZMIN_15[0m <- 0
[1;33mPA_SC_VPORT_ZMAX_15[0m <- 1.0f (0x3f800000)
[1;33mPA_SC_RASTER_CONFIG[0m <- RB_MAP_PKR0 = RASTER_CONFIG_RB_MAP_0
RB_MAP_PKR1 = RASTER_CONFIG_RB_MAP_0
RB_XSEL2 = RASTER_CONFIG_RB_XSEL2_0
RB_XSEL = 0
RB_YSEL = 0
PKR_MAP = RASTER_CONFIG_PKR_MAP_0
PKR_XSEL = RASTER_CONFIG_PKR_XSEL_0
PKR_YSEL = RASTER_CONFIG_PKR_YSEL_0
PKR_XSEL2 = RASTER_CONFIG_PKR_XSEL2_0
SC_MAP = RASTER_CONFIG_SC_MAP_0
SC_XSEL = RASTER_CONFIG_SC_XSEL_8_WIDE_TILE
SC_YSEL = RASTER_CONFIG_SC_YSEL_8_WIDE_TILE
SE_MAP = RASTER_CONFIG_SE_MAP_0
SE_XSEL = RASTER_CONFIG_SE_XSEL_8_WIDE_TILE
SE_YSEL = RASTER_CONFIG_SE_YSEL_8_WIDE_TILE
[1;33mPA_SC_RASTER_CONFIG_1[0m <- SE_PAIR_MAP = RASTER_CONFIG_SE_PAIR_MAP_0
SE_PAIR_XSEL = RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE
SE_PAIR_YSEL = RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_WINDOW_SCISSOR_TL[0m <- TL_X = 0
TL_Y = 0
WINDOW_OFFSET_DISABLE = 1
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_GENERIC_SCISSOR_TL[0m <- TL_X = 0
TL_Y = 0
WINDOW_OFFSET_DISABLE = 1
[1;33mPA_SC_GENERIC_SCISSOR_BR[0m <- BR_X = 16384 (0x4000)
BR_Y = 16384 (0x4000)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_SCREEN_SCISSOR_TL[0m <- TL_X = 0
TL_Y = 0
[1;33mPA_SC_SCREEN_SCISSOR_BR[0m <- BR_X = 16384 (0x4000)
BR_Y = 16384 (0x4000)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_CLIPRECT_RULE[0m <- CLIP_RULE = 0xffff
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_EDGERULE[0m <- ER_TRI = 10 (0xa)
ER_POINT = 10 (0xa)
ER_RECT = 10 (0xa)
ER_LINE_LR = 26 (0x1a)
ER_LINE_RL = 38 (0x26)
ER_LINE_TB = 10 (0xa)
ER_LINE_BT = 10 (0xa)
[1;33mPA_SU_HARDWARE_SCREEN_OFFSET[0m <- HW_SCREEN_OFFSET_X = 0
HW_SCREEN_OFFSET_Y = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_NANINF_CNTL[0m <- VTE_XY_INF_DISCARD = 0
VTE_Z_INF_DISCARD = 0
VTE_W_INF_DISCARD = 0
VTE_0XNANINF_IS_0 = 0
VTE_XY_NAN_RETAIN = 0
VTE_Z_NAN_RETAIN = 0
VTE_W_NAN_RETAIN = 0
VTE_W_RECIP_NAN_IS_0 = 0
VS_XY_NAN_TO_INF = 0
VS_XY_INF_RETAIN = 0
VS_Z_NAN_TO_INF = 0
VS_Z_INF_RETAIN = 0
VS_W_NAN_TO_INF = 0
VS_W_INF_RETAIN = 0
VS_CLIP_DIST_INF_DISCARD = 0
VTE_NO_OUTPUT_NEG_0 = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_SRESULTS_COMPARE_STATE0[0m <- COMPAREFUNC0 = REF_NEVER
COMPAREVALUE0 = 0
COMPAREMASK0 = 0
ENABLE0 = 0
[1;33mDB_SRESULTS_COMPARE_STATE1[0m <- COMPAREFUNC1 = REF_NEVER
COMPAREVALUE1 = 0
COMPAREMASK1 = 0
ENABLE1 = 0
[1;33mDB_PRELOAD_CONTROL[0m <- START_X = 0
START_Y = 0
MAX_X = 0
MAX_Y = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_RENDER_OVERRIDE[0m <- FORCE_HIZ_ENABLE = FORCE_OFF
FORCE_HIS_ENABLE0 = FORCE_DISABLE
FORCE_HIS_ENABLE1 = FORCE_DISABLE
FORCE_SHADER_Z_ORDER = 0
FAST_Z_DISABLE = 0
FAST_STENCIL_DISABLE = 0
NOOP_CULL_DISABLE = 0
FORCE_COLOR_KILL = 0
FORCE_Z_READ = 0
FORCE_STENCIL_READ = 0
FORCE_FULL_Z_RANGE = FORCE_OFF
FORCE_QC_SMASK_CONFLICT = 0
DISABLE_VIEWPORT_CLAMP = 0
IGNORE_SC_ZRANGE = 0
DISABLE_FULLY_COVERED = 0
FORCE_Z_LIMIT_SUMM = FORCE_SUMM_OFF
MAX_TILES_IN_DTT = 0
DISABLE_TILE_RATE_TILES = 0
FORCE_Z_DIRTY = 0
FORCE_STENCIL_DIRTY = 0
FORCE_Z_VALID = 0
FORCE_STENCIL_VALID = 0
PRESERVE_COMPRESSION = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_MAX_VTX_INDX[0m <- 0xffffffff
[1;33mVGT_MIN_VTX_INDX[0m <- 0
[1;33mVGT_INDX_OFFSET[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_RSRC3_LS[0m <- CU_EN = 0xffff
WAVE_LIMIT = 0
LOCK_LOW_THRESHOLD = 0
GROUP_FIFO_DEPTH = 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_RSRC3_HS[0m <- WAVE_LIMIT = 0
LOCK_LOW_THRESHOLD = 0
GROUP_FIFO_DEPTH = 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_RSRC3_ES[0m <- CU_EN = 0xffff
WAVE_LIMIT = 0
LOCK_LOW_THRESHOLD = 0
GROUP_FIFO_DEPTH = 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_RSRC3_GS[0m <- CU_EN = 0xffff
WAVE_LIMIT = 0
LOCK_LOW_THRESHOLD = 0
GROUP_FIFO_DEPTH = 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_RSRC3_VS[0m <- CU_EN = 0xfffe
WAVE_LIMIT = 0
LOCK_LOW_THRESHOLD = 0
[1;33mSPI_SHADER_LATE_ALLOC_VS[0m <- LIMIT = 31 (0x1f)
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_RSRC3_PS[0m <- CU_EN = 0xffff
WAVE_LIMIT = 0
LOCK_LOW_THRESHOLD = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_VERTEX_REUSE_BLOCK_CNTL[0m <- VTX_REUSE_DEPTH = 14 (0x0e)
[1;33mVGT_OUT_DEALLOC_CNTL[0m <- DEALLOC_DIST = 16 (0x10)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mTA_BC_BASE_ADDR[0m <- 0x00008120
[1;33mTA_BC_BASE_ADDR_HI[0m <- ADDRESS = 0
------------------- IB2: Init config end -------------------
------------------ IB begin ------------------
[1;32mWRITE_DATA[0m:
[1;33mCONTROL[0m <- ENGINE_SEL = ME
WR_CONFIRM = 1
WR_ONE_ADDR = 0
DST_SEL = MEMORY_SYNC
[1;33mDST_ADDR_LO[0m <- 0x00836000
[1;33mDST_ADDR_HI[0m <- 0
0x00000001
[1;32mNOP[0m:
[31mTrace point ID: 1
[31mThis trace point was reached by the CP.[0m
[1;32mINDIRECT_BUFFER_CIK[0m:
[1;33mIB_BASE_LO[0m <- 0x00822000
[1;33mIB_BASE_HI[0m <- 0
[1;33mCONTROL[0m <- IB_SIZE = 128 (0x00080)
CHAIN = 0
VALID = 0
[1;32mPFP_SYNC_ME[0m:
0x00000000
[1;32mSURFACE_SYNC[0m:
[1;33mCP_COHER_CNTL[0m <- DEST_BASE_0_ENA = 0
DEST_BASE_1_ENA = 0
CB0_DEST_BASE_ENA = 0
CB1_DEST_BASE_ENA = 0
CB2_DEST_BASE_ENA = 0
CB3_DEST_BASE_ENA = 0
CB4_DEST_BASE_ENA = 0
CB5_DEST_BASE_ENA = 0
CB6_DEST_BASE_ENA = 0
CB7_DEST_BASE_ENA = 0
DB_DEST_BASE_ENA = 0
DEST_BASE_2_ENA = 0
DEST_BASE_3_ENA = 0
TCL1_ACTION_ENA = 0
TC_ACTION_ENA = 0
CB_ACTION_ENA = 0
DB_ACTION_ENA = 0
SH_KCACHE_ACTION_ENA = 1
SH_ICACHE_ACTION_ENA = 1
[1;33mCP_COHER_SIZE[0m <- 0xffffffff
[1;33mCP_COHER_BASE[0m <- 0
[1;33mPOLL_INTERVAL[0m <- 10 (0x000a)
[1;32mEVENT_WRITE[0m:
[1;33mVGT_EVENT_INITIATOR[0m <- EVENT_TYPE = PIPELINESTAT_START
[1;33mEVENT_INDEX[0m <- 0
[1;33mINV_L2[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_STRMOUT_BUFFER_CONFIG[0m <- STREAM_0_BUFFER_EN = 0
STREAM_1_BUFFER_EN = 0
STREAM_2_BUFFER_EN = 0
STREAM_3_BUFFER_EN = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_STRMOUT_CONFIG[0m <- STREAMOUT_0_EN = 0
STREAMOUT_1_EN = 0
STREAMOUT_2_EN = 0
STREAMOUT_3_EN = 0
RAST_STREAM = 0
RAST_STREAM_MASK = 0
USE_RAST_STREAM_MASK = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR0_BASE[0m <- 0x00008380
[1;33mCB_COLOR0_PITCH[0m <- TILE_MAX = 31 (0x1f)
FMASK_TILE_MAX = 31 (0x1f)
[1;33mCB_COLOR0_SLICE[0m <- TILE_MAX = 1023 (0x003ff)
[1;33mCB_COLOR0_VIEW[0m <- SLICE_START = 0
SLICE_MAX = 0
[1;33mCB_COLOR0_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_8_8_8_8
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_ALT
FAST_CLEAR = 1
COMPRESSION = 0
BLEND_CLAMP = 1
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;33mCB_COLOR0_ATTRIB[0m <- TILE_MODE_INDEX = 10 (0xa)
FMASK_TILE_MODE_INDEX = 10 (0xa)
FMASK_BANK_HEIGHT = 0
NUM_SAMPLES = 0
NUM_FRAGMENTS = 0
FORCE_DST_ALPHA_1 = 0
[1;33mCB_COLOR0_DCC_CONTROL[0m <- OVERWRITE_COMBINER_DISABLE = 0
KEY_CLEAR_ENABLE = 0
MAX_UNCOMPRESSED_BLOCK_SIZE = 0
MIN_COMPRESSED_BLOCK_SIZE = 0
MAX_COMPRESSED_BLOCK_SIZE = 0
COLOR_TRANSFORM = 0
INDEPENDENT_64B_BLOCKS = 0
LOSSY_RGB_PRECISION = 0
LOSSY_ALPHA_PRECISION = 0
[1;33mCB_COLOR0_CMASK[0m <- 0x00008370
[1;33mCB_COLOR0_CMASK_SLICE[0m <- TILE_MAX = 3
[1;33mCB_COLOR0_FMASK[0m <- 0x00008380
[1;33mCB_COLOR0_FMASK_SLICE[0m <- TILE_MAX = 1023 (0x003ff)
[1;33mCB_COLOR0_CLEAR_WORD0[0m <- 0x80808080
[1;33mCB_COLOR0_CLEAR_WORD1[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR1_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_8_8_8_8
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_ALT
FAST_CLEAR = 1
COMPRESSION = 0
BLEND_CLAMP = 1
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR2_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_INVALID
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_STD
FAST_CLEAR = 0
COMPRESSION = 0
BLEND_CLAMP = 0
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR3_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_INVALID
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_STD
FAST_CLEAR = 0
COMPRESSION = 0
BLEND_CLAMP = 0
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR4_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_INVALID
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_STD
FAST_CLEAR = 0
COMPRESSION = 0
BLEND_CLAMP = 0
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR5_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_INVALID
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_STD
FAST_CLEAR = 0
COMPRESSION = 0
BLEND_CLAMP = 0
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR6_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_INVALID
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_STD
FAST_CLEAR = 0
COMPRESSION = 0
BLEND_CLAMP = 0
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR7_INFO[0m <- ENDIAN = ENDIAN_NONE
FORMAT = COLOR_INVALID
LINEAR_GENERAL = 0
NUMBER_TYPE = NUMBER_UNORM
COMP_SWAP = SWAP_STD
FAST_CLEAR = 0
COMPRESSION = 0
BLEND_CLAMP = 0
BLEND_BYPASS = 0
SIMPLE_FLOAT = 0
ROUND_MODE = 0
CMASK_IS_LINEAR = 0
BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
FMASK_COMPRESSION_DISABLE = 0
FMASK_COMPRESS_1FRAG_ONLY = 0
DCC_ENABLE = 0
CMASK_ADDR_TYPE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_Z_INFO[0m <- FORMAT = Z_INVALID
NUM_SAMPLES = 0
TILE_SPLIT = ADDR_SURF_TILE_SPLIT_64B
TILE_MODE_INDEX = 0
DECOMPRESS_ON_N_ZPLANES = 0
ALLOW_EXPCLEAR = 0
READ_SIZE = 0
TILE_SURFACE_ENABLE = 0
CLEAR_DISALLOWED = 0
ZRANGE_PRECISION = 0
[1;33mDB_STENCIL_INFO[0m <- FORMAT = STENCIL_INVALID
TILE_SPLIT = ADDR_SURF_TILE_SPLIT_64B
TILE_MODE_INDEX = 0
ALLOW_EXPCLEAR = 0
TILE_STENCIL_DISABLE = 0
CLEAR_DISALLOWED = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_WINDOW_SCISSOR_BR[0m <- BR_X = 250 (0x0fa)
BR_Y = 250 (0x0fa)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_RENDER_CONTROL[0m <- DEPTH_CLEAR_ENABLE = 0
STENCIL_CLEAR_ENABLE = 0
DEPTH_COPY = 0
STENCIL_COPY = 0
RESUMMARIZE_ENABLE = 0
STENCIL_COMPRESS_DISABLE = 0
DEPTH_COMPRESS_DISABLE = 0
COPY_CENTROID = 0
COPY_SAMPLE = 0
DECOMPRESS_ENABLE = 0
[1;33mDB_COUNT_CONTROL[0m <- ZPASS_INCREMENT_DISABLE = 0
PERFECT_ZPASS_COUNTS = 0
SAMPLE_RATE = 0
ZPASS_ENABLE = 0
ZFAIL_ENABLE = 0
SFAIL_ENABLE = 0
DBFAIL_ENABLE = 0
SLICE_EVEN_ENABLE = 0
SLICE_ODD_ENABLE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_RENDER_OVERRIDE2[0m <- PARTIAL_SQUAD_LAUNCH_CONTROL = PSLC_AUTO
PARTIAL_SQUAD_LAUNCH_COUNTDOWN = 0
DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION = 0
DISABLE_SMEM_EXPCLEAR_OPTIMIZATION = 0
DISABLE_COLOR_ON_VALIDATION = 0
DECOMPRESS_Z_ON_FLUSH = 0
DISABLE_REG_SNOOP = 0
DEPTH_BOUNDS_HIER_DEPTH_DISABLE = 0
SEPARATE_HIZS_FUNC_ENABLE = 0
HIZ_ZFUNC = 0
HIS_SFUNC_FF = 0
HIS_SFUNC_BF = 0
PRESERVE_ZRANGE = 0
PRESERVE_SRESULTS = 0
DISABLE_FAST_PASS = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_SHADER_CONTROL[0m <- Z_EXPORT_ENABLE = 0
STENCIL_TEST_VAL_EXPORT_ENABLE = 0
STENCIL_OP_VAL_EXPORT_ENABLE = 0
Z_ORDER = EARLY_Z_THEN_RE_Z
KILL_ENABLE = 0
COVERAGE_TO_MASK_ENABLE = 0
MASK_EXPORT_ENABLE = 0
EXEC_ON_HIER_FAIL = 0
EXEC_ON_NOOP = 0
ALPHA_TO_MASK_DISABLE = 0
DEPTH_BEFORE_SHADER = 0
CONSERVATIVE_Z_EXPORT = EXPORT_ANY_Z
DUAL_QUAD_DISABLE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_LINE_CNTL[0m <- EXPAND_LINE_WIDTH = 0
LAST_PIXEL = 0
PERPENDICULAR_ENDCAP_ENA = 0
DX10_DIAMOND_TEST_ENA = 1
[1;33mPA_SC_AA_CONFIG[0m <- MSAA_NUM_SAMPLES = 0
AA_MASK_CENTROID_DTMN = 0
MAX_SAMPLE_DIST = 0
MSAA_EXPOSED_SAMPLES = 0
DETAIL_TO_EXPOSED_MODE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_EQAA[0m <- MAX_ANCHOR_SAMPLES = 0
PS_ITER_SAMPLES = 0
MASK_EXPORT_NUM_SAMPLES = 0
ALPHA_TO_MASK_NUM_SAMPLES = 0
HIGH_QUALITY_INTERSECTIONS = 1
INCOHERENT_EQAA_READS = 0
INTERPOLATE_COMP_Z = 0
INTERPOLATE_SRC_Z = 0
STATIC_ANCHOR_ASSOCIATIONS = 1
ALPHA_TO_MASK_EQAA_DISABLE = 0
OVERRASTERIZATION_AMOUNT = 0
ENABLE_POSTZ_OVERRASTERIZATION = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_MODE_CNTL_1[0m <- WALK_SIZE = 0
WALK_ALIGNMENT = 0
WALK_ALIGN8_PRIM_FITS_ST = 1
WALK_FENCE_ENABLE = 1
WALK_FENCE_SIZE = 3
SUPERTILE_WALK_ORDER_ENABLE = 1
TILE_WALK_ORDER_ENABLE = 1
TILE_COVER_DISABLE = 0
TILE_COVER_NO_SCISSOR = 0
ZMM_LINE_EXTENT = 0
ZMM_LINE_OFFSET = 0
ZMM_RECT_EXTENT = 0
KILL_PIX_POST_HI_Z = 0
KILL_PIX_POST_DETAIL_MASK = 0
PS_ITER_SAMPLE = 0
MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE = 1
MULTI_GPU_SUPERTILE_ENABLE = 0
GPU_ID_OVERRIDE_ENABLE = 0
GPU_ID_OVERRIDE = 0
MULTI_GPU_PRIM_DISCARD_ENABLE = 0
FORCE_EOV_CNTDWN_ENABLE = 1
FORCE_EOV_REZ_ENABLE = 1
OUT_OF_ORDER_PRIMITIVE_ENABLE = 0
OUT_OF_ORDER_WATER_MARK = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_AA_MASK_X0Y0_X1Y0[0m <- AA_MASK_X0Y0 = 0xffff
AA_MASK_X1Y0 = 0xffff
[1;33mPA_SC_AA_MASK_X0Y1_X1Y1[0m <- AA_MASK_X0Y1 = 0xffff
AA_MASK_X1Y1 = 0xffff
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_TARGET_MASK[0m <- TARGET0_ENABLE = 15 (0xf)
TARGET1_ENABLE = 0
TARGET2_ENABLE = 0
TARGET3_ENABLE = 0
TARGET4_ENABLE = 0
TARGET5_ENABLE = 0
TARGET6_ENABLE = 0
TARGET7_ENABLE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_BLEND_RED[0m <- 0
[1;33mCB_BLEND_GREEN[0m <- 0
[1;33mCB_BLEND_BLUE[0m <- 0
[1;33mCB_BLEND_ALPHA[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_VS_OUT_CNTL[0m <- CLIP_DIST_ENA_0 = 0
CLIP_DIST_ENA_1 = 0
CLIP_DIST_ENA_2 = 0
CLIP_DIST_ENA_3 = 0
CLIP_DIST_ENA_4 = 0
CLIP_DIST_ENA_5 = 0
CLIP_DIST_ENA_6 = 0
CLIP_DIST_ENA_7 = 0
CULL_DIST_ENA_0 = 0
CULL_DIST_ENA_1 = 0
CULL_DIST_ENA_2 = 0
CULL_DIST_ENA_3 = 0
CULL_DIST_ENA_4 = 0
CULL_DIST_ENA_5 = 0
CULL_DIST_ENA_6 = 0
CULL_DIST_ENA_7 = 0
USE_VTX_POINT_SIZE = 0
USE_VTX_EDGE_FLAG = 0
USE_VTX_RENDER_TARGET_INDX = 0
USE_VTX_VIEWPORT_INDX = 0
USE_VTX_KILL_FLAG = 0
VS_OUT_MISC_VEC_ENA = 0
VS_OUT_CCDIST0_VEC_ENA = 0
VS_OUT_CCDIST1_VEC_ENA = 0
VS_OUT_MISC_SIDE_BUS_ENA = 1
USE_VTX_GS_CUT_FLAG = 0
USE_VTX_LINE_WIDTH = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_CLIP_CNTL[0m <- UCP_ENA_0 = 0
UCP_ENA_1 = 0
UCP_ENA_2 = 0
UCP_ENA_3 = 0
UCP_ENA_4 = 0
UCP_ENA_5 = 0
PS_UCP_Y_SCALE_NEG = 0
PS_UCP_MODE = 0
CLIP_DISABLE = 0
UCP_CULL_ONLY_ENA = 0
BOUNDARY_EDGE_FLAG_ENA = 0
DX_CLIP_SPACE_DEF = 0
DIS_CLIP_ERR_DETECT = 0
VTX_KILL_OR = 0
DX_RASTERIZATION_KILL = 0
DX_LINEAR_ATTR_CLIP_ENA = 1
VTE_VPORT_PROVOKE_DISABLE = 0
ZCLIP_NEAR_DISABLE = 0
ZCLIP_FAR_DISABLE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_REUSE_OFF[0m <- REUSE_OFF = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_UCP_0_X[0m <- 0
[1;33mPA_CL_UCP_0_Y[0m <- 0
[1;33mPA_CL_UCP_0_Z[0m <- 0
[1;33mPA_CL_UCP_0_W[0m <- 0
[1;33mPA_CL_UCP_1_X[0m <- 0
[1;33mPA_CL_UCP_1_Y[0m <- 0
[1;33mPA_CL_UCP_1_Z[0m <- 0
[1;33mPA_CL_UCP_1_W[0m <- 0
[1;33mPA_CL_UCP_2_X[0m <- 0
[1;33mPA_CL_UCP_2_Y[0m <- 0
[1;33mPA_CL_UCP_2_Z[0m <- 0
[1;33mPA_CL_UCP_2_W[0m <- 0
[1;33mPA_CL_UCP_3_X[0m <- 0
[1;33mPA_CL_UCP_3_Y[0m <- 0
[1;33mPA_CL_UCP_3_Z[0m <- 0
[1;33mPA_CL_UCP_3_W[0m <- 0
[1;33mPA_CL_UCP_4_X[0m <- 0
[1;33mPA_CL_UCP_4_Y[0m <- 0
[1;33mPA_CL_UCP_4_Z[0m <- 0
[1;33mPA_CL_UCP_4_W[0m <- 0
[1;33mPA_CL_UCP_5_X[0m <- 0
[1;33mPA_CL_UCP_5_Y[0m <- 0
[1;33mPA_CL_UCP_5_Z[0m <- 0
[1;33mPA_CL_UCP_5_W[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_PS_0[0m <- 0x00878700
[1;33mSPI_SHADER_USER_DATA_PS_1[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_0[0m <- 0x00878700
[1;33mSPI_SHADER_USER_DATA_VS_1[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_GS_0[0m <- 0x00878700
[1;33mSPI_SHADER_USER_DATA_GS_1[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_ES_0[0m <- 0x00878700
[1;33mSPI_SHADER_USER_DATA_ES_1[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_HS_0[0m <- 0x00878700
[1;33mSPI_SHADER_USER_DATA_HS_1[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_2[0m <- 0x0087d300
[1;33mSPI_SHADER_USER_DATA_VS_3[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_8[0m <- 0x00878a00
[1;33mSPI_SHADER_USER_DATA_VS_9[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_4[0m <- 0x00878b00
[1;33mSPI_SHADER_USER_DATA_VS_5[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_6[0m <- 0x00879300
[1;33mSPI_SHADER_USER_DATA_VS_7[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_PS_2[0m <- 0x0087d400
[1;33mSPI_SHADER_USER_DATA_PS_3[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_PS_8[0m <- 0x00879600
[1;33mSPI_SHADER_USER_DATA_PS_9[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_PS_4[0m <- 0x0087d500
[1;33mSPI_SHADER_USER_DATA_PS_5[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_PS_6[0m <- 0x00879f00
[1;33mSPI_SHADER_USER_DATA_PS_7[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_GS_2[0m <- 0x0087a100
[1;33mSPI_SHADER_USER_DATA_GS_3[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_GS_8[0m <- 0x0087a200
[1;33mSPI_SHADER_USER_DATA_GS_9[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_GS_4[0m <- 0x0087a300
[1;33mSPI_SHADER_USER_DATA_GS_5[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_GS_6[0m <- 0x0087ab00
[1;33mSPI_SHADER_USER_DATA_GS_7[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_HS_2[0m <- 0x0087ad00
[1;33mSPI_SHADER_USER_DATA_HS_3[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_HS_8[0m <- 0x0087ae00
[1;33mSPI_SHADER_USER_DATA_HS_9[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_HS_4[0m <- 0x0087af00
[1;33mSPI_SHADER_USER_DATA_HS_5[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_HS_6[0m <- 0x0087b700
[1;33mSPI_SHADER_USER_DATA_HS_7[0m <- 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_10[0m <- 0x0087dd00
[1;33mSPI_SHADER_USER_DATA_VS_11[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_VPORT_SCISSOR_0_TL[0m <- TL_X = 0
TL_Y = 0
WINDOW_OFFSET_DISABLE = 1
[1;33mPA_SC_VPORT_SCISSOR_0_BR[0m <- BR_X = 250 (0x0fa)
BR_Y = 250 (0x0fa)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_GB_VERT_CLIP_ADJ[0m <- 0x43829168
[1;33mPA_CL_GB_VERT_DISC_ADJ[0m <- 1.0f (0x3f800000)
[1;33mPA_CL_GB_HORZ_CLIP_ADJ[0m <- 0x43829168
[1;33mPA_CL_GB_HORZ_DISC_ADJ[0m <- 1.0f (0x3f800000)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_VPORT_XSCALE[0m <- 125.0f (0x42fa0000)
[1;33mPA_CL_VPORT_XOFFSET[0m <- 125.0f (0x42fa0000)
[1;33mPA_CL_VPORT_YSCALE[0m <- -125.0f (0xc2fa0000)
[1;33mPA_CL_VPORT_YOFFSET[0m <- 125.0f (0x42fa0000)
[1;33mPA_CL_VPORT_ZSCALE[0m <- 0.5f (0x3f000000)
[1;33mPA_CL_VPORT_ZOFFSET[0m <- 0.5f (0x3f000000)
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_STENCILREFMASK[0m <- STENCILTESTVAL = 0
STENCILMASK = 0
STENCILWRITEMASK = 0
STENCILOPVAL = 1
[1;33mDB_STENCILREFMASK_BF[0m <- STENCILTESTVAL_BF = 0
STENCILMASK_BF = 0
STENCILWRITEMASK_BF = 0
STENCILOPVAL_BF = 1
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_ALPHA_TO_MASK[0m <- ALPHA_TO_MASK_ENABLE = 0
ALPHA_TO_MASK_OFFSET0 = 2
ALPHA_TO_MASK_OFFSET1 = 2
ALPHA_TO_MASK_OFFSET2 = 2
ALPHA_TO_MASK_OFFSET3 = 2
OFFSET_ROUND = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_BLEND0_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND1_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND2_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND3_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND4_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND5_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND6_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;33mCB_BLEND7_CONTROL[0m <- COLOR_SRCBLEND = BLEND_ZERO
COLOR_COMB_FCN = COMB_DST_PLUS_SRC
COLOR_DESTBLEND = BLEND_ZERO
ALPHA_SRCBLEND = BLEND_ZERO
ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
ALPHA_DESTBLEND = BLEND_ZERO
SEPARATE_ALPHA_BLEND = 0
ENABLE = 0
DISABLE_ROP3 = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_COLOR_CONTROL[0m <- DISABLE_DUAL_QUAD = 0
DEGAMMA_ENABLE = 0
MODE = CB_NORMAL
ROP3 = X_0XCC
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_INTERP_CONTROL_0[0m <- FLAT_SHADE_ENA = 1
PNT_SPRITE_ENA = 1
PNT_SPRITE_OVRD_X = SPI_PNT_SPRITE_SEL_S
PNT_SPRITE_OVRD_Y = SPI_PNT_SPRITE_SEL_T
PNT_SPRITE_OVRD_Z = SPI_PNT_SPRITE_SEL_0
PNT_SPRITE_OVRD_W = SPI_PNT_SPRITE_SEL_1
PNT_SPRITE_TOP_1 = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SU_POINT_SIZE[0m <- HEIGHT = 8
WIDTH = 8
[1;33mPA_SU_POINT_MINMAX[0m <- MIN_SIZE = 8
MAX_SIZE = 8
[1;33mPA_SU_LINE_CNTL[0m <- WIDTH = 8
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SC_MODE_CNTL_0[0m <- MSAA_ENABLE = 0
VPORT_SCISSOR_ENABLE = 1
LINE_STIPPLE_ENABLE = 0
SEND_UNLIT_STILES_TO_PKR = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SU_VTX_CNTL[0m <- PIX_CENTER = 1
ROUND_MODE = X_TRUNCATE
QUANT_MODE = X_16_8_FIXED_POINT_1_256TH
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SU_POLY_OFFSET_CLAMP[0m <- 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_SU_SC_MODE_CNTL[0m <- CULL_FRONT = 0
CULL_BACK = 0
FACE = 0
POLY_MODE = X_DISABLE_POLY_MODE
POLYMODE_FRONT_PTYPE = X_DRAW_TRIANGLES
POLYMODE_BACK_PTYPE = X_DRAW_TRIANGLES
POLY_OFFSET_FRONT_ENABLE = 0
POLY_OFFSET_BACK_ENABLE = 0
POLY_OFFSET_PARA_ENABLE = 0
VTX_WINDOW_OFFSET_ENABLE = 0
PROVOKING_VTX_LAST = 1
PERSP_CORR_DIS = 0
MULTI_PRIM_IB_ENA = 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_14[0m <- 1
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_DEPTH_CONTROL[0m <- STENCIL_ENABLE = 0
Z_ENABLE = 0
Z_WRITE_ENABLE = 0
DEPTH_BOUNDS_ENABLE = 0
ZFUNC = FRAG_NEVER
BACKFACE_ENABLE = 0
STENCILFUNC = REF_NEVER
STENCILFUNC_BF = REF_NEVER
ENABLE_COLOR_WRITES_ON_DEPTH_FAIL = 0
DISABLE_COLOR_WRITES_ON_DEPTH_PASS = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mDB_STENCIL_CONTROL[0m <- STENCILFAIL = STENCIL_KEEP
STENCILZPASS = STENCIL_KEEP
STENCILZFAIL = STENCIL_KEEP
STENCILFAIL_BF = STENCIL_KEEP
STENCILZPASS_BF = STENCIL_KEEP
STENCILZFAIL_BF = STENCIL_KEEP
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_SHADER_STAGES_EN[0m <- LS_EN = LS_STAGE_OFF
HS_EN = 0
ES_EN = ES_STAGE_OFF
GS_EN = 0
VS_EN = VS_STAGE_REAL
DYNAMIC_HS = 0
DISPATCH_DRAW_EN = 0
DIS_DEALLOC_ACCUM_0 = 0
DIS_DEALLOC_ACCUM_1 = 0
VS_WAVE_ID_EN = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_GS_MODE[0m <- MODE = GS_OFF
RESERVED_0 = 0
CUT_MODE = GS_CUT_1024
RESERVED_1 = 0
GS_C_PACK_EN = 0
RESERVED_2 = 0
ES_PASSTHRU = 0
COMPUTE_MODE = 0
FAST_COMPUTE_MODE = 0
ELEMENT_INFO_EN = 0
PARTIAL_THD_AT_EOI = 0
SUPPRESS_CUTS = 0
ES_WRITE_OPTIMIZE = 0
GS_WRITE_OPTIMIZE = 0
ONCHIP = X_0_OFFCHIP_GS
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_PRIMITIVEID_EN[0m <- PRIMITIVEID_EN = 0
DISABLE_RESET_ON_EOI = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_VS_OUT_CONFIG[0m <- VS_EXPORT_COUNT = 0
VS_HALF_PACK = 0
VS_EXPORTS_FOG = 0
VS_OUT_FOG_VEC_ADDR = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_SHADER_POS_FORMAT[0m <- POS0_EXPORT_FORMAT = SPI_SHADER_4COMP
POS1_EXPORT_FORMAT = SPI_SHADER_NONE
POS2_EXPORT_FORMAT = SPI_SHADER_NONE
POS3_EXPORT_FORMAT = SPI_SHADER_NONE
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_LO_VS[0m <- 0x0000a780
[1;33mSPI_SHADER_PGM_HI_VS[0m <- MEM_BASE = 0
[1;33mSPI_SHADER_PGM_RSRC1_VS[0m <- VGPRS = 1
SGPRS = 2
PRIORITY = 0
FLOAT_MODE = 192 (0xc0)
PRIV = 0
DX10_CLAMP = 1
DEBUG_MODE = 0
IEEE_MODE = 0
VGPR_COMP_CNT = 0
CU_GROUP_ENABLE = 0
CACHE_CTL = 0
CDBG_USER = 0
[1;33mSPI_SHADER_PGM_RSRC2_VS[0m <- SCRATCH_EN = 0
USER_SGPR = 15 (0xf)
TRAP_PRESENT = 0
OC_LDS_EN = 0
SO_BASE0_EN = 0
SO_BASE1_EN = 0
SO_BASE2_EN = 0
SO_BASE3_EN = 0
SO_EN = 0
EXCP_EN = 0
EXCP_EN_CIK = 0
DISPATCH_DRAW_EN = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mPA_CL_VTE_CNTL[0m <- VPORT_X_SCALE_ENA = 1
VPORT_X_OFFSET_ENA = 1
VPORT_Y_SCALE_ENA = 1
VPORT_Y_OFFSET_ENA = 1
VPORT_Z_SCALE_ENA = 1
VPORT_Z_OFFSET_ENA = 1
VTX_XY_FMT = 0
VTX_Z_FMT = 0
VTX_W0_FMT = 1
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_PS_INPUT_ENA[0m <- PERSP_SAMPLE_ENA = 0
PERSP_CENTER_ENA = 0
PERSP_CENTROID_ENA = 0
PERSP_PULL_MODEL_ENA = 0
LINEAR_SAMPLE_ENA = 0
LINEAR_CENTER_ENA = 1
LINEAR_CENTROID_ENA = 0
LINE_STIPPLE_TEX_ENA = 0
POS_X_FLOAT_ENA = 0
POS_Y_FLOAT_ENA = 0
POS_Z_FLOAT_ENA = 0
POS_W_FLOAT_ENA = 0
FRONT_FACE_ENA = 0
ANCILLARY_ENA = 0
SAMPLE_COVERAGE_ENA = 0
POS_FIXED_PT_ENA = 0
[1;33mSPI_PS_INPUT_ADDR[0m <- PERSP_SAMPLE_ENA = 1
PERSP_CENTER_ENA = 1
PERSP_CENTROID_ENA = 1
PERSP_PULL_MODEL_ENA = 0
LINEAR_SAMPLE_ENA = 1
LINEAR_CENTER_ENA = 1
LINEAR_CENTROID_ENA = 1
LINE_STIPPLE_TEX_ENA = 0
POS_X_FLOAT_ENA = 0
POS_Y_FLOAT_ENA = 0
POS_Z_FLOAT_ENA = 0
POS_W_FLOAT_ENA = 0
FRONT_FACE_ENA = 1
ANCILLARY_ENA = 0
SAMPLE_COVERAGE_ENA = 1
POS_FIXED_PT_ENA = 1
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_BARYC_CNTL[0m <- PERSP_CENTER_CNTL = 0
PERSP_CENTROID_CNTL = 0
LINEAR_CENTER_CNTL = 0
LINEAR_CENTROID_CNTL = 0
POS_FLOAT_LOCATION = 2
POS_FLOAT_ULC = 0
FRONT_FACE_ALL_BITS = 1
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_PS_IN_CONTROL[0m <- NUM_INTERP = 0
PARAM_GEN = 0
FOG_ADDR = 0
BC_OPTIMIZE_DISABLE = 0
PASS_FOG_THROUGH_PS = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_SHADER_Z_FORMAT[0m <- Z_EXPORT_FORMAT = SPI_SHADER_ZERO
[1;33mSPI_SHADER_COL_FORMAT[0m <- COL0_EXPORT_FORMAT = SPI_SHADER_FP16_ABGR
COL1_EXPORT_FORMAT = SPI_SHADER_ZERO
COL2_EXPORT_FORMAT = SPI_SHADER_ZERO
COL3_EXPORT_FORMAT = SPI_SHADER_ZERO
COL4_EXPORT_FORMAT = SPI_SHADER_ZERO
COL5_EXPORT_FORMAT = SPI_SHADER_ZERO
COL6_EXPORT_FORMAT = SPI_SHADER_ZERO
COL7_EXPORT_FORMAT = SPI_SHADER_ZERO
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mCB_SHADER_MASK[0m <- OUTPUT0_ENABLE = 15 (0xf)
OUTPUT1_ENABLE = 0
OUTPUT2_ENABLE = 0
OUTPUT3_ENABLE = 0
OUTPUT4_ENABLE = 0
OUTPUT5_ENABLE = 0
OUTPUT6_ENABLE = 0
OUTPUT7_ENABLE = 0
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_PGM_LO_PS[0m <- 0x0000a790
[1;33mSPI_SHADER_PGM_HI_PS[0m <- MEM_BASE = 0
[1;33mSPI_SHADER_PGM_RSRC1_PS[0m <- VGPRS = 7
SGPRS = 1
PRIORITY = 0
FLOAT_MODE = FP_64_DENORMS
PRIV = 0
DX10_CLAMP = 1
DEBUG_MODE = 0
IEEE_MODE = 0
CU_GROUP_DISABLE = 0
CACHE_CTL = 0
CDBG_USER = 0
[1;33mSPI_SHADER_PGM_RSRC2_PS[0m <- SCRATCH_EN = 0
USER_SGPR = 11 (0xb)
TRAP_PRESENT = 0
WAVE_CNT_EN = 0
EXTRA_LDS_SIZE = 0
EXCP_EN = 0
EXCP_EN_CIK = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mSPI_TMPRING_SIZE[0m <- WAVES = 256 (0x100)
WAVESIZE = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33m0x40028aa8[0m = 0x0010007f[1;36mSET_CONTEXT_REG[1;36m:
[1;33m0x80028b58[0m = 0x00000000[1;36mSET_UCONFIG_REG[1;36m:
[1;33m0x40030908[0m = 0x00000006[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_GS_OUT_PRIM_TYPE[0m <- OUTPRIM_TYPE = OUTPRIM_TYPE_TRISTRIP
OUTPRIM_TYPE_1 = 0
OUTPRIM_TYPE_2 = 0
OUTPRIM_TYPE_3 = 0
UNIQUE_TYPE_PER_STREAM = 0
[1;36mSET_CONTEXT_REG[1;36m:
[1;33mVGT_MULTI_PRIM_IB_RESET_EN[0m <- RESET_EN = 0
[1;32mNUM_INSTANCES[0m:
[1;33mVGT_NUM_INSTANCES[0m <- 1
[1;36mSET_SH_REG[1;36m:
[1;33mSPI_SHADER_USER_DATA_VS_12[0m <- 0
[1;33mSPI_SHADER_USER_DATA_VS_13[0m <- 0
[1;32mDRAW_INDEX_AUTO[0m:
[1;33mVGT_NUM_INDICES[0m <- 4
[1;33mVGT_DRAW_INITIATOR[0m <- SOURCE_SELECT = DI_SRC_SEL_AUTO_INDEX
MAJOR_MODE = DI_MAJOR_MODE_0
NOT_EOP = 0
USE_OPAQUE = 0
[1;32mWRITE_DATA[0m:
[1;33mCONTROL[0m <- ENGINE_SEL = ME
WR_CONFIRM = 1
WR_ONE_ADDR = 0
DST_SEL = MEMORY_SYNC
[1;33mDST_ADDR_LO[0m <- 0x00836000
[1;33mDST_ADDR_HI[0m <- 0
0x00000002
[1;32mNOP[0m:
[31mTrace point ID: 2
[31m!!!!! This is the last trace point that was reached by the CP !!!!![0m
[1;32mEVENT_WRITE[0m:
[1;33mVGT_EVENT_INITIATOR[0m <- EVENT_TYPE = PS_PARTIAL_FLUSH
[1;33mEVENT_INDEX[0m <- 4
[1;33mINV_L2[0m <- 0
[1;32mEVENT_WRITE[0m:
[1;33mVGT_EVENT_INITIATOR[0m <- EVENT_TYPE = CS_PARTIAL_FLUSH
[1;33mEVENT_INDEX[0m <- 4
[1;33mINV_L2[0m <- 0
[1;32mPFP_SYNC_ME[0m:
0x00000000
[1;32mWRITE_DATA[0m:
[1;33mCONTROL[0m <- ENGINE_SEL = ME
WR_CONFIRM = 1
WR_ONE_ADDR = 0
DST_SEL = MEMORY_SYNC
[1;33mDST_ADDR_LO[0m <- 0x00836000
[1;33mDST_ADDR_HI[0m <- 0
0x00000003
[1;32mNOP[0m:
[31mTrace point ID: 3
[31m!!!!! This trace point was NOT reached by the CP !!!!![0m
------------------- IB end -------------------
Done.
More information about the mesa-dev
mailing list