[Mesa-dev] Mesa (master): Revert "radeon/llvm: Use alloca instructions for larger arrays"

Michel Dänzer michel at daenzer.net
Thu Jul 21 08:03:16 UTC 2016


On 21.07.2016 00:04, Michel Dänzer wrote:
> On 15.07.2016 05:15, Marek Olšák wrote:
>> Module: Mesa
>> Branch: master
>> Commit: f84e9d749fbb6da73a60fb70e6725db773c9b8f8
>> URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=f84e9d749fbb6da73a60fb70e6725db773c9b8f8
>>
>> Author: Marek Olšák <marek.olsak at amd.com>
>> Date:   Thu Jul 14 22:07:46 2016 +0200
>>
>> Revert "radeon/llvm: Use alloca instructions for larger arrays"
>>
>> This reverts commit 513fccdfb68e6a71180e21827f071617c93fd09b.
>>
>> Bioshock Infinite hangs with that.
> 
> Unfortunately, this change caused the piglit test
> shaders@glsl-fs-vec4-indexing-temp-dst-in-loop (and possibly others) to
> hang my Kaveri. Any ideas for how we can get out of this conundrum?

The hang was introduced by LLVM SVN r275934 ("AMDGPU: Expand register
indexing pseudos in custom inserter"). The good/bad (without/with
r275934) shader dumps and the GALLIUM_DDEBUG=800 dump corresponding to
the hang are attached.


BTW, even with Marek's change above reverted, I still see some piglit
regressions compared to last week, but I'm not sure if those are all
related to the same LLVM change.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
Gallium debugger active. The hang detection timeout is 800 ms.
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
  4: MOV OUT[0], TEMP[0]
  5: END
radeonsi: Compiling shader 1
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) {
main_body:
  %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0
  %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0)
  %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4)
  %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8)
  %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12)
  %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16)
  %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20)
  %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24)
  %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28)
  %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48)
  %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52)
  %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56)
  %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60)
  %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
  %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %16, %35
  %40 = fmul float %17, %35
  %41 = fmul float %18, %35
  %42 = fmul float %19, %35
  %43 = fmul float %20, %36
  %44 = fadd float %43, %39
  %45 = fmul float %21, %36
  %46 = fadd float %45, %40
  %47 = fmul float %22, %36
  %48 = fadd float %47, %41
  %49 = fmul float %23, %36
  %50 = fadd float %49, %42
  %51 = fmul float %24, %37
  %52 = fadd float %51, %44
  %53 = fmul float %25, %37
  %54 = fadd float %53, %46
  %55 = fmul float %26, %37
  %56 = fadd float %55, %48
  %57 = fmul float %27, %37
  %58 = fadd float %57, %50
  %59 = fmul float %28, %38
  %60 = fadd float %59, %52
  %61 = fmul float %29, %38
  %62 = fadd float %61, %54
  %63 = fmul float %30, %38
  %64 = fadd float %63, %56
  %65 = fmul float %31, %38
  %66 = fadd float %65, %58
  %67 = bitcast i32 %11 to float
  %68 = insertvalue <{ float, float, float }> undef, float %67, 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %62, float %64, float %66)
  ret <{ float, float, float }> %68
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!0 = !{}

FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..2]
DCL TEMP[0..7], ARRAY(1), LOCAL
DCL TEMP[8..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    0.2000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {0, 1, 0, 0}
  0: MUL TEMP[0], IMM[0].xyxy, CONST[0].xxxx
  1: MUL TEMP[1], IMM[0].yxyy, CONST[0].xxxx
  2: MUL TEMP[2], IMM[0].yyxy, CONST[0].xxxx
  3: MUL TEMP[3], IMM[0].xxyy, CONST[0].xxxx
  4: MUL TEMP[4], IMM[0].yxxy, CONST[0].xxxx
  5: MUL TEMP[5], IMM[0].xyxy, CONST[0].xxxx
  6: MOV TEMP[6], IMM[0].yyyy
  7: MUL TEMP[7], IMM[0].xxxy, CONST[0].xxxx
  8: MOV TEMP[8].x, IMM[1].xxxx
  9: BGNLOOP :0
 10:   ISGE TEMP[9].x, TEMP[8].xxxx, CONST[1].xxxx
 11:   UIF TEMP[9].xxxx :0
 12:     BRK
 13:   ENDIF
 14:   UARL ADDR[0].x, TEMP[8].xxxx
 15:   MOV TEMP[ADDR[0].x](1), CONST[2]
 16:   UADD TEMP[8].x, TEMP[8].xxxx, IMM[1].yyyy
 17: ENDLOOP :0
 18: ADD TEMP[8], TEMP[0], TEMP[1]
 19: ADD TEMP[8], TEMP[8], TEMP[2]
 20: ADD TEMP[8], TEMP[8], TEMP[3]
 21: ADD TEMP[8], TEMP[8], TEMP[4]
 22: ADD TEMP[8], TEMP[8], TEMP[5]
 23: ADD TEMP[8], TEMP[8], TEMP[6]
 24: ADD TEMP[8], TEMP[8], TEMP[7]
 25: MOV OUT[0], TEMP[8]
 26: END
radeonsi: Compiling shader 2
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
  %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
  %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
  %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
  %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
  %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
  %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
  %31 = fmul float %25, 0x3FC99999A0000000
  %32 = fmul float %25, 0.000000e+00
  %33 = fmul float %25, 0x3FC99999A0000000
  %34 = fmul float %25, 0.000000e+00
  %35 = fmul float %25, 0.000000e+00
  %36 = fmul float %25, 0x3FC99999A0000000
  %37 = fmul float %25, 0.000000e+00
  %38 = fmul float %25, 0.000000e+00
  %39 = fmul float %25, 0.000000e+00
  %40 = fmul float %25, 0.000000e+00
  %41 = fmul float %25, 0x3FC99999A0000000
  %42 = fmul float %25, 0.000000e+00
  %43 = fmul float %25, 0x3FC99999A0000000
  %44 = fmul float %25, 0x3FC99999A0000000
  %45 = fmul float %25, 0.000000e+00
  %46 = fmul float %25, 0.000000e+00
  %47 = fmul float %25, 0.000000e+00
  %48 = fmul float %25, 0x3FC99999A0000000
  %49 = fmul float %25, 0x3FC99999A0000000
  %50 = fmul float %25, 0.000000e+00
  %51 = fmul float %25, 0x3FC99999A0000000
  %52 = fmul float %25, 0.000000e+00
  %53 = fmul float %25, 0x3FC99999A0000000
  %54 = fmul float %25, 0.000000e+00
  %55 = fmul float %25, 0x3FC99999A0000000
  %56 = fmul float %25, 0x3FC99999A0000000
  %57 = fmul float %25, 0x3FC99999A0000000
  %58 = fmul float %25, 0.000000e+00
  %59 = bitcast float %26 to i32
  br label %LOOP

LOOP:                                             ; preds = %ENDIF, %main_body
  %temp2.0 = phi float [ %33, %main_body ], [ %141, %ENDIF ]
  %temp3.0 = phi float [ %34, %main_body ], [ %158, %ENDIF ]
  %temp4.0 = phi float [ %35, %main_body ], [ %108, %ENDIF ]
  %temp5.0 = phi float [ %36, %main_body ], [ %125, %ENDIF ]
  %temp6.0 = phi float [ %37, %main_body ], [ %142, %ENDIF ]
  %temp7.0 = phi float [ %38, %main_body ], [ %159, %ENDIF ]
  %temp8.0 = phi float [ %39, %main_body ], [ %109, %ENDIF ]
  %temp9.0 = phi float [ %40, %main_body ], [ %126, %ENDIF ]
  %temp10.0 = phi float [ %41, %main_body ], [ %143, %ENDIF ]
  %temp11.0 = phi float [ %42, %main_body ], [ %160, %ENDIF ]
  %temp12.0 = phi float [ %43, %main_body ], [ %110, %ENDIF ]
  %temp13.0 = phi float [ %44, %main_body ], [ %127, %ENDIF ]
  %temp14.0 = phi float [ %45, %main_body ], [ %144, %ENDIF ]
  %temp15.0 = phi float [ %46, %main_body ], [ %161, %ENDIF ]
  %temp16.0 = phi float [ %47, %main_body ], [ %111, %ENDIF ]
  %temp17.0 = phi float [ %48, %main_body ], [ %128, %ENDIF ]
  %temp18.0 = phi float [ %49, %main_body ], [ %145, %ENDIF ]
  %temp19.0 = phi float [ %50, %main_body ], [ %162, %ENDIF ]
  %temp20.0 = phi float [ %51, %main_body ], [ %112, %ENDIF ]
  %temp21.0 = phi float [ %52, %main_body ], [ %129, %ENDIF ]
  %temp22.0 = phi float [ %53, %main_body ], [ %146, %ENDIF ]
  %temp23.0 = phi float [ %54, %main_body ], [ %163, %ENDIF ]
  %temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %113, %ENDIF ]
  %temp25.0 = phi float [ 0.000000e+00, %main_body ], [ %130, %ENDIF ]
  %temp26.0 = phi float [ 0.000000e+00, %main_body ], [ %147, %ENDIF ]
  %temp27.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
  %temp28.0 = phi float [ %55, %main_body ], [ %114, %ENDIF ]
  %temp29.0 = phi float [ %56, %main_body ], [ %131, %ENDIF ]
  %temp30.0 = phi float [ %57, %main_body ], [ %148, %ENDIF ]
  %temp31.0 = phi float [ %58, %main_body ], [ %165, %ENDIF ]
  %temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
  %temp1.0 = phi float [ %32, %main_body ], [ %124, %ENDIF ]
  %temp.0 = phi float [ %31, %main_body ], [ %107, %ENDIF ]
  %60 = bitcast float %temp32.0 to i32
  %61 = icmp slt i32 %60, %59
  br i1 %61, label %ENDIF, label %IF

IF:                                               ; preds = %LOOP
  %62 = fadd float %temp.0, %temp4.0
  %63 = fadd float %temp1.0, %temp5.0
  %64 = fadd float %temp2.0, %temp6.0
  %65 = fadd float %temp3.0, %temp7.0
  %66 = fadd float %62, %temp8.0
  %67 = fadd float %63, %temp9.0
  %68 = fadd float %64, %temp10.0
  %69 = fadd float %65, %temp11.0
  %70 = fadd float %66, %temp12.0
  %71 = fadd float %67, %temp13.0
  %72 = fadd float %68, %temp14.0
  %73 = fadd float %69, %temp15.0
  %74 = fadd float %70, %temp16.0
  %75 = fadd float %71, %temp17.0
  %76 = fadd float %72, %temp18.0
  %77 = fadd float %73, %temp19.0
  %78 = fadd float %74, %temp20.0
  %79 = fadd float %75, %temp21.0
  %80 = fadd float %76, %temp22.0
  %81 = fadd float %77, %temp23.0
  %82 = fadd float %78, %temp24.0
  %83 = fadd float %79, %temp25.0
  %84 = fadd float %80, %temp26.0
  %85 = fadd float %81, %temp27.0
  %86 = fadd float %82, %temp28.0
  %87 = fadd float %83, %temp29.0
  %88 = fadd float %84, %temp30.0
  %89 = fadd float %85, %temp31.0
  %90 = bitcast float %5 to i32
  %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %90, 10
  %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 11
  %93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 12
  %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 13
  %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %89, 14
  %96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96

ENDIF:                                            ; preds = %LOOP
  %97 = bitcast float %temp32.0 to i32
  %98 = insertelement <8 x float> undef, float %temp.0, i32 0
  %99 = insertelement <8 x float> %98, float %temp4.0, i32 1
  %100 = insertelement <8 x float> %99, float %temp8.0, i32 2
  %101 = insertelement <8 x float> %100, float %temp12.0, i32 3
  %102 = insertelement <8 x float> %101, float %temp16.0, i32 4
  %103 = insertelement <8 x float> %102, float %temp20.0, i32 5
  %104 = insertelement <8 x float> %103, float %temp24.0, i32 6
  %105 = insertelement <8 x float> %104, float %temp28.0, i32 7
  %106 = insertelement <8 x float> %105, float %27, i32 %97
  %107 = extractelement <8 x float> %106, i32 0
  %108 = extractelement <8 x float> %106, i32 1
  %109 = extractelement <8 x float> %106, i32 2
  %110 = extractelement <8 x float> %106, i32 3
  %111 = extractelement <8 x float> %106, i32 4
  %112 = extractelement <8 x float> %106, i32 5
  %113 = extractelement <8 x float> %106, i32 6
  %114 = extractelement <8 x float> %106, i32 7
  %115 = insertelement <8 x float> undef, float %temp1.0, i32 0
  %116 = insertelement <8 x float> %115, float %temp5.0, i32 1
  %117 = insertelement <8 x float> %116, float %temp9.0, i32 2
  %118 = insertelement <8 x float> %117, float %temp13.0, i32 3
  %119 = insertelement <8 x float> %118, float %temp17.0, i32 4
  %120 = insertelement <8 x float> %119, float %temp21.0, i32 5
  %121 = insertelement <8 x float> %120, float %temp25.0, i32 6
  %122 = insertelement <8 x float> %121, float %temp29.0, i32 7
  %123 = insertelement <8 x float> %122, float %28, i32 %97
  %124 = extractelement <8 x float> %123, i32 0
  %125 = extractelement <8 x float> %123, i32 1
  %126 = extractelement <8 x float> %123, i32 2
  %127 = extractelement <8 x float> %123, i32 3
  %128 = extractelement <8 x float> %123, i32 4
  %129 = extractelement <8 x float> %123, i32 5
  %130 = extractelement <8 x float> %123, i32 6
  %131 = extractelement <8 x float> %123, i32 7
  %132 = insertelement <8 x float> undef, float %temp2.0, i32 0
  %133 = insertelement <8 x float> %132, float %temp6.0, i32 1
  %134 = insertelement <8 x float> %133, float %temp10.0, i32 2
  %135 = insertelement <8 x float> %134, float %temp14.0, i32 3
  %136 = insertelement <8 x float> %135, float %temp18.0, i32 4
  %137 = insertelement <8 x float> %136, float %temp22.0, i32 5
  %138 = insertelement <8 x float> %137, float %temp26.0, i32 6
  %139 = insertelement <8 x float> %138, float %temp30.0, i32 7
  %140 = insertelement <8 x float> %139, float %29, i32 %97
  %141 = extractelement <8 x float> %140, i32 0
  %142 = extractelement <8 x float> %140, i32 1
  %143 = extractelement <8 x float> %140, i32 2
  %144 = extractelement <8 x float> %140, i32 3
  %145 = extractelement <8 x float> %140, i32 4
  %146 = extractelement <8 x float> %140, i32 5
  %147 = extractelement <8 x float> %140, i32 6
  %148 = extractelement <8 x float> %140, i32 7
  %149 = insertelement <8 x float> undef, float %temp3.0, i32 0
  %150 = insertelement <8 x float> %149, float %temp7.0, i32 1
  %151 = insertelement <8 x float> %150, float %temp11.0, i32 2
  %152 = insertelement <8 x float> %151, float %temp15.0, i32 3
  %153 = insertelement <8 x float> %152, float %temp19.0, i32 4
  %154 = insertelement <8 x float> %153, float %temp23.0, i32 5
  %155 = insertelement <8 x float> %154, float %temp27.0, i32 6
  %156 = insertelement <8 x float> %155, float %temp31.0, i32 7
  %157 = insertelement <8 x float> %156, float %30, i32 %97
  %158 = extractelement <8 x float> %157, i32 0
  %159 = extractelement <8 x float> %157, i32 1
  %160 = extractelement <8 x float> %157, i32 2
  %161 = extractelement <8 x float> %157, i32 3
  %162 = extractelement <8 x float> %157, i32 4
  %163 = extractelement <8 x float> %157, i32 5
  %164 = extractelement <8 x float> %157, i32 6
  %165 = extractelement <8 x float> %157, i32 7
  %166 = bitcast float %temp32.0 to i32
  %167 = add i32 %166, 1
  %168 = bitcast i32 %167 to float
  br label %LOOP
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}

radeonsi: Compiling shader 3
Vertex Shader Prolog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
  %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0
  %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %19, i32 %1, 1
  %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %2, 2
  %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, i32 %3, 3
  %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %4, 4
  %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %5, 5
  %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %6, 6
  %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %7, 7
  %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %8, 8
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %9, 9
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %28, i32 %10, 10
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %11, 11
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %12, 12
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %13, 13
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %14, 14
  %34 = bitcast i32 %15 to float
  %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, float %34, 15
  %36 = bitcast i32 %16 to float
  %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16
  %38 = bitcast i32 %17 to float
  %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17
  %40 = bitcast i32 %18 to float
  %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18
  %42 = add i32 %15, %12
  %43 = bitcast i32 %42 to float
  %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %43, 19
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %44
}

radeonsi: Compiling shader 4
Vertex Shader Epilog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs void @main() {
main_body:
  ret void
}


Vertex Shader as VS:
Shader prolog disassembly:
	v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
	s_load_dwordx4 s[4:7], s[10:11], 0x0                ; C0820B00
	s_load_dwordx4 s[0:3], s[2:3], 0x0                  ; C0800300
	s_waitcnt lgkmcnt(0)                                ; BF8C007F
	buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
	s_buffer_load_dword s4, s[0:3], 0x0                 ; C2020100
	s_buffer_load_dword s5, s[0:3], 0x1                 ; C2028101
	s_buffer_load_dword s6, s[0:3], 0x2                 ; C2030102
	s_buffer_load_dword s7, s[0:3], 0x3                 ; C2038103
	s_buffer_load_dword s8, s[0:3], 0x4                 ; C2040104
	s_buffer_load_dword s9, s[0:3], 0x5                 ; C2048105
	s_buffer_load_dword s10, s[0:3], 0x6                ; C2050106
	s_buffer_load_dword s11, s[0:3], 0x7                ; C2058107
	s_buffer_load_dword s12, s[0:3], 0x8                ; C2060108
	s_buffer_load_dword s13, s[0:3], 0x9                ; C2068109
	s_buffer_load_dword s14, s[0:3], 0xa                ; C207010A
	s_buffer_load_dword s15, s[0:3], 0xb                ; C207810B
	s_buffer_load_dword s16, s[0:3], 0xc                ; C208010C
	s_buffer_load_dword s17, s[0:3], 0xd                ; C208810D
	s_buffer_load_dword s18, s[0:3], 0xe                ; C209010E
	s_buffer_load_dword s0, s[0:3], 0xf                 ; C200010F
	s_waitcnt vmcnt(0) lgkmcnt(0)                       ; BF8C0070
	v_mul_f32_e32 v0, s4, v3                            ; 10000604
	v_mul_f32_e32 v1, s5, v3                            ; 10020605
	v_mul_f32_e32 v7, s6, v3                            ; 100E0606
	v_mul_f32_e32 v3, s7, v3                            ; 10060607
	v_mac_f32_e32 v0, s8, v4                            ; 3E000808
	v_mac_f32_e32 v1, s9, v4                            ; 3E020809
	v_mac_f32_e32 v7, s10, v4                           ; 3E0E080A
	v_mac_f32_e32 v3, s11, v4                           ; 3E06080B
	v_mac_f32_e32 v0, s12, v5                           ; 3E000A0C
	v_mac_f32_e32 v1, s13, v5                           ; 3E020A0D
	v_mac_f32_e32 v7, s14, v5                           ; 3E0E0A0E
	v_mac_f32_e32 v3, s15, v5                           ; 3E060A0F
	v_mac_f32_e32 v0, s16, v6                           ; 3E000C10
	v_mac_f32_e32 v1, s17, v6                           ; 3E020C11
	v_mac_f32_e32 v7, s18, v6                           ; 3E0E0C12
	v_mac_f32_e32 v3, s0, v6                            ; 3E060C00
	exp 15, 12, 0, 1, 0, v0, v1, v7, v3                 ; F80008CF 03070100
	s_waitcnt expcnt(0)                                 ; BF8C0F0F
Shader epilog disassembly:
	s_endpgm ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
radeonsi: Compiling shader 5
Fragment Shader Epilog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call i32 @llvm.SI.packf16(float %6, float %7)
  %21 = bitcast i32 %20 to float
  %22 = call i32 @llvm.SI.packf16(float %8, float %9)
  %23 = bitcast i32 %22 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef)
  ret void
}

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2

attributes #0 = { "InitialPSInputAddr"="16777215" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }


Pixel Shader:
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[2:3], 0x0  ; C0800300
	v_mov_b32_e32 v4, 0x3e4ccccd        ; 7E0802FF 3E4CCCCD
	v_mov_b32_e32 v28, 0                ; 7E380280
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
	s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
	s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
	s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
	s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
	s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	v_mov_b32_e32 v0, s4                ; 7E000204
	v_mov_b32_e32 v1, s5                ; 7E020205
	v_mov_b32_e32 v2, s6                ; 7E040206
	v_mov_b32_e32 v3, s7                ; 7E060207
	v_mul_f32_e32 v22, s8, v4           ; 102C0808
	v_mul_f32_e64 v23, 0, s8            ; D2100017 00001080
	s_branch BB0_1                      ; BF820000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_cmp_gt_i32_e32 vcc, s0, v28       ; 7D083800
	s_and_b64 vcc, exec, vcc            ; 87EA6A7E
	s_cbranch_vccz BB0_11               ; BF860000
	v_mov_b32_e32 v24, v23              ; 7E300317
	v_mov_b32_e32 v25, v22              ; 7E320316
	v_mov_b32_e32 v26, v23              ; 7E340317
	v_mov_b32_e32 v27, v22              ; 7E360316
	v_mov_b32_e32 v29, v22              ; 7E3A0316
	v_mov_b32_e32 v4, v22               ; 7E080316
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_mov_b32_e32 v5, v23               ; 7E0A0317
	v_mov_b32_e32 v6, v24               ; 7E0C0318
	v_mov_b32_e32 v7, v25               ; 7E0E0319
	v_mov_b32_e32 v8, v26               ; 7E10031A
	v_mov_b32_e32 v9, v27               ; 7E12031B
	v_mov_b32_e32 v10, v28              ; 7E14031C
	v_mov_b32_e32 v11, v29              ; 7E16031D
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v0            ; 7E088500
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_3              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_mov_b32_e32 v15, v23              ; 7E1E0317
	v_mov_b32_e32 v16, v22              ; 7E200316
	v_mov_b32_e32 v17, v23              ; 7E220317
	v_mov_b32_e32 v18, v22              ; 7E240316
	v_mov_b32_e32 v19, v22              ; 7E260316
	v_mov_b32_e32 v20, v23              ; 7E280317
	v_mov_b32_e32 v21, v28              ; 7E2A031C
	v_mov_b32_e32 v4, v15               ; 7E08030F
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_mov_b32_e32 v5, v16               ; 7E0A0310
	v_mov_b32_e32 v6, v17               ; 7E0C0311
	v_mov_b32_e32 v7, v18               ; 7E0E0312
	v_mov_b32_e32 v8, v19               ; 7E100313
	v_mov_b32_e32 v9, v20               ; 7E120314
	v_mov_b32_e32 v10, v21              ; 7E140315
	v_mov_b32_e32 v11, v22              ; 7E160316
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v1            ; 7E088501
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_5              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_mov_b32_e32 v4, v22               ; 7E080316
	v_mov_b32_e32 v5, v23               ; 7E0A0317
	v_mov_b32_e32 v6, v22               ; 7E0C0316
	v_mov_b32_e32 v7, v23               ; 7E0E0317
	v_mov_b32_e32 v8, v22               ; 7E100316
	v_mov_b32_e32 v9, v22               ; 7E120316
	v_mov_b32_e32 v10, v28              ; 7E14031C
	v_mov_b32_e32 v11, v22              ; 7E160316
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v2            ; 7E088502
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_7              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_mov_b32_e32 v4, v23               ; 7E080317
	v_mov_b32_e32 v5, v23               ; 7E0A0317
	v_mov_b32_e32 v6, v23               ; 7E0C0317
	v_mov_b32_e32 v7, v23               ; 7E0E0317
	v_mov_b32_e32 v8, v23               ; 7E100317
	v_mov_b32_e32 v9, v23               ; 7E120317
	v_mov_b32_e32 v10, v28              ; 7E14031C
	v_mov_b32_e32 v11, v23              ; 7E160317
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v3            ; 7E088503
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_9              ; BF890000
	s_branch BB0_10                     ; BF820000
	v_add_f32_e32 v0, v23, v22          ; 06002D17
	v_add_f32_e32 v1, v22, v23          ; 06022F16
	v_add_f32_e32 v2, v23, v22          ; 06042D17
	v_add_f32_e32 v3, v23, v23          ; 06062F17
	v_add_f32_e32 v0, v23, v0           ; 06000117
	v_add_f32_e32 v1, v23, v1           ; 06020317
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v22, v0           ; 06000116
	v_add_f32_e32 v1, v22, v1           ; 06020316
	v_add_f32_e32 v2, v23, v2           ; 06040517
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v23, v0           ; 06000117
	v_add_f32_e32 v1, v22, v1           ; 06020316
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v22, v0           ; 06000116
	v_add_f32_e32 v1, v23, v1           ; 06020317
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v28, v0           ; 0600011C
	v_add_f32_e32 v1, v28, v1           ; 0602031C
	v_add_f32_e32 v2, v28, v2           ; 0604051C
	v_add_f32_e32 v3, v28, v3           ; 0606071C
	v_add_f32_e32 v0, v22, v0           ; 06000116
	v_add_f32_e32 v1, v22, v1           ; 06020316
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 32
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 548 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
radeonsi: Compiling shader 6
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6)
  %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6)
  %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6)
  %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6)
  %27 = bitcast float %5 to i32
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
radeonsi: Compiling shader 7
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) {
main_body:
  %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
  %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13)
  %18 = extractelement <4 x float> %17, i32 0
  %19 = extractelement <4 x float> %17, i32 1
  %20 = extractelement <4 x float> %17, i32 2
  %21 = extractelement <4 x float> %17, i32 3
  %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0
  %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14)
  %25 = extractelement <4 x float> %24, i32 0
  %26 = extractelement <4 x float> %24, i32 1
  %27 = extractelement <4 x float> %24, i32 2
  %28 = extractelement <4 x float> %24, i32 3
  %29 = bitcast i32 %11 to float
  %30 = insertvalue <{ float, float, float }> undef, float %29, 2
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21)
  ret <{ float, float, float }> %30
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!0 = !{}

radeonsi: Compiling shader 8
Vertex Shader Prolog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
  %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0
  %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1
  %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2
  %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %3, 3
  %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4
  %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %5, 5
  %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6
  %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7
  %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14
  %34 = bitcast i32 %15 to float
  %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15
  %36 = bitcast i32 %16 to float
  %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16
  %38 = bitcast i32 %17 to float
  %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17
  %40 = bitcast i32 %18 to float
  %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18
  %42 = add i32 %15, %12
  %43 = bitcast i32 %42 to float
  %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19
  %45 = add i32 %15, %12
  %46 = bitcast i32 %45 to float
  %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47
}


Vertex Shader as VS:
Shader prolog disassembly:
	v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
	v_mov_b32_e32 v5, v4           ; 7E0A0304
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[10:11], 0x0                  ; C0800B00
	s_load_dwordx4 s[4:7], s[10:11], 0x4                  ; C0820B04
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen   ; E00C2000 80000604
	buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05
	s_waitcnt vmcnt(0)                                    ; BF8C0F70
	exp 15, 32, 0, 0, 0, v10, v11, v12, v13               ; F800020F 0D0C0B0A
	exp 15, 12, 0, 1, 0, v6, v7, v8, v9                   ; F80008CF 09080706
	s_waitcnt expcnt(0)                                   ; BF8C0F0F
Shader epilog disassembly:
	s_endpgm ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 64 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************

Pixel Shader:
Shader main disassembly:
	s_mov_b32 m0, s11                   ; BEFC030B
	v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
	v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
	v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202
	v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
radeonsi: Compiling shader 9
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0
  %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
  %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0
  %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0
  %28 = extractelement <8 x i32> %24, i32 7
  %29 = extractelement <4 x i32> %27, i32 0
  %30 = and i32 %29, %28
  %31 = insertelement <4 x i32> %27, i32 %30, i32 0
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12)
  %34 = bitcast float %32 to i32
  %35 = bitcast float %33 to i32
  %36 = insertelement <2 x i32> undef, i32 %34, i32 0
  %37 = insertelement <2 x i32> %36, i32 %35, i32 1
  %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %39 = extractelement <4 x float> %38, i32 0
  %40 = extractelement <4 x float> %38, i32 1
  %41 = extractelement <4 x float> %38, i32 2
  %42 = extractelement <4 x float> %38, i32 3
  %43 = bitcast float %5 to i32
  %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10
  %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11
  %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12
  %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13
  %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14
  %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}


Pixel Shader:
Shader main disassembly:
	s_wqm_b64 exec, exec                                    ; BEFE0A7E
	s_load_dwordx8 s[12:19], s[4:5], 0x0                    ; C0C60500
	s_load_dwordx4 s[0:3], s[4:5], 0xc                      ; C080050C
	s_mov_b32 m0, s11                                       ; BEFC030B
	v_interp_p1_f32 v0, v8, 0, 0, [m0]                      ; C8000008
	v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0]                ; C8010009
	v_interp_p1_f32 v1, v8, 1, 0, [m0]                      ; C8040108
	s_waitcnt lgkmcnt(0)                                    ; BF8C007F
	s_and_b32 s0, s0, s19                                   ; 87001300
	v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0]                ; C8050109
	image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000
	s_waitcnt vmcnt(0)                                      ; BF8C0F70
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 72 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
dd: GPU hang detected!
dd: Aborting the process...
-------------- next part --------------
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
  4: MOV OUT[0], TEMP[0]
  5: END
radeonsi: Compiling shader 1
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) {
main_body:
  %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0
  %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0)
  %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4)
  %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8)
  %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12)
  %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16)
  %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20)
  %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24)
  %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28)
  %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48)
  %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52)
  %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56)
  %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60)
  %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
  %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %16, %35
  %40 = fmul float %17, %35
  %41 = fmul float %18, %35
  %42 = fmul float %19, %35
  %43 = fmul float %20, %36
  %44 = fadd float %43, %39
  %45 = fmul float %21, %36
  %46 = fadd float %45, %40
  %47 = fmul float %22, %36
  %48 = fadd float %47, %41
  %49 = fmul float %23, %36
  %50 = fadd float %49, %42
  %51 = fmul float %24, %37
  %52 = fadd float %51, %44
  %53 = fmul float %25, %37
  %54 = fadd float %53, %46
  %55 = fmul float %26, %37
  %56 = fadd float %55, %48
  %57 = fmul float %27, %37
  %58 = fadd float %57, %50
  %59 = fmul float %28, %38
  %60 = fadd float %59, %52
  %61 = fmul float %29, %38
  %62 = fadd float %61, %54
  %63 = fmul float %30, %38
  %64 = fadd float %63, %56
  %65 = fmul float %31, %38
  %66 = fadd float %65, %58
  %67 = bitcast i32 %11 to float
  %68 = insertvalue <{ float, float, float }> undef, float %67, 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %62, float %64, float %66)
  ret <{ float, float, float }> %68
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!0 = !{}

FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..2]
DCL TEMP[0..7], ARRAY(1), LOCAL
DCL TEMP[8..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    0.2000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {0, 1, 0, 0}
  0: MUL TEMP[0], IMM[0].xyxy, CONST[0].xxxx
  1: MUL TEMP[1], IMM[0].yxyy, CONST[0].xxxx
  2: MUL TEMP[2], IMM[0].yyxy, CONST[0].xxxx
  3: MUL TEMP[3], IMM[0].xxyy, CONST[0].xxxx
  4: MUL TEMP[4], IMM[0].yxxy, CONST[0].xxxx
  5: MUL TEMP[5], IMM[0].xyxy, CONST[0].xxxx
  6: MOV TEMP[6], IMM[0].yyyy
  7: MUL TEMP[7], IMM[0].xxxy, CONST[0].xxxx
  8: MOV TEMP[8].x, IMM[1].xxxx
  9: BGNLOOP :0
 10:   ISGE TEMP[9].x, TEMP[8].xxxx, CONST[1].xxxx
 11:   UIF TEMP[9].xxxx :0
 12:     BRK
 13:   ENDIF
 14:   UARL ADDR[0].x, TEMP[8].xxxx
 15:   MOV TEMP[ADDR[0].x](1), CONST[2]
 16:   UADD TEMP[8].x, TEMP[8].xxxx, IMM[1].yyyy
 17: ENDLOOP :0
 18: ADD TEMP[8], TEMP[0], TEMP[1]
 19: ADD TEMP[8], TEMP[8], TEMP[2]
 20: ADD TEMP[8], TEMP[8], TEMP[3]
 21: ADD TEMP[8], TEMP[8], TEMP[4]
 22: ADD TEMP[8], TEMP[8], TEMP[5]
 23: ADD TEMP[8], TEMP[8], TEMP[6]
 24: ADD TEMP[8], TEMP[8], TEMP[7]
 25: MOV OUT[0], TEMP[8]
 26: END
radeonsi: Compiling shader 2
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
  %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
  %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
  %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
  %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
  %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
  %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
  %31 = fmul float %25, 0x3FC99999A0000000
  %32 = fmul float %25, 0.000000e+00
  %33 = fmul float %25, 0x3FC99999A0000000
  %34 = fmul float %25, 0.000000e+00
  %35 = fmul float %25, 0.000000e+00
  %36 = fmul float %25, 0x3FC99999A0000000
  %37 = fmul float %25, 0.000000e+00
  %38 = fmul float %25, 0.000000e+00
  %39 = fmul float %25, 0.000000e+00
  %40 = fmul float %25, 0.000000e+00
  %41 = fmul float %25, 0x3FC99999A0000000
  %42 = fmul float %25, 0.000000e+00
  %43 = fmul float %25, 0x3FC99999A0000000
  %44 = fmul float %25, 0x3FC99999A0000000
  %45 = fmul float %25, 0.000000e+00
  %46 = fmul float %25, 0.000000e+00
  %47 = fmul float %25, 0.000000e+00
  %48 = fmul float %25, 0x3FC99999A0000000
  %49 = fmul float %25, 0x3FC99999A0000000
  %50 = fmul float %25, 0.000000e+00
  %51 = fmul float %25, 0x3FC99999A0000000
  %52 = fmul float %25, 0.000000e+00
  %53 = fmul float %25, 0x3FC99999A0000000
  %54 = fmul float %25, 0.000000e+00
  %55 = fmul float %25, 0x3FC99999A0000000
  %56 = fmul float %25, 0x3FC99999A0000000
  %57 = fmul float %25, 0x3FC99999A0000000
  %58 = fmul float %25, 0.000000e+00
  %59 = bitcast float %26 to i32
  br label %LOOP

LOOP:                                             ; preds = %ENDIF, %main_body
  %temp2.0 = phi float [ %33, %main_body ], [ %141, %ENDIF ]
  %temp3.0 = phi float [ %34, %main_body ], [ %158, %ENDIF ]
  %temp4.0 = phi float [ %35, %main_body ], [ %108, %ENDIF ]
  %temp5.0 = phi float [ %36, %main_body ], [ %125, %ENDIF ]
  %temp6.0 = phi float [ %37, %main_body ], [ %142, %ENDIF ]
  %temp7.0 = phi float [ %38, %main_body ], [ %159, %ENDIF ]
  %temp8.0 = phi float [ %39, %main_body ], [ %109, %ENDIF ]
  %temp9.0 = phi float [ %40, %main_body ], [ %126, %ENDIF ]
  %temp10.0 = phi float [ %41, %main_body ], [ %143, %ENDIF ]
  %temp11.0 = phi float [ %42, %main_body ], [ %160, %ENDIF ]
  %temp12.0 = phi float [ %43, %main_body ], [ %110, %ENDIF ]
  %temp13.0 = phi float [ %44, %main_body ], [ %127, %ENDIF ]
  %temp14.0 = phi float [ %45, %main_body ], [ %144, %ENDIF ]
  %temp15.0 = phi float [ %46, %main_body ], [ %161, %ENDIF ]
  %temp16.0 = phi float [ %47, %main_body ], [ %111, %ENDIF ]
  %temp17.0 = phi float [ %48, %main_body ], [ %128, %ENDIF ]
  %temp18.0 = phi float [ %49, %main_body ], [ %145, %ENDIF ]
  %temp19.0 = phi float [ %50, %main_body ], [ %162, %ENDIF ]
  %temp20.0 = phi float [ %51, %main_body ], [ %112, %ENDIF ]
  %temp21.0 = phi float [ %52, %main_body ], [ %129, %ENDIF ]
  %temp22.0 = phi float [ %53, %main_body ], [ %146, %ENDIF ]
  %temp23.0 = phi float [ %54, %main_body ], [ %163, %ENDIF ]
  %temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %113, %ENDIF ]
  %temp25.0 = phi float [ 0.000000e+00, %main_body ], [ %130, %ENDIF ]
  %temp26.0 = phi float [ 0.000000e+00, %main_body ], [ %147, %ENDIF ]
  %temp27.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
  %temp28.0 = phi float [ %55, %main_body ], [ %114, %ENDIF ]
  %temp29.0 = phi float [ %56, %main_body ], [ %131, %ENDIF ]
  %temp30.0 = phi float [ %57, %main_body ], [ %148, %ENDIF ]
  %temp31.0 = phi float [ %58, %main_body ], [ %165, %ENDIF ]
  %temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
  %temp1.0 = phi float [ %32, %main_body ], [ %124, %ENDIF ]
  %temp.0 = phi float [ %31, %main_body ], [ %107, %ENDIF ]
  %60 = bitcast float %temp32.0 to i32
  %61 = icmp slt i32 %60, %59
  br i1 %61, label %ENDIF, label %IF

IF:                                               ; preds = %LOOP
  %62 = fadd float %temp.0, %temp4.0
  %63 = fadd float %temp1.0, %temp5.0
  %64 = fadd float %temp2.0, %temp6.0
  %65 = fadd float %temp3.0, %temp7.0
  %66 = fadd float %62, %temp8.0
  %67 = fadd float %63, %temp9.0
  %68 = fadd float %64, %temp10.0
  %69 = fadd float %65, %temp11.0
  %70 = fadd float %66, %temp12.0
  %71 = fadd float %67, %temp13.0
  %72 = fadd float %68, %temp14.0
  %73 = fadd float %69, %temp15.0
  %74 = fadd float %70, %temp16.0
  %75 = fadd float %71, %temp17.0
  %76 = fadd float %72, %temp18.0
  %77 = fadd float %73, %temp19.0
  %78 = fadd float %74, %temp20.0
  %79 = fadd float %75, %temp21.0
  %80 = fadd float %76, %temp22.0
  %81 = fadd float %77, %temp23.0
  %82 = fadd float %78, %temp24.0
  %83 = fadd float %79, %temp25.0
  %84 = fadd float %80, %temp26.0
  %85 = fadd float %81, %temp27.0
  %86 = fadd float %82, %temp28.0
  %87 = fadd float %83, %temp29.0
  %88 = fadd float %84, %temp30.0
  %89 = fadd float %85, %temp31.0
  %90 = bitcast float %5 to i32
  %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %90, 10
  %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 11
  %93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 12
  %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 13
  %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %89, 14
  %96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96

ENDIF:                                            ; preds = %LOOP
  %97 = bitcast float %temp32.0 to i32
  %98 = insertelement <8 x float> undef, float %temp.0, i32 0
  %99 = insertelement <8 x float> %98, float %temp4.0, i32 1
  %100 = insertelement <8 x float> %99, float %temp8.0, i32 2
  %101 = insertelement <8 x float> %100, float %temp12.0, i32 3
  %102 = insertelement <8 x float> %101, float %temp16.0, i32 4
  %103 = insertelement <8 x float> %102, float %temp20.0, i32 5
  %104 = insertelement <8 x float> %103, float %temp24.0, i32 6
  %105 = insertelement <8 x float> %104, float %temp28.0, i32 7
  %106 = insertelement <8 x float> %105, float %27, i32 %97
  %107 = extractelement <8 x float> %106, i32 0
  %108 = extractelement <8 x float> %106, i32 1
  %109 = extractelement <8 x float> %106, i32 2
  %110 = extractelement <8 x float> %106, i32 3
  %111 = extractelement <8 x float> %106, i32 4
  %112 = extractelement <8 x float> %106, i32 5
  %113 = extractelement <8 x float> %106, i32 6
  %114 = extractelement <8 x float> %106, i32 7
  %115 = insertelement <8 x float> undef, float %temp1.0, i32 0
  %116 = insertelement <8 x float> %115, float %temp5.0, i32 1
  %117 = insertelement <8 x float> %116, float %temp9.0, i32 2
  %118 = insertelement <8 x float> %117, float %temp13.0, i32 3
  %119 = insertelement <8 x float> %118, float %temp17.0, i32 4
  %120 = insertelement <8 x float> %119, float %temp21.0, i32 5
  %121 = insertelement <8 x float> %120, float %temp25.0, i32 6
  %122 = insertelement <8 x float> %121, float %temp29.0, i32 7
  %123 = insertelement <8 x float> %122, float %28, i32 %97
  %124 = extractelement <8 x float> %123, i32 0
  %125 = extractelement <8 x float> %123, i32 1
  %126 = extractelement <8 x float> %123, i32 2
  %127 = extractelement <8 x float> %123, i32 3
  %128 = extractelement <8 x float> %123, i32 4
  %129 = extractelement <8 x float> %123, i32 5
  %130 = extractelement <8 x float> %123, i32 6
  %131 = extractelement <8 x float> %123, i32 7
  %132 = insertelement <8 x float> undef, float %temp2.0, i32 0
  %133 = insertelement <8 x float> %132, float %temp6.0, i32 1
  %134 = insertelement <8 x float> %133, float %temp10.0, i32 2
  %135 = insertelement <8 x float> %134, float %temp14.0, i32 3
  %136 = insertelement <8 x float> %135, float %temp18.0, i32 4
  %137 = insertelement <8 x float> %136, float %temp22.0, i32 5
  %138 = insertelement <8 x float> %137, float %temp26.0, i32 6
  %139 = insertelement <8 x float> %138, float %temp30.0, i32 7
  %140 = insertelement <8 x float> %139, float %29, i32 %97
  %141 = extractelement <8 x float> %140, i32 0
  %142 = extractelement <8 x float> %140, i32 1
  %143 = extractelement <8 x float> %140, i32 2
  %144 = extractelement <8 x float> %140, i32 3
  %145 = extractelement <8 x float> %140, i32 4
  %146 = extractelement <8 x float> %140, i32 5
  %147 = extractelement <8 x float> %140, i32 6
  %148 = extractelement <8 x float> %140, i32 7
  %149 = insertelement <8 x float> undef, float %temp3.0, i32 0
  %150 = insertelement <8 x float> %149, float %temp7.0, i32 1
  %151 = insertelement <8 x float> %150, float %temp11.0, i32 2
  %152 = insertelement <8 x float> %151, float %temp15.0, i32 3
  %153 = insertelement <8 x float> %152, float %temp19.0, i32 4
  %154 = insertelement <8 x float> %153, float %temp23.0, i32 5
  %155 = insertelement <8 x float> %154, float %temp27.0, i32 6
  %156 = insertelement <8 x float> %155, float %temp31.0, i32 7
  %157 = insertelement <8 x float> %156, float %30, i32 %97
  %158 = extractelement <8 x float> %157, i32 0
  %159 = extractelement <8 x float> %157, i32 1
  %160 = extractelement <8 x float> %157, i32 2
  %161 = extractelement <8 x float> %157, i32 3
  %162 = extractelement <8 x float> %157, i32 4
  %163 = extractelement <8 x float> %157, i32 5
  %164 = extractelement <8 x float> %157, i32 6
  %165 = extractelement <8 x float> %157, i32 7
  %166 = bitcast float %temp32.0 to i32
  %167 = add i32 %166, 1
  %168 = bitcast i32 %167 to float
  br label %LOOP
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}

radeonsi: Compiling shader 3
Vertex Shader Prolog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
  %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> undef, i32 %0, 0
  %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %19, i32 %1, 1
  %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %20, i32 %2, 2
  %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %21, i32 %3, 3
  %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %22, i32 %4, 4
  %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %23, i32 %5, 5
  %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %24, i32 %6, 6
  %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %25, i32 %7, 7
  %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %26, i32 %8, 8
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %27, i32 %9, 9
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %28, i32 %10, 10
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %29, i32 %11, 11
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %30, i32 %12, 12
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %31, i32 %13, 13
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %32, i32 %14, 14
  %34 = bitcast i32 %15 to float
  %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %33, float %34, 15
  %36 = bitcast i32 %16 to float
  %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %35, float %36, 16
  %38 = bitcast i32 %17 to float
  %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %37, float %38, 17
  %40 = bitcast i32 %18 to float
  %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %39, float %40, 18
  %42 = add i32 %15, %12
  %43 = bitcast i32 %42 to float
  %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %41, float %43, 19
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float }> %44
}

radeonsi: Compiling shader 4
Vertex Shader Epilog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs void @main() {
main_body:
  ret void
}


Vertex Shader as VS:
Shader prolog disassembly:
	v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
	s_load_dwordx4 s[4:7], s[10:11], 0x0                ; C0820B00
	s_load_dwordx4 s[0:3], s[2:3], 0x0                  ; C0800300
	s_waitcnt lgkmcnt(0)                                ; BF8C007F
	buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
	s_buffer_load_dword s4, s[0:3], 0x0                 ; C2020100
	s_buffer_load_dword s5, s[0:3], 0x1                 ; C2028101
	s_buffer_load_dword s6, s[0:3], 0x2                 ; C2030102
	s_buffer_load_dword s7, s[0:3], 0x3                 ; C2038103
	s_buffer_load_dword s8, s[0:3], 0x4                 ; C2040104
	s_buffer_load_dword s9, s[0:3], 0x5                 ; C2048105
	s_buffer_load_dword s10, s[0:3], 0x6                ; C2050106
	s_buffer_load_dword s11, s[0:3], 0x7                ; C2058107
	s_buffer_load_dword s12, s[0:3], 0x8                ; C2060108
	s_buffer_load_dword s13, s[0:3], 0x9                ; C2068109
	s_buffer_load_dword s14, s[0:3], 0xa                ; C207010A
	s_buffer_load_dword s15, s[0:3], 0xb                ; C207810B
	s_buffer_load_dword s16, s[0:3], 0xc                ; C208010C
	s_buffer_load_dword s17, s[0:3], 0xd                ; C208810D
	s_buffer_load_dword s18, s[0:3], 0xe                ; C209010E
	s_buffer_load_dword s0, s[0:3], 0xf                 ; C200010F
	s_waitcnt vmcnt(0) lgkmcnt(0)                       ; BF8C0070
	v_mul_f32_e32 v0, s4, v3                            ; 10000604
	v_mul_f32_e32 v1, s5, v3                            ; 10020605
	v_mul_f32_e32 v7, s6, v3                            ; 100E0606
	v_mul_f32_e32 v3, s7, v3                            ; 10060607
	v_mac_f32_e32 v0, s8, v4                            ; 3E000808
	v_mac_f32_e32 v1, s9, v4                            ; 3E020809
	v_mac_f32_e32 v7, s10, v4                           ; 3E0E080A
	v_mac_f32_e32 v3, s11, v4                           ; 3E06080B
	v_mac_f32_e32 v0, s12, v5                           ; 3E000A0C
	v_mac_f32_e32 v1, s13, v5                           ; 3E020A0D
	v_mac_f32_e32 v7, s14, v5                           ; 3E0E0A0E
	v_mac_f32_e32 v3, s15, v5                           ; 3E060A0F
	v_mac_f32_e32 v0, s16, v6                           ; 3E000C10
	v_mac_f32_e32 v1, s17, v6                           ; 3E020C11
	v_mac_f32_e32 v7, s18, v6                           ; 3E0E0C12
	v_mac_f32_e32 v3, s0, v6                            ; 3E060C00
	exp 15, 12, 0, 1, 0, v0, v1, v7, v3                 ; F80008CF 03070100
	s_waitcnt expcnt(0)                                 ; BF8C0F0F
Shader epilog disassembly:
	s_endpgm ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
radeonsi: Compiling shader 5
Fragment Shader Epilog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps void @main(i64 inreg, i64 inreg, i64 inreg, i64 inreg, i64 inreg, float inreg, float, float, float, float, float, float, float, float, float, float, float, float, float, float) #0 {
main_body:
  %20 = call i32 @llvm.SI.packf16(float %6, float %7)
  %21 = bitcast i32 %20 to float
  %22 = call i32 @llvm.SI.packf16(float %8, float %9)
  %23 = bitcast i32 %22 to float
  call void @llvm.SI.export(i32 15, i32 1, i32 1, i32 0, i32 1, float %21, float %23, float undef, float undef)
  ret void
}

; Function Attrs: nounwind readnone
declare i32 @llvm.SI.packf16(float, float) #1

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #2

attributes #0 = { "InitialPSInputAddr"="16777215" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind }


Pixel Shader:
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[2:3], 0x0  ; C0800300
	v_mov_b32_e32 v3, 0x3e4ccccd        ; 7E0602FF 3E4CCCCD
	v_mov_b32_e32 v36, 0                ; 7E480280
	v_mov_b32_e32 v12, 0                ; 7E180280
	v_mov_b32_e32 v28, v36              ; 7E380324
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
	s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
	s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
	s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
	s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
	s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	v_mul_f32_e32 v14, s8, v3           ; 101C0608
	v_mul_f32_e64 v3, 0, s8             ; D2100003 00001080
	v_mov_b32_e32 v0, s4                ; 7E000204
	v_mov_b32_e32 v1, s5                ; 7E020205
	v_mov_b32_e32 v2, s6                ; 7E040206
	v_mov_b32_e32 v11, s7               ; 7E160207
	v_mov_b32_e32 v4, v3                ; 7E080303
	v_mov_b32_e32 v16, v14              ; 7E20030E
	v_mov_b32_e32 v5, v3                ; 7E0A0303
	v_mov_b32_e32 v6, v3                ; 7E0C0303
	v_mov_b32_e32 v18, v14              ; 7E24030E
	v_mov_b32_e32 v7, v3                ; 7E0E0303
	v_mov_b32_e32 v19, v14              ; 7E26030E
	v_mov_b32_e32 v8, v3                ; 7E100303
	v_mov_b32_e32 v21, v14              ; 7E2A030E
	v_mov_b32_e32 v10, v3               ; 7E140303
	v_mov_b32_e32 v31, v3               ; 7E3E0303
	v_mov_b32_e32 v23, v14              ; 7E2E030E
	v_mov_b32_e32 v15, v3               ; 7E1E0303
	v_mov_b32_e32 v32, v3               ; 7E400303
	v_mov_b32_e32 v24, v3               ; 7E300303
	v_mov_b32_e32 v33, v14              ; 7E42030E
	v_mov_b32_e32 v25, v14              ; 7E32030E
	v_mov_b32_e32 v17, v3               ; 7E220303
	v_mov_b32_e32 v34, v3               ; 7E440303
	v_mov_b32_e32 v26, v14              ; 7E34030E
	v_mov_b32_e32 v35, v14              ; 7E46030E
	v_mov_b32_e32 v27, v3               ; 7E360303
	v_mov_b32_e32 v20, v36              ; 7E280324
	v_mov_b32_e32 v9, v36               ; 7E120324
	v_mov_b32_e32 v37, v14              ; 7E4A030E
	v_mov_b32_e32 v29, v14              ; 7E3A030E
	v_mov_b32_e32 v22, v3               ; 7E2C0303
	v_mov_b32_e32 v30, v14              ; 7E3C030E
	s_branch BB0_1                      ; BF820000
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 vcc_lo, v12     ; 7ED4050C
	s_mov_b32 m0, vcc_lo                ; BEFC036A
	v_cmp_eq_u32_e32 vcc, m0, v12       ; 7D84187C
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v30, v0           ; 7E3C8500
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_4              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 vcc_lo, v12     ; 7ED4050C
	s_mov_b32 m0, vcc_lo                ; BEFC036A
	v_cmp_eq_u32_e32 vcc, m0, v12       ; 7D84187C
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v22, v1           ; 7E2C8501
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_6              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 vcc_lo, v12     ; 7ED4050C
	s_mov_b32 m0, vcc_lo                ; BEFC036A
	v_cmp_eq_u32_e32 vcc, m0, v12       ; 7D84187C
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v14, v2           ; 7E1C8502
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_8              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 vcc_lo, v12     ; 7ED4050C
	s_mov_b32 m0, vcc_lo                ; BEFC036A
	v_cmp_eq_u32_e32 vcc, m0, v12       ; 7D84187C
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v3, v11           ; 7E06850B
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_10             ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_add_i32_e32 v12, vcc, 1, v12      ; 4A181881
	v_cmp_gt_i32_e32 vcc, s0, v12       ; 7D081800
	s_and_b64 vcc, exec, vcc            ; 87EA6A7E
	s_cbranch_vccnz BB0_3               ; BF870000
	v_add_f32_e32 v0, v31, v30          ; 06003D1F
	v_add_f32_e32 v1, v23, v22          ; 06022D17
	v_add_f32_e32 v2, v15, v14          ; 06041D0F
	v_add_f32_e32 v3, v4, v3            ; 06060704
	v_add_f32_e32 v0, v32, v0           ; 06000120
	v_add_f32_e32 v1, v24, v1           ; 06020318
	v_add_f32_e32 v2, v16, v2           ; 06040510
	v_add_f32_e32 v3, v5, v3            ; 06060705
	v_add_f32_e32 v0, v33, v0           ; 06000121
	v_add_f32_e32 v1, v25, v1           ; 06020319
	v_add_f32_e32 v2, v17, v2           ; 06040511
	v_add_f32_e32 v3, v6, v3            ; 06060706
	v_add_f32_e32 v0, v34, v0           ; 06000122
	v_add_f32_e32 v1, v26, v1           ; 0602031A
	v_add_f32_e32 v2, v18, v2           ; 06040512
	v_add_f32_e32 v3, v7, v3            ; 06060707
	v_add_f32_e32 v0, v35, v0           ; 06000123
	v_add_f32_e32 v1, v27, v1           ; 0602031B
	v_add_f32_e32 v2, v19, v2           ; 06040513
	v_add_f32_e32 v3, v8, v3            ; 06060708
	v_add_f32_e32 v0, v36, v0           ; 06000124
	v_add_f32_e32 v1, v28, v1           ; 0602031C
	v_add_f32_e32 v2, v20, v2           ; 06040514
	v_add_f32_e32 v3, v9, v3            ; 06060709
	v_add_f32_e32 v0, v37, v0           ; 06000125
	v_add_f32_e32 v1, v29, v1           ; 0602031D
	v_add_f32_e32 v2, v21, v2           ; 06040515
	v_add_f32_e32 v3, v10, v3           ; 0606070A
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 40
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 492 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 6
********************
FRAG
DCL IN[0], GENERIC[0], CONSTANT
DCL OUT[0], COLOR
  0: MOV OUT[0], IN[0]
  1: END
radeonsi: Compiling shader 6
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = call float @llvm.SI.fs.constant(i32 0, i32 0, i32 %6)
  %24 = call float @llvm.SI.fs.constant(i32 1, i32 0, i32 %6)
  %25 = call float @llvm.SI.fs.constant(i32 2, i32 0, i32 %6)
  %26 = call float @llvm.SI.fs.constant(i32 3, i32 0, i32 %6)
  %27 = bitcast float %5 to i32
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %27, 10
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %28, float %23, 11
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %29, float %24, 12
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 13
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 14
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.constant(i32, i32, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL IN[1]
DCL OUT[0], POSITION
DCL OUT[1], GENERIC[0]
  0: MOV OUT[0], IN[0]
  1: MOV OUT[1], IN[1]
  2: END
radeonsi: Compiling shader 7
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32, i32) {
main_body:
  %15 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %16 = load <16 x i8>, <16 x i8> addrspace(2)* %15, align 16, !invariant.load !0
  %17 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %16, i32 0, i32 %13)
  %18 = extractelement <4 x float> %17, i32 0
  %19 = extractelement <4 x float> %17, i32 1
  %20 = extractelement <4 x float> %17, i32 2
  %21 = extractelement <4 x float> %17, i32 3
  %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 1, !amdgpu.uniform !0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0
  %24 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %23, i32 0, i32 %14)
  %25 = extractelement <4 x float> %24, i32 0
  %26 = extractelement <4 x float> %24, i32 1
  %27 = extractelement <4 x float> %24, i32 2
  %28 = extractelement <4 x float> %24, i32 3
  %29 = bitcast i32 %11 to float
  %30 = insertvalue <{ float, float, float }> undef, float %29, 2
  call void @llvm.SI.export(i32 15, i32 0, i32 0, i32 32, i32 0, float %25, float %26, float %27, float %28)
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %18, float %19, float %20, float %21)
  ret <{ float, float, float }> %30
}

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!0 = !{}

radeonsi: Compiling shader 8
Vertex Shader Prolog LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> @main(i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32) {
main_body:
  %19 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> undef, i32 %0, 0
  %20 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %19, i32 %1, 1
  %21 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %20, i32 %2, 2
  %22 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %21, i32 %3, 3
  %23 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %22, i32 %4, 4
  %24 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %23, i32 %5, 5
  %25 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %24, i32 %6, 6
  %26 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %25, i32 %7, 7
  %27 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %26, i32 %8, 8
  %28 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %27, i32 %9, 9
  %29 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %28, i32 %10, 10
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %29, i32 %11, 11
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %30, i32 %12, 12
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %31, i32 %13, 13
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %32, i32 %14, 14
  %34 = bitcast i32 %15 to float
  %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %33, float %34, 15
  %36 = bitcast i32 %16 to float
  %37 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %35, float %36, 16
  %38 = bitcast i32 %17 to float
  %39 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %37, float %38, 17
  %40 = bitcast i32 %18 to float
  %41 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %39, float %40, 18
  %42 = add i32 %15, %12
  %43 = bitcast i32 %42 to float
  %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %41, float %43, 19
  %45 = add i32 %15, %12
  %46 = bitcast i32 %45 to float
  %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %44, float %46, 20
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float }> %47
}


Vertex Shader as VS:
Shader prolog disassembly:
	v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
	v_mov_b32_e32 v5, v4           ; 7E0A0304
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[10:11], 0x0                  ; C0800B00
	s_load_dwordx4 s[4:7], s[10:11], 0x4                  ; C0820B04
	s_waitcnt lgkmcnt(0)                                  ; BF8C007F
	buffer_load_format_xyzw v[6:9], v4, s[0:3], 0 idxen   ; E00C2000 80000604
	buffer_load_format_xyzw v[10:13], v5, s[4:7], 0 idxen ; E00C2000 80010A05
	s_waitcnt vmcnt(0)                                    ; BF8C0F70
	exp 15, 32, 0, 0, 0, v10, v11, v12, v13               ; F800020F 0D0C0B0A
	exp 15, 12, 0, 1, 0, v6, v7, v8, v9                   ; F80008CF 09080706
	s_waitcnt expcnt(0)                                   ; BF8C0F0F
Shader epilog disassembly:
	s_endpgm ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 64 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************

Pixel Shader:
Shader main disassembly:
	s_mov_b32 m0, s11                   ; BEFC030B
	v_interp_mov_f32 v0, P0, 0, 0, [m0] ; C8020002
	v_interp_mov_f32 v1, P0, 1, 0, [m0] ; C8060102
	v_interp_mov_f32 v2, P0, 2, 0, [m0] ; C80A0202
	v_interp_mov_f32 v3, P0, 3, 0, [m0] ; C80E0302
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 40 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
FRAG
DCL IN[0], GENERIC[0], LINEAR
DCL OUT[0], COLOR
DCL SAMP[0]
DCL SVIEW[0], 2D, FLOAT
  0: TEX OUT[0], IN[0], SAMP[0], 2D
  1: END
radeonsi: Compiling shader 9
TGSI shader LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [32 x <8 x i32>], [32 x <8 x i32>] addrspace(2)* %2, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <8 x i32>, <8 x i32> addrspace(2)* %23, align 32, !invariant.load !0
  %25 = bitcast [32 x <8 x i32>] addrspace(2)* %2 to [0 x <4 x i32>] addrspace(2)*
  %26 = getelementptr [0 x <4 x i32>], [0 x <4 x i32>] addrspace(2)* %25, i64 0, i64 3, !amdgpu.uniform !0
  %27 = load <4 x i32>, <4 x i32> addrspace(2)* %26, align 16, !invariant.load !0
  %28 = extractelement <8 x i32> %24, i32 7
  %29 = extractelement <4 x i32> %27, i32 0
  %30 = and i32 %29, %28
  %31 = insertelement <4 x i32> %27, i32 %30, i32 0
  %32 = call float @llvm.SI.fs.interp(i32 0, i32 0, i32 %6, <2 x i32> %12)
  %33 = call float @llvm.SI.fs.interp(i32 1, i32 0, i32 %6, <2 x i32> %12)
  %34 = bitcast float %32 to i32
  %35 = bitcast float %33 to i32
  %36 = insertelement <2 x i32> undef, i32 %34, i32 0
  %37 = insertelement <2 x i32> %36, i32 %35, i32 1
  %38 = call <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32> %37, <8 x i32> %24, <4 x i32> %31, i32 15, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  %39 = extractelement <4 x float> %38, i32 0
  %40 = extractelement <4 x float> %38, i32 1
  %41 = extractelement <4 x float> %38, i32 2
  %42 = extractelement <4 x float> %38, i32 3
  %43 = bitcast float %5 to i32
  %44 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %43, 10
  %45 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %44, float %39, 11
  %46 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %45, float %40, 12
  %47 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %46, float %41, 13
  %48 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %47, float %42, 14
  %49 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %48, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %49
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.fs.interp(i32, i32, i32, <2 x i32>) #1

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.image.sample.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}


Pixel Shader:
Shader main disassembly:
	s_wqm_b64 exec, exec                                    ; BEFE0A7E
	s_load_dwordx8 s[12:19], s[4:5], 0x0                    ; C0C60500
	s_load_dwordx4 s[0:3], s[4:5], 0xc                      ; C080050C
	s_mov_b32 m0, s11                                       ; BEFC030B
	v_interp_p1_f32 v0, v8, 0, 0, [m0]                      ; C8000008
	v_interp_p2_f32 v0, [v0], v9, 0, 0, [m0]                ; C8010009
	v_interp_p1_f32 v1, v8, 1, 0, [m0]                      ; C8040108
	s_waitcnt lgkmcnt(0)                                    ; BF8C007F
	s_and_b32 s0, s0, s19                                   ; 87001300
	v_interp_p2_f32 v1, [v1], v9, 1, 0, [m0]                ; C8050109
	image_sample v[0:3], v[0:1], s[12:19], s[0:3] dmask:0xf ; F0800F00 00030000
	s_waitcnt vmcnt(0)                                      ; BF8C0F70
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 24
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 72 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
-------------- next part --------------
Driver vendor: X.Org
Device vendor: AMD
Device name: AMD KAVERI (DRM 2.45.0 / 4.6.2+, LLVM 4.0.0)

draw_info: {indexed = 0, mode = triangle_strip, start = 0, count = 4, start_instance = 0, instance_count = 1, vertices_per_patch = 3, index_bias = 0, min_index = 0, max_index = 3, primitive_restart = 0, restart_index = 0, count_from_stream_output = NULL, indirect = NULL, indirect_offset = 0, }

vertex_buffer 0: {stride = 16, buffer_offset = 64, buffer = 0x02475fe0, user_buffer = NULL, }
  buffer: {target = buffer, format = PIPE_FORMAT_R8_UNORM, width0 = 1048576, height0 = 1, depth0 = 1, array_size = 1, last_level = 0, nr_samples = 0, usage = 3, bind = 16, flags = 3, }
num vertex elements = 1
  vertex_element 0: {src_offset = 0, instance_divisor = 0, vertex_buffer_index = 0, src_format = PIPE_FORMAT_R32G32B32A32_FLOAT, }
num stream output targets = 0

begin shader: VERTEX
shader_state: {tokens = "
VERT
PROPERTY NEXT_SHADER FRAG
DCL IN[0]
DCL OUT[0], POSITION
DCL CONST[0..3]
DCL TEMP[0], LOCAL
  0: MUL TEMP[0], CONST[0], IN[0].xxxx
  1: MAD TEMP[0], CONST[1], IN[0].yyyy, TEMP[0]
  2: MAD TEMP[0], CONST[2], IN[0].zzzz, TEMP[0]
  3: MAD TEMP[0], CONST[3], IN[0].wwww, TEMP[0]
  4: MOV OUT[0], TEMP[0]
  5: END
", }
constant_buffer 0: {buffer = NULL, buffer_offset = 0, buffer_size = 64, user_buffer = 0x02473bd0, }
end shader: VERTEX

viewport_state 0: {scale = {125, -125, 0.5, }, translate = {125, 125, 0.5, }, }
rasterizer_state: {flatshade = 0, light_twoside = 0, clamp_vertex_color = 1, clamp_fragment_color = 0, front_ccw = 1, cull_face = 0, fill_front = 0, fill_back = 0, offset_point = 0, offset_line = 0, offset_tri = 0, scissor = 0, poly_smooth = 0, poly_stipple_enable = 0, point_smooth = 0, sprite_coord_enable = 0, sprite_coord_mode = 0, point_quad_rasterization = 0, point_tri_clip = 0, point_size_per_vertex = 0, multisample = 0, line_smooth = 0, line_stipple_enable = 0, line_stipple_factor = 0, line_stipple_pattern = 65535, line_last_pixel = 0, flatshade_first = 0, half_pixel_center = 1, bottom_edge_rule = 1, rasterizer_discard = 0, depth_clip = 1, clip_halfz = 0, clip_plane_enable = 0, line_width = 1, point_size = 1, offset_units = 0, offset_scale = 0, offset_clamp = 0, }

begin shader: FRAGMENT
shader_state: {tokens = "
FRAG
PROPERTY FS_COLOR0_WRITES_ALL_CBUFS 1
DCL OUT[0], COLOR
DCL CONST[0..2]
DCL TEMP[0..7], ARRAY(1), LOCAL
DCL TEMP[8..9], LOCAL
DCL ADDR[0]
IMM[0] FLT32 {    0.2000,     0.0000,     0.0000,     0.0000}
IMM[1] INT32 {0, 1, 0, 0}
  0: MUL TEMP[0], IMM[0].xyxy, CONST[0].xxxx
  1: MUL TEMP[1], IMM[0].yxyy, CONST[0].xxxx
  2: MUL TEMP[2], IMM[0].yyxy, CONST[0].xxxx
  3: MUL TEMP[3], IMM[0].xxyy, CONST[0].xxxx
  4: MUL TEMP[4], IMM[0].yxxy, CONST[0].xxxx
  5: MUL TEMP[5], IMM[0].xyxy, CONST[0].xxxx
  6: MOV TEMP[6], IMM[0].yyyy
  7: MUL TEMP[7], IMM[0].xxxy, CONST[0].xxxx
  8: MOV TEMP[8].x, IMM[1].xxxx
  9: BGNLOOP :0
 10:   ISGE TEMP[9].x, TEMP[8].xxxx, CONST[1].xxxx
 11:   UIF TEMP[9].xxxx :0
 12:     BRK
 13:   ENDIF
 14:   UARL ADDR[0].x, TEMP[8].xxxx
 15:   MOV TEMP[ADDR[0].x](1), CONST[2]
 16:   UADD TEMP[8].x, TEMP[8].xxxx, IMM[1].yyyy
 17: ENDLOOP :0
 18: ADD TEMP[8], TEMP[0], TEMP[1]
 19: ADD TEMP[8], TEMP[8], TEMP[2]
 20: ADD TEMP[8], TEMP[8], TEMP[3]
 21: ADD TEMP[8], TEMP[8], TEMP[4]
 22: ADD TEMP[8], TEMP[8], TEMP[5]
 23: ADD TEMP[8], TEMP[8], TEMP[6]
 24: ADD TEMP[8], TEMP[8], TEMP[7]
 25: MOV OUT[0], TEMP[8]
 26: END
", }
constant_buffer 0: {buffer = NULL, buffer_offset = 0, buffer_size = 48, user_buffer = 0x024ae1b0, }
end shader: FRAGMENT

depth_stencil_alpha_state: {depth = {enabled = 0, }, stencil = {{enabled = 0, }, {enabled = 0, }, }, alpha = {enabled = 0, }, }
stencil_ref: {ref_value = {0, 0, }, }
blend_state: {dither = 1, alpha_to_coverage = 0, alpha_to_one = 0, logicop_enable = 0, independent_blend_enable = 0, rt = {{blend_enable = 0, colormask = 15, }, }, }
blend_color: {color = {0, 0, 0, 0, }, }
min_samples = 1
sample_mask = 0xffffffff

framebuffer_state: {width = 250, height = 250, samples = 0, layers = 0, nr_cbufs = 1, cbufs = {0x0246f280, NULL, NULL, NULL, NULL, NULL, NULL, NULL, }, zsbuf = NULL, }
  cbufs[0]:
    surface: {format = PIPE_FORMAT_B8G8R8A8_UNORM, width = 250, height = 250, texture = 0x0246dc30, u.tex.level = 0, u.tex.first_layer = 0, u.tex.last_layer = 0, }
    resource: {target = 2d, format = PIPE_FORMAT_B8G8R8A8_UNORM, width0 = 250, height0 = 250, depth0 = 1, array_size = 1, last_level = 0, nr_samples = 0, usage = 0, bind = 1572874, flags = 0, }



*****************************************************************************
Driver-specific state:

Memory-mapped registers:
        GRBM_STATUS <- ME0PIPE0_CMDFIFO_AVAIL = 8
                       SRBM_RQ_PENDING = 1
                       ME0PIPE0_CF_RQ_PENDING = 0
                       ME0PIPE0_PF_RQ_PENDING = 0
                       GDS_DMA_RQ_PENDING = 0
                       DB_CLEAN = 1
                       CB_CLEAN = 0
                       TA_BUSY = 0
                       GDS_BUSY = 0
                       WD_BUSY_NO_DMA = 0
                       VGT_BUSY = 0
                       IA_BUSY_NO_DMA = 0
                       IA_BUSY = 0
                       SX_BUSY = 0
                       WD_BUSY = 0
                       SPI_BUSY = 1
                       BCI_BUSY = 0
                       SC_BUSY = 0
                       PA_BUSY = 0
                       DB_BUSY = 1
                       CP_COHERENCY_BUSY = 0
                       CP_BUSY = 1
                       CB_BUSY = 1
                       GUI_ACTIVE = 1

Color buffer 0:
  Info: npix_x=250, npix_y=250, npix_z=1, blk_w=1, blk_h=1, blk_d=1, array_size=1, last_level=0, bpe=4, nsamples=1, flags=0x110301, b8g8r8a8_unorm
  Layout: size=262144, alignment=16384, bankw=1, bankh=1, nbanks=0, mtilea=2, tilesplit=512, pipeconfig=0, scanout=1
  CMask: offset=0, size=1024, alignment=1024, pitch=256, height=256, xalign=256, yalign=256, slice_tile_max=3
  Level[0]: offset=0, slice_size=262144, npix_x=250, npix_y=250, npix_z=1, nblk_x=256, nblk_y=256, nblk_z=1, pitch_bytes=1024, mode=3

SHADER KEY
  instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  as_es = 0
  as_ls = 0
  export_prim_id = 0

Vertex Shader as VS - main shader part - LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) {
main_body:
  %14 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %15 = load <16 x i8>, <16 x i8> addrspace(2)* %14, align 16, !invariant.load !0
  %16 = call float @llvm.SI.load.const(<16 x i8> %15, i32 0)
  %17 = call float @llvm.SI.load.const(<16 x i8> %15, i32 4)
  %18 = call float @llvm.SI.load.const(<16 x i8> %15, i32 8)
  %19 = call float @llvm.SI.load.const(<16 x i8> %15, i32 12)
  %20 = call float @llvm.SI.load.const(<16 x i8> %15, i32 16)
  %21 = call float @llvm.SI.load.const(<16 x i8> %15, i32 20)
  %22 = call float @llvm.SI.load.const(<16 x i8> %15, i32 24)
  %23 = call float @llvm.SI.load.const(<16 x i8> %15, i32 28)
  %24 = call float @llvm.SI.load.const(<16 x i8> %15, i32 32)
  %25 = call float @llvm.SI.load.const(<16 x i8> %15, i32 36)
  %26 = call float @llvm.SI.load.const(<16 x i8> %15, i32 40)
  %27 = call float @llvm.SI.load.const(<16 x i8> %15, i32 44)
  %28 = call float @llvm.SI.load.const(<16 x i8> %15, i32 48)
  %29 = call float @llvm.SI.load.const(<16 x i8> %15, i32 52)
  %30 = call float @llvm.SI.load.const(<16 x i8> %15, i32 56)
  %31 = call float @llvm.SI.load.const(<16 x i8> %15, i32 60)
  %32 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %5, i64 0, i64 0, !amdgpu.uniform !0
  %33 = load <16 x i8>, <16 x i8> addrspace(2)* %32, align 16, !invariant.load !0
  %34 = call <4 x float> @llvm.SI.vs.load.input(<16 x i8> %33, i32 0, i32 %13)
  %35 = extractelement <4 x float> %34, i32 0
  %36 = extractelement <4 x float> %34, i32 1
  %37 = extractelement <4 x float> %34, i32 2
  %38 = extractelement <4 x float> %34, i32 3
  %39 = fmul float %16, %35
  %40 = fmul float %17, %35
  %41 = fmul float %18, %35
  %42 = fmul float %19, %35
  %43 = fmul float %20, %36
  %44 = fadd float %43, %39
  %45 = fmul float %21, %36
  %46 = fadd float %45, %40
  %47 = fmul float %22, %36
  %48 = fadd float %47, %41
  %49 = fmul float %23, %36
  %50 = fadd float %49, %42
  %51 = fmul float %24, %37
  %52 = fadd float %51, %44
  %53 = fmul float %25, %37
  %54 = fadd float %53, %46
  %55 = fmul float %26, %37
  %56 = fadd float %55, %48
  %57 = fmul float %27, %37
  %58 = fadd float %57, %50
  %59 = fmul float %28, %38
  %60 = fadd float %59, %52
  %61 = fmul float %29, %38
  %62 = fadd float %61, %54
  %63 = fmul float %30, %38
  %64 = fadd float %63, %56
  %65 = fmul float %31, %38
  %66 = fadd float %65, %58
  %67 = bitcast i32 %11 to float
  %68 = insertvalue <{ float, float, float }> undef, float %67, 2
  call void @llvm.SI.export(i32 15, i32 0, i32 1, i32 12, i32 0, float %60, float %62, float %64, float %66)
  ret <{ float, float, float }> %68
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #0

; Function Attrs: nounwind readnone
declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #0

; Function Attrs: nounwind
declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float) #1

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }

!0 = !{}


Vertex Shader as VS:
Shader prolog disassembly:
	v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
	s_load_dwordx4 s[4:7], s[10:11], 0x0                ; C0820B00
	s_load_dwordx4 s[0:3], s[2:3], 0x0                  ; C0800300
	s_waitcnt lgkmcnt(0)                                ; BF8C007F
	buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
	s_buffer_load_dword s4, s[0:3], 0x0                 ; C2020100
	s_buffer_load_dword s5, s[0:3], 0x1                 ; C2028101
	s_buffer_load_dword s6, s[0:3], 0x2                 ; C2030102
	s_buffer_load_dword s7, s[0:3], 0x3                 ; C2038103
	s_buffer_load_dword s8, s[0:3], 0x4                 ; C2040104
	s_buffer_load_dword s9, s[0:3], 0x5                 ; C2048105
	s_buffer_load_dword s10, s[0:3], 0x6                ; C2050106
	s_buffer_load_dword s11, s[0:3], 0x7                ; C2058107
	s_buffer_load_dword s12, s[0:3], 0x8                ; C2060108
	s_buffer_load_dword s13, s[0:3], 0x9                ; C2068109
	s_buffer_load_dword s14, s[0:3], 0xa                ; C207010A
	s_buffer_load_dword s15, s[0:3], 0xb                ; C207810B
	s_buffer_load_dword s16, s[0:3], 0xc                ; C208010C
	s_buffer_load_dword s17, s[0:3], 0xd                ; C208810D
	s_buffer_load_dword s18, s[0:3], 0xe                ; C209010E
	s_buffer_load_dword s0, s[0:3], 0xf                 ; C200010F
	s_waitcnt vmcnt(0) lgkmcnt(0)                       ; BF8C0070
	v_mul_f32_e32 v0, s4, v3                            ; 10000604
	v_mul_f32_e32 v1, s5, v3                            ; 10020605
	v_mul_f32_e32 v7, s6, v3                            ; 100E0606
	v_mul_f32_e32 v3, s7, v3                            ; 10060607
	v_mac_f32_e32 v0, s8, v4                            ; 3E000808
	v_mac_f32_e32 v1, s9, v4                            ; 3E020809
	v_mac_f32_e32 v7, s10, v4                           ; 3E0E080A
	v_mac_f32_e32 v3, s11, v4                           ; 3E06080B
	v_mac_f32_e32 v0, s12, v5                           ; 3E000A0C
	v_mac_f32_e32 v1, s13, v5                           ; 3E020A0D
	v_mac_f32_e32 v7, s14, v5                           ; 3E0E0A0E
	v_mac_f32_e32 v3, s15, v5                           ; 3E060A0F
	v_mac_f32_e32 v0, s16, v6                           ; 3E000C10
	v_mac_f32_e32 v1, s17, v6                           ; 3E020C11
	v_mac_f32_e32 v7, s18, v6                           ; 3E0E0C12
	v_mac_f32_e32 v3, s0, v6                            ; 3E060C00
	exp 15, 12, 0, 1, 0, v0, v1, v7, v3                 ; F80008CF 03070100
	s_waitcnt expcnt(0)                                 ; BF8C0F0F
Shader epilog disassembly:
	s_endpgm ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************
SHADER KEY
  prolog.color_two_side = 0
  prolog.flatshade_colors = 0
  prolog.poly_stipple = 0
  prolog.force_persp_sample_interp = 0
  prolog.force_linear_sample_interp = 0
  prolog.force_persp_center_interp = 0
  prolog.force_linear_center_interp = 0
  prolog.bc_optimize_for_persp = 0
  prolog.bc_optimize_for_linear = 0
  epilog.spi_shader_col_format = 0x4
  epilog.color_is_int8 = 0x0
  epilog.last_cbuf = 0
  epilog.alpha_func = 7
  epilog.alpha_to_one = 0
  epilog.poly_line_smoothing = 0
  epilog.clamp_color = 0

Pixel Shader - main shader part - LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([17 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
  %25 = call float @llvm.SI.load.const(<16 x i8> %24, i32 0)
  %26 = call float @llvm.SI.load.const(<16 x i8> %24, i32 16)
  %27 = call float @llvm.SI.load.const(<16 x i8> %24, i32 32)
  %28 = call float @llvm.SI.load.const(<16 x i8> %24, i32 36)
  %29 = call float @llvm.SI.load.const(<16 x i8> %24, i32 40)
  %30 = call float @llvm.SI.load.const(<16 x i8> %24, i32 44)
  %31 = fmul float %25, 0x3FC99999A0000000
  %32 = fmul float %25, 0.000000e+00
  %33 = fmul float %25, 0x3FC99999A0000000
  %34 = fmul float %25, 0.000000e+00
  %35 = fmul float %25, 0.000000e+00
  %36 = fmul float %25, 0x3FC99999A0000000
  %37 = fmul float %25, 0.000000e+00
  %38 = fmul float %25, 0.000000e+00
  %39 = fmul float %25, 0.000000e+00
  %40 = fmul float %25, 0.000000e+00
  %41 = fmul float %25, 0x3FC99999A0000000
  %42 = fmul float %25, 0.000000e+00
  %43 = fmul float %25, 0x3FC99999A0000000
  %44 = fmul float %25, 0x3FC99999A0000000
  %45 = fmul float %25, 0.000000e+00
  %46 = fmul float %25, 0.000000e+00
  %47 = fmul float %25, 0.000000e+00
  %48 = fmul float %25, 0x3FC99999A0000000
  %49 = fmul float %25, 0x3FC99999A0000000
  %50 = fmul float %25, 0.000000e+00
  %51 = fmul float %25, 0x3FC99999A0000000
  %52 = fmul float %25, 0.000000e+00
  %53 = fmul float %25, 0x3FC99999A0000000
  %54 = fmul float %25, 0.000000e+00
  %55 = fmul float %25, 0x3FC99999A0000000
  %56 = fmul float %25, 0x3FC99999A0000000
  %57 = fmul float %25, 0x3FC99999A0000000
  %58 = fmul float %25, 0.000000e+00
  %59 = bitcast float %26 to i32
  br label %LOOP

LOOP:                                             ; preds = %ENDIF, %main_body
  %temp2.0 = phi float [ %33, %main_body ], [ %141, %ENDIF ]
  %temp3.0 = phi float [ %34, %main_body ], [ %158, %ENDIF ]
  %temp4.0 = phi float [ %35, %main_body ], [ %108, %ENDIF ]
  %temp5.0 = phi float [ %36, %main_body ], [ %125, %ENDIF ]
  %temp6.0 = phi float [ %37, %main_body ], [ %142, %ENDIF ]
  %temp7.0 = phi float [ %38, %main_body ], [ %159, %ENDIF ]
  %temp8.0 = phi float [ %39, %main_body ], [ %109, %ENDIF ]
  %temp9.0 = phi float [ %40, %main_body ], [ %126, %ENDIF ]
  %temp10.0 = phi float [ %41, %main_body ], [ %143, %ENDIF ]
  %temp11.0 = phi float [ %42, %main_body ], [ %160, %ENDIF ]
  %temp12.0 = phi float [ %43, %main_body ], [ %110, %ENDIF ]
  %temp13.0 = phi float [ %44, %main_body ], [ %127, %ENDIF ]
  %temp14.0 = phi float [ %45, %main_body ], [ %144, %ENDIF ]
  %temp15.0 = phi float [ %46, %main_body ], [ %161, %ENDIF ]
  %temp16.0 = phi float [ %47, %main_body ], [ %111, %ENDIF ]
  %temp17.0 = phi float [ %48, %main_body ], [ %128, %ENDIF ]
  %temp18.0 = phi float [ %49, %main_body ], [ %145, %ENDIF ]
  %temp19.0 = phi float [ %50, %main_body ], [ %162, %ENDIF ]
  %temp20.0 = phi float [ %51, %main_body ], [ %112, %ENDIF ]
  %temp21.0 = phi float [ %52, %main_body ], [ %129, %ENDIF ]
  %temp22.0 = phi float [ %53, %main_body ], [ %146, %ENDIF ]
  %temp23.0 = phi float [ %54, %main_body ], [ %163, %ENDIF ]
  %temp24.0 = phi float [ 0.000000e+00, %main_body ], [ %113, %ENDIF ]
  %temp25.0 = phi float [ 0.000000e+00, %main_body ], [ %130, %ENDIF ]
  %temp26.0 = phi float [ 0.000000e+00, %main_body ], [ %147, %ENDIF ]
  %temp27.0 = phi float [ 0.000000e+00, %main_body ], [ %164, %ENDIF ]
  %temp28.0 = phi float [ %55, %main_body ], [ %114, %ENDIF ]
  %temp29.0 = phi float [ %56, %main_body ], [ %131, %ENDIF ]
  %temp30.0 = phi float [ %57, %main_body ], [ %148, %ENDIF ]
  %temp31.0 = phi float [ %58, %main_body ], [ %165, %ENDIF ]
  %temp32.0 = phi float [ 0.000000e+00, %main_body ], [ %168, %ENDIF ]
  %temp1.0 = phi float [ %32, %main_body ], [ %124, %ENDIF ]
  %temp.0 = phi float [ %31, %main_body ], [ %107, %ENDIF ]
  %60 = bitcast float %temp32.0 to i32
  %61 = icmp slt i32 %60, %59
  br i1 %61, label %ENDIF, label %IF

IF:                                               ; preds = %LOOP
  %62 = fadd float %temp.0, %temp4.0
  %63 = fadd float %temp1.0, %temp5.0
  %64 = fadd float %temp2.0, %temp6.0
  %65 = fadd float %temp3.0, %temp7.0
  %66 = fadd float %62, %temp8.0
  %67 = fadd float %63, %temp9.0
  %68 = fadd float %64, %temp10.0
  %69 = fadd float %65, %temp11.0
  %70 = fadd float %66, %temp12.0
  %71 = fadd float %67, %temp13.0
  %72 = fadd float %68, %temp14.0
  %73 = fadd float %69, %temp15.0
  %74 = fadd float %70, %temp16.0
  %75 = fadd float %71, %temp17.0
  %76 = fadd float %72, %temp18.0
  %77 = fadd float %73, %temp19.0
  %78 = fadd float %74, %temp20.0
  %79 = fadd float %75, %temp21.0
  %80 = fadd float %76, %temp22.0
  %81 = fadd float %77, %temp23.0
  %82 = fadd float %78, %temp24.0
  %83 = fadd float %79, %temp25.0
  %84 = fadd float %80, %temp26.0
  %85 = fadd float %81, %temp27.0
  %86 = fadd float %82, %temp28.0
  %87 = fadd float %83, %temp29.0
  %88 = fadd float %84, %temp30.0
  %89 = fadd float %85, %temp31.0
  %90 = bitcast float %5 to i32
  %91 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %90, 10
  %92 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %91, float %86, 11
  %93 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %92, float %87, 12
  %94 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %93, float %88, 13
  %95 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %94, float %89, 14
  %96 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %95, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %96

ENDIF:                                            ; preds = %LOOP
  %97 = bitcast float %temp32.0 to i32
  %98 = insertelement <8 x float> undef, float %temp.0, i32 0
  %99 = insertelement <8 x float> %98, float %temp4.0, i32 1
  %100 = insertelement <8 x float> %99, float %temp8.0, i32 2
  %101 = insertelement <8 x float> %100, float %temp12.0, i32 3
  %102 = insertelement <8 x float> %101, float %temp16.0, i32 4
  %103 = insertelement <8 x float> %102, float %temp20.0, i32 5
  %104 = insertelement <8 x float> %103, float %temp24.0, i32 6
  %105 = insertelement <8 x float> %104, float %temp28.0, i32 7
  %106 = insertelement <8 x float> %105, float %27, i32 %97
  %107 = extractelement <8 x float> %106, i32 0
  %108 = extractelement <8 x float> %106, i32 1
  %109 = extractelement <8 x float> %106, i32 2
  %110 = extractelement <8 x float> %106, i32 3
  %111 = extractelement <8 x float> %106, i32 4
  %112 = extractelement <8 x float> %106, i32 5
  %113 = extractelement <8 x float> %106, i32 6
  %114 = extractelement <8 x float> %106, i32 7
  %115 = insertelement <8 x float> undef, float %temp1.0, i32 0
  %116 = insertelement <8 x float> %115, float %temp5.0, i32 1
  %117 = insertelement <8 x float> %116, float %temp9.0, i32 2
  %118 = insertelement <8 x float> %117, float %temp13.0, i32 3
  %119 = insertelement <8 x float> %118, float %temp17.0, i32 4
  %120 = insertelement <8 x float> %119, float %temp21.0, i32 5
  %121 = insertelement <8 x float> %120, float %temp25.0, i32 6
  %122 = insertelement <8 x float> %121, float %temp29.0, i32 7
  %123 = insertelement <8 x float> %122, float %28, i32 %97
  %124 = extractelement <8 x float> %123, i32 0
  %125 = extractelement <8 x float> %123, i32 1
  %126 = extractelement <8 x float> %123, i32 2
  %127 = extractelement <8 x float> %123, i32 3
  %128 = extractelement <8 x float> %123, i32 4
  %129 = extractelement <8 x float> %123, i32 5
  %130 = extractelement <8 x float> %123, i32 6
  %131 = extractelement <8 x float> %123, i32 7
  %132 = insertelement <8 x float> undef, float %temp2.0, i32 0
  %133 = insertelement <8 x float> %132, float %temp6.0, i32 1
  %134 = insertelement <8 x float> %133, float %temp10.0, i32 2
  %135 = insertelement <8 x float> %134, float %temp14.0, i32 3
  %136 = insertelement <8 x float> %135, float %temp18.0, i32 4
  %137 = insertelement <8 x float> %136, float %temp22.0, i32 5
  %138 = insertelement <8 x float> %137, float %temp26.0, i32 6
  %139 = insertelement <8 x float> %138, float %temp30.0, i32 7
  %140 = insertelement <8 x float> %139, float %29, i32 %97
  %141 = extractelement <8 x float> %140, i32 0
  %142 = extractelement <8 x float> %140, i32 1
  %143 = extractelement <8 x float> %140, i32 2
  %144 = extractelement <8 x float> %140, i32 3
  %145 = extractelement <8 x float> %140, i32 4
  %146 = extractelement <8 x float> %140, i32 5
  %147 = extractelement <8 x float> %140, i32 6
  %148 = extractelement <8 x float> %140, i32 7
  %149 = insertelement <8 x float> undef, float %temp3.0, i32 0
  %150 = insertelement <8 x float> %149, float %temp7.0, i32 1
  %151 = insertelement <8 x float> %150, float %temp11.0, i32 2
  %152 = insertelement <8 x float> %151, float %temp15.0, i32 3
  %153 = insertelement <8 x float> %152, float %temp19.0, i32 4
  %154 = insertelement <8 x float> %153, float %temp23.0, i32 5
  %155 = insertelement <8 x float> %154, float %temp27.0, i32 6
  %156 = insertelement <8 x float> %155, float %temp31.0, i32 7
  %157 = insertelement <8 x float> %156, float %30, i32 %97
  %158 = extractelement <8 x float> %157, i32 0
  %159 = extractelement <8 x float> %157, i32 1
  %160 = extractelement <8 x float> %157, i32 2
  %161 = extractelement <8 x float> %157, i32 3
  %162 = extractelement <8 x float> %157, i32 4
  %163 = extractelement <8 x float> %157, i32 5
  %164 = extractelement <8 x float> %157, i32 6
  %165 = extractelement <8 x float> %157, i32 7
  %166 = bitcast float %temp32.0 to i32
  %167 = add i32 %166, 1
  %168 = bitcast i32 %167 to float
  br label %LOOP
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" }
attributes #1 = { nounwind readnone }

!0 = !{}


Pixel Shader:
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[2:3], 0x0  ; C0800300
	v_mov_b32_e32 v4, 0x3e4ccccd        ; 7E0802FF 3E4CCCCD
	v_mov_b32_e32 v28, 0                ; 7E380280
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	s_buffer_load_dword s8, s[0:3], 0x0 ; C2040100
	s_buffer_load_dword s4, s[0:3], 0x8 ; C2020108
	s_buffer_load_dword s5, s[0:3], 0x9 ; C2028109
	s_buffer_load_dword s6, s[0:3], 0xa ; C203010A
	s_buffer_load_dword s7, s[0:3], 0xb ; C203810B
	s_buffer_load_dword s0, s[0:3], 0x4 ; C2000104
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	v_mov_b32_e32 v0, s4                ; 7E000204
	v_mov_b32_e32 v1, s5                ; 7E020205
	v_mov_b32_e32 v2, s6                ; 7E040206
	v_mov_b32_e32 v3, s7                ; 7E060207
	v_mul_f32_e32 v22, s8, v4           ; 102C0808
	v_mul_f32_e64 v23, 0, s8            ; D2100017 00001080
	s_branch BB0_1                      ; BF820000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_cmp_gt_i32_e32 vcc, s0, v28       ; 7D083800
	s_and_b64 vcc, exec, vcc            ; 87EA6A7E
	s_cbranch_vccz BB0_11               ; BF860000
	v_mov_b32_e32 v24, v23              ; 7E300317
	v_mov_b32_e32 v25, v22              ; 7E320316
	v_mov_b32_e32 v26, v23              ; 7E340317
	v_mov_b32_e32 v27, v22              ; 7E360316
	v_mov_b32_e32 v29, v22              ; 7E3A0316
	v_mov_b32_e32 v4, v22               ; 7E080316
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_mov_b32_e32 v5, v23               ; 7E0A0317
	v_mov_b32_e32 v6, v24               ; 7E0C0318
	v_mov_b32_e32 v7, v25               ; 7E0E0319
	v_mov_b32_e32 v8, v26               ; 7E10031A
	v_mov_b32_e32 v9, v27               ; 7E12031B
	v_mov_b32_e32 v10, v28              ; 7E14031C
	v_mov_b32_e32 v11, v29              ; 7E16031D
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v0            ; 7E088500
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_3              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_mov_b32_e32 v15, v23              ; 7E1E0317
	v_mov_b32_e32 v16, v22              ; 7E200316
	v_mov_b32_e32 v17, v23              ; 7E220317
	v_mov_b32_e32 v18, v22              ; 7E240316
	v_mov_b32_e32 v19, v22              ; 7E260316
	v_mov_b32_e32 v20, v23              ; 7E280317
	v_mov_b32_e32 v21, v28              ; 7E2A031C
	v_mov_b32_e32 v4, v15               ; 7E08030F
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_mov_b32_e32 v5, v16               ; 7E0A0310
	v_mov_b32_e32 v6, v17               ; 7E0C0311
	v_mov_b32_e32 v7, v18               ; 7E0E0312
	v_mov_b32_e32 v8, v19               ; 7E100313
	v_mov_b32_e32 v9, v20               ; 7E120314
	v_mov_b32_e32 v10, v21              ; 7E140315
	v_mov_b32_e32 v11, v22              ; 7E160316
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v1            ; 7E088501
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_5              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_mov_b32_e32 v4, v22               ; 7E080316
	v_mov_b32_e32 v5, v23               ; 7E0A0317
	v_mov_b32_e32 v6, v22               ; 7E0C0316
	v_mov_b32_e32 v7, v23               ; 7E0E0317
	v_mov_b32_e32 v8, v22               ; 7E100316
	v_mov_b32_e32 v9, v22               ; 7E120316
	v_mov_b32_e32 v10, v28              ; 7E14031C
	v_mov_b32_e32 v11, v22              ; 7E160316
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v2            ; 7E088502
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_7              ; BF890000
	s_mov_b64 exec, s[2:3]              ; BEFE0402
	v_mov_b32_e32 v4, v23               ; 7E080317
	v_mov_b32_e32 v5, v23               ; 7E0A0317
	v_mov_b32_e32 v6, v23               ; 7E0C0317
	v_mov_b32_e32 v7, v23               ; 7E0E0317
	v_mov_b32_e32 v8, v23               ; 7E100317
	v_mov_b32_e32 v9, v23               ; 7E120317
	v_mov_b32_e32 v10, v28              ; 7E14031C
	v_mov_b32_e32 v11, v23              ; 7E160317
	s_mov_b64 s[2:3], exec              ; BE82047E
	v_readfirstlane_b32 s1, v28         ; 7E02051C
	v_cmp_eq_u32_e32 vcc, s1, v28       ; 7D843801
	s_mov_b32 m0, s1                    ; BEFC0301
	s_and_saveexec_b64 vcc, vcc         ; BEEA246A
	v_movreld_b32_e32 v4, v3            ; 7E088503
	s_xor_b64 exec, exec, vcc           ; 89FE6A7E
	s_cbranch_execnz BB0_9              ; BF890000
	s_branch BB0_10                     ; BF820000
	v_add_f32_e32 v0, v23, v22          ; 06002D17
	v_add_f32_e32 v1, v22, v23          ; 06022F16
	v_add_f32_e32 v2, v23, v22          ; 06042D17
	v_add_f32_e32 v3, v23, v23          ; 06062F17
	v_add_f32_e32 v0, v23, v0           ; 06000117
	v_add_f32_e32 v1, v23, v1           ; 06020317
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v22, v0           ; 06000116
	v_add_f32_e32 v1, v22, v1           ; 06020316
	v_add_f32_e32 v2, v23, v2           ; 06040517
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v23, v0           ; 06000117
	v_add_f32_e32 v1, v22, v1           ; 06020316
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v22, v0           ; 06000116
	v_add_f32_e32 v1, v23, v1           ; 06020317
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
	v_add_f32_e32 v0, v28, v0           ; 0600011C
	v_add_f32_e32 v1, v28, v1           ; 0602031C
	v_add_f32_e32 v2, v28, v2           ; 0604051C
	v_add_f32_e32 v3, v28, v3           ; 0606071C
	v_add_f32_e32 v0, v22, v0           ; 06000116
	v_add_f32_e32 v1, v22, v1           ; 06020316
	v_add_f32_e32 v2, v22, v2           ; 06040516
	v_add_f32_e32 v3, v23, v3           ; 06060717
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1 ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3 ; 5E020702
	exp 15, 0, 1, 1, 1, v0, v1, v0, v0 ; F8001C0F 00000100
	s_endpgm                           ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 32
Spilled SGPRs: 0
Spilled VGPRs: 0
Code Size: 548 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 8
********************
Buffer list (in units of pages = 4kB):
        Size    VM start page         VM end page           Usage
          16    0x0000000000812       0x0000000000822       BORDER_COLORS
           1    0x0000000000822       0x0000000000823       IB2
           1    -- hole --
           1    0x0000000000824       0x0000000000825       CONST_BUFFER
          17    -- hole --
           1    0x0000000000836       0x0000000000837       TRACE
           1    0x0000000000837       0x0000000000838       CMASK
          64    0x0000000000838       0x0000000000878       COLOR_BUFFER
         256    0x0000000000878       0x0000000000978       CONST_BUFFER, DESCRIPTORS, RINGS_STREAMOUT
         256    0x0000000000978       0x0000000000a78       VERTEX_BUFFER
           1    0x0000000000a78       0x0000000000a79       USER_SHADER
           1    0x0000000000a79       0x0000000000a7a       USER_SHADER

Note: The holes represent memory not used by the IB.
      Other buffers can still be allocated there.

------------------ IB2: Init config begin ------------------
CONTEXT_CONTROL:
        0x80000000
        0x80000000
SET_CONTEXT_REG:
        VGT_HOS_MAX_TESS_LEVEL <- 64.0f (0x42800000)
        VGT_HOS_MIN_TESS_LEVEL <- 0
SET_CONTEXT_REG:
        VGT_GS_PER_ES <- GS_PER_ES = 128 (0x80)
        VGT_ES_PER_GS <- ES_PER_GS = 64 (0x40)
        VGT_GS_PER_VS <- GS_PER_VS = 2
SET_CONTEXT_REG:
        VGT_PRIMITIVEID_RESET <- 0
SET_CONTEXT_REG:
        VGT_STRMOUT_DRAW_OPAQUE_OFFSET <- 0
SET_CONTEXT_REG:
        VGT_STRMOUT_BUFFER_CONFIG <- STREAM_0_BUFFER_EN = 0
                                     STREAM_1_BUFFER_EN = 0
                                     STREAM_2_BUFFER_EN = 0
                                     STREAM_3_BUFFER_EN = 0
SET_CONTEXT_REG:
        VGT_VTX_CNT_EN <- VTX_CNT_EN = 0
SET_CONTEXT_REG:
        PA_SC_CENTROID_PRIORITY_0 <- DISTANCE_0 = 0
                                     DISTANCE_1 = 1
                                     DISTANCE_2 = 2
                                     DISTANCE_3 = 3
                                     DISTANCE_4 = 4
                                     DISTANCE_5 = 5
                                     DISTANCE_6 = 6
                                     DISTANCE_7 = 7
        PA_SC_CENTROID_PRIORITY_1 <- DISTANCE_8 = 8
                                     DISTANCE_9 = 9
                                     DISTANCE_10 = 10 (0xa)
                                     DISTANCE_11 = 11 (0xb)
                                     DISTANCE_12 = 12 (0xc)
                                     DISTANCE_13 = 13 (0xd)
                                     DISTANCE_14 = 14 (0xe)
                                     DISTANCE_15 = 15 (0xf)
SET_CONTEXT_REG:
        PA_SU_PRIM_FILTER_CNTL <- TRIANGLE_FILTER_DISABLE = 0
                                  LINE_FILTER_DISABLE = 0
                                  POINT_FILTER_DISABLE = 0
                                  RECTANGLE_FILTER_DISABLE = 0
                                  TRIANGLE_EXPAND_ENA = 0
                                  LINE_EXPAND_ENA = 0
                                  POINT_EXPAND_ENA = 0
                                  RECTANGLE_EXPAND_ENA = 0
                                  PRIM_EXPAND_CONSTANT = 0
                                  XMAX_RIGHT_EXCLUSION = 0
                                  YMAX_BOTTOM_EXCLUSION = 0
SET_CONTEXT_REG:
        PA_SC_VPORT_ZMIN_0 <- 0
        PA_SC_VPORT_ZMAX_0 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_1 <- 0
        PA_SC_VPORT_ZMAX_1 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_2 <- 0
        PA_SC_VPORT_ZMAX_2 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_3 <- 0
        PA_SC_VPORT_ZMAX_3 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_4 <- 0
        PA_SC_VPORT_ZMAX_4 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_5 <- 0
        PA_SC_VPORT_ZMAX_5 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_6 <- 0
        PA_SC_VPORT_ZMAX_6 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_7 <- 0
        PA_SC_VPORT_ZMAX_7 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_8 <- 0
        PA_SC_VPORT_ZMAX_8 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_9 <- 0
        PA_SC_VPORT_ZMAX_9 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_10 <- 0
        PA_SC_VPORT_ZMAX_10 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_11 <- 0
        PA_SC_VPORT_ZMAX_11 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_12 <- 0
        PA_SC_VPORT_ZMAX_12 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_13 <- 0
        PA_SC_VPORT_ZMAX_13 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_14 <- 0
        PA_SC_VPORT_ZMAX_14 <- 1.0f (0x3f800000)
        PA_SC_VPORT_ZMIN_15 <- 0
        PA_SC_VPORT_ZMAX_15 <- 1.0f (0x3f800000)
        PA_SC_RASTER_CONFIG <- RB_MAP_PKR0 = RASTER_CONFIG_RB_MAP_0
                               RB_MAP_PKR1 = RASTER_CONFIG_RB_MAP_0
                               RB_XSEL2 = RASTER_CONFIG_RB_XSEL2_0
                               RB_XSEL = 0
                               RB_YSEL = 0
                               PKR_MAP = RASTER_CONFIG_PKR_MAP_0
                               PKR_XSEL = RASTER_CONFIG_PKR_XSEL_0
                               PKR_YSEL = RASTER_CONFIG_PKR_YSEL_0
                               PKR_XSEL2 = RASTER_CONFIG_PKR_XSEL2_0
                               SC_MAP = RASTER_CONFIG_SC_MAP_0
                               SC_XSEL = RASTER_CONFIG_SC_XSEL_8_WIDE_TILE
                               SC_YSEL = RASTER_CONFIG_SC_YSEL_8_WIDE_TILE
                               SE_MAP = RASTER_CONFIG_SE_MAP_0
                               SE_XSEL = RASTER_CONFIG_SE_XSEL_8_WIDE_TILE
                               SE_YSEL = RASTER_CONFIG_SE_YSEL_8_WIDE_TILE
        PA_SC_RASTER_CONFIG_1 <- SE_PAIR_MAP = RASTER_CONFIG_SE_PAIR_MAP_0
                                 SE_PAIR_XSEL = RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE
                                 SE_PAIR_YSEL = RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE
SET_CONTEXT_REG:
        PA_SC_WINDOW_SCISSOR_TL <- TL_X = 0
                                   TL_Y = 0
                                   WINDOW_OFFSET_DISABLE = 1
SET_CONTEXT_REG:
        PA_SC_GENERIC_SCISSOR_TL <- TL_X = 0
                                    TL_Y = 0
                                    WINDOW_OFFSET_DISABLE = 1
        PA_SC_GENERIC_SCISSOR_BR <- BR_X = 16384 (0x4000)
                                    BR_Y = 16384 (0x4000)
SET_CONTEXT_REG:
        PA_SC_SCREEN_SCISSOR_TL <- TL_X = 0
                                   TL_Y = 0
        PA_SC_SCREEN_SCISSOR_BR <- BR_X = 16384 (0x4000)
                                   BR_Y = 16384 (0x4000)
SET_CONTEXT_REG:
        PA_SC_CLIPRECT_RULE <- CLIP_RULE = 0xffff
SET_CONTEXT_REG:
        PA_SC_EDGERULE <- ER_TRI = 10 (0xa)
                          ER_POINT = 10 (0xa)
                          ER_RECT = 10 (0xa)
                          ER_LINE_LR = 26 (0x1a)
                          ER_LINE_RL = 38 (0x26)
                          ER_LINE_TB = 10 (0xa)
                          ER_LINE_BT = 10 (0xa)
        PA_SU_HARDWARE_SCREEN_OFFSET <- HW_SCREEN_OFFSET_X = 0
                                        HW_SCREEN_OFFSET_Y = 0
SET_CONTEXT_REG:
        PA_CL_NANINF_CNTL <- VTE_XY_INF_DISCARD = 0
                             VTE_Z_INF_DISCARD = 0
                             VTE_W_INF_DISCARD = 0
                             VTE_0XNANINF_IS_0 = 0
                             VTE_XY_NAN_RETAIN = 0
                             VTE_Z_NAN_RETAIN = 0
                             VTE_W_NAN_RETAIN = 0
                             VTE_W_RECIP_NAN_IS_0 = 0
                             VS_XY_NAN_TO_INF = 0
                             VS_XY_INF_RETAIN = 0
                             VS_Z_NAN_TO_INF = 0
                             VS_Z_INF_RETAIN = 0
                             VS_W_NAN_TO_INF = 0
                             VS_W_INF_RETAIN = 0
                             VS_CLIP_DIST_INF_DISCARD = 0
                             VTE_NO_OUTPUT_NEG_0 = 0
SET_CONTEXT_REG:
        DB_SRESULTS_COMPARE_STATE0 <- COMPAREFUNC0 = REF_NEVER
                                      COMPAREVALUE0 = 0
                                      COMPAREMASK0 = 0
                                      ENABLE0 = 0
        DB_SRESULTS_COMPARE_STATE1 <- COMPAREFUNC1 = REF_NEVER
                                      COMPAREVALUE1 = 0
                                      COMPAREMASK1 = 0
                                      ENABLE1 = 0
        DB_PRELOAD_CONTROL <- START_X = 0
                              START_Y = 0
                              MAX_X = 0
                              MAX_Y = 0
SET_CONTEXT_REG:
        DB_RENDER_OVERRIDE <- FORCE_HIZ_ENABLE = FORCE_OFF
                              FORCE_HIS_ENABLE0 = FORCE_DISABLE
                              FORCE_HIS_ENABLE1 = FORCE_DISABLE
                              FORCE_SHADER_Z_ORDER = 0
                              FAST_Z_DISABLE = 0
                              FAST_STENCIL_DISABLE = 0
                              NOOP_CULL_DISABLE = 0
                              FORCE_COLOR_KILL = 0
                              FORCE_Z_READ = 0
                              FORCE_STENCIL_READ = 0
                              FORCE_FULL_Z_RANGE = FORCE_OFF
                              FORCE_QC_SMASK_CONFLICT = 0
                              DISABLE_VIEWPORT_CLAMP = 0
                              IGNORE_SC_ZRANGE = 0
                              DISABLE_FULLY_COVERED = 0
                              FORCE_Z_LIMIT_SUMM = FORCE_SUMM_OFF
                              MAX_TILES_IN_DTT = 0
                              DISABLE_TILE_RATE_TILES = 0
                              FORCE_Z_DIRTY = 0
                              FORCE_STENCIL_DIRTY = 0
                              FORCE_Z_VALID = 0
                              FORCE_STENCIL_VALID = 0
                              PRESERVE_COMPRESSION = 0
SET_CONTEXT_REG:
        VGT_MAX_VTX_INDX <- 0xffffffff
        VGT_MIN_VTX_INDX <- 0
        VGT_INDX_OFFSET <- 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_LS <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_HS <- WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_ES <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_GS <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_VS <- CU_EN = 0xfffe
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
        SPI_SHADER_LATE_ALLOC_VS <- LIMIT = 31 (0x1f)
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_PS <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
SET_CONTEXT_REG:
        VGT_VERTEX_REUSE_BLOCK_CNTL <- VTX_REUSE_DEPTH = 14 (0x0e)
        VGT_OUT_DEALLOC_CNTL <- DEALLOC_DIST = 16 (0x10)
SET_CONTEXT_REG:
        TA_BC_BASE_ADDR <- 0x00008120
        TA_BC_BASE_ADDR_HI <- ADDRESS = 0
------------------- IB2: Init config end -------------------

------------------ IB begin ------------------
WRITE_DATA:
        CONTROL <- ENGINE_SEL = ME
                   WR_CONFIRM = 1
                   WR_ONE_ADDR = 0
                   DST_SEL = MEMORY_SYNC
        DST_ADDR_LO <- 0x00836000
        DST_ADDR_HI <- 0
        0x00000001
NOP:
        Trace point ID: 1
        This trace point was reached by the CP.
INDIRECT_BUFFER_CIK:
        IB_BASE_LO <- 0x00822000
        IB_BASE_HI <- 0
        CONTROL <- IB_SIZE = 128 (0x00080)
                   CHAIN = 0
                   VALID = 0
PFP_SYNC_ME:
        0x00000000
SURFACE_SYNC:
        CP_COHER_CNTL <- DEST_BASE_0_ENA = 0
                         DEST_BASE_1_ENA = 0
                         CB0_DEST_BASE_ENA = 0
                         CB1_DEST_BASE_ENA = 0
                         CB2_DEST_BASE_ENA = 0
                         CB3_DEST_BASE_ENA = 0
                         CB4_DEST_BASE_ENA = 0
                         CB5_DEST_BASE_ENA = 0
                         CB6_DEST_BASE_ENA = 0
                         CB7_DEST_BASE_ENA = 0
                         DB_DEST_BASE_ENA = 0
                         DEST_BASE_2_ENA = 0
                         DEST_BASE_3_ENA = 0
                         TCL1_ACTION_ENA = 0
                         TC_ACTION_ENA = 0
                         CB_ACTION_ENA = 0
                         DB_ACTION_ENA = 0
                         SH_KCACHE_ACTION_ENA = 1
                         SH_ICACHE_ACTION_ENA = 1
        CP_COHER_SIZE <- 0xffffffff
        CP_COHER_BASE <- 0
        POLL_INTERVAL <- 10 (0x000a)
EVENT_WRITE:
        VGT_EVENT_INITIATOR <- EVENT_TYPE = PIPELINESTAT_START
        EVENT_INDEX <- 0
        INV_L2 <- 0
SET_CONTEXT_REG:
        VGT_STRMOUT_BUFFER_CONFIG <- STREAM_0_BUFFER_EN = 0
                                     STREAM_1_BUFFER_EN = 0
                                     STREAM_2_BUFFER_EN = 0
                                     STREAM_3_BUFFER_EN = 0
SET_CONTEXT_REG:
        VGT_STRMOUT_CONFIG <- STREAMOUT_0_EN = 0
                              STREAMOUT_1_EN = 0
                              STREAMOUT_2_EN = 0
                              STREAMOUT_3_EN = 0
                              RAST_STREAM = 0
                              RAST_STREAM_MASK = 0
                              USE_RAST_STREAM_MASK = 0
SET_CONTEXT_REG:
        CB_COLOR0_BASE <- 0x00008380
        CB_COLOR0_PITCH <- TILE_MAX = 31 (0x1f)
                           FMASK_TILE_MAX = 31 (0x1f)
        CB_COLOR0_SLICE <- TILE_MAX = 1023 (0x003ff)
        CB_COLOR0_VIEW <- SLICE_START = 0
                          SLICE_MAX = 0
        CB_COLOR0_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_8_8_8_8
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_ALT
                          FAST_CLEAR = 1
                          COMPRESSION = 0
                          BLEND_CLAMP = 1
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
        CB_COLOR0_ATTRIB <- TILE_MODE_INDEX = 10 (0xa)
                            FMASK_TILE_MODE_INDEX = 10 (0xa)
                            FMASK_BANK_HEIGHT = 0
                            NUM_SAMPLES = 0
                            NUM_FRAGMENTS = 0
                            FORCE_DST_ALPHA_1 = 0
        CB_COLOR0_DCC_CONTROL <- OVERWRITE_COMBINER_DISABLE = 0
                                 KEY_CLEAR_ENABLE = 0
                                 MAX_UNCOMPRESSED_BLOCK_SIZE = 0
                                 MIN_COMPRESSED_BLOCK_SIZE = 0
                                 MAX_COMPRESSED_BLOCK_SIZE = 0
                                 COLOR_TRANSFORM = 0
                                 INDEPENDENT_64B_BLOCKS = 0
                                 LOSSY_RGB_PRECISION = 0
                                 LOSSY_ALPHA_PRECISION = 0
        CB_COLOR0_CMASK <- 0x00008370
        CB_COLOR0_CMASK_SLICE <- TILE_MAX = 3
        CB_COLOR0_FMASK <- 0x00008380
        CB_COLOR0_FMASK_SLICE <- TILE_MAX = 1023 (0x003ff)
        CB_COLOR0_CLEAR_WORD0 <- 0x80808080
        CB_COLOR0_CLEAR_WORD1 <- 0
SET_CONTEXT_REG:
        CB_COLOR1_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_8_8_8_8
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_ALT
                          FAST_CLEAR = 1
                          COMPRESSION = 0
                          BLEND_CLAMP = 1
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        CB_COLOR2_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_INVALID
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_STD
                          FAST_CLEAR = 0
                          COMPRESSION = 0
                          BLEND_CLAMP = 0
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        CB_COLOR3_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_INVALID
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_STD
                          FAST_CLEAR = 0
                          COMPRESSION = 0
                          BLEND_CLAMP = 0
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        CB_COLOR4_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_INVALID
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_STD
                          FAST_CLEAR = 0
                          COMPRESSION = 0
                          BLEND_CLAMP = 0
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        CB_COLOR5_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_INVALID
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_STD
                          FAST_CLEAR = 0
                          COMPRESSION = 0
                          BLEND_CLAMP = 0
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        CB_COLOR6_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_INVALID
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_STD
                          FAST_CLEAR = 0
                          COMPRESSION = 0
                          BLEND_CLAMP = 0
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        CB_COLOR7_INFO <- ENDIAN = ENDIAN_NONE
                          FORMAT = COLOR_INVALID
                          LINEAR_GENERAL = 0
                          NUMBER_TYPE = NUMBER_UNORM
                          COMP_SWAP = SWAP_STD
                          FAST_CLEAR = 0
                          COMPRESSION = 0
                          BLEND_CLAMP = 0
                          BLEND_BYPASS = 0
                          SIMPLE_FLOAT = 0
                          ROUND_MODE = 0
                          CMASK_IS_LINEAR = 0
                          BLEND_OPT_DONT_RD_DST = FORCE_OPT_AUTO
                          BLEND_OPT_DISCARD_PIXEL = FORCE_OPT_AUTO
                          FMASK_COMPRESSION_DISABLE = 0
                          FMASK_COMPRESS_1FRAG_ONLY = 0
                          DCC_ENABLE = 0
                          CMASK_ADDR_TYPE = 0
SET_CONTEXT_REG:
        DB_Z_INFO <- FORMAT = Z_INVALID
                     NUM_SAMPLES = 0
                     TILE_SPLIT = ADDR_SURF_TILE_SPLIT_64B
                     TILE_MODE_INDEX = 0
                     DECOMPRESS_ON_N_ZPLANES = 0
                     ALLOW_EXPCLEAR = 0
                     READ_SIZE = 0
                     TILE_SURFACE_ENABLE = 0
                     CLEAR_DISALLOWED = 0
                     ZRANGE_PRECISION = 0
        DB_STENCIL_INFO <- FORMAT = STENCIL_INVALID
                           TILE_SPLIT = ADDR_SURF_TILE_SPLIT_64B
                           TILE_MODE_INDEX = 0
                           ALLOW_EXPCLEAR = 0
                           TILE_STENCIL_DISABLE = 0
                           CLEAR_DISALLOWED = 0
SET_CONTEXT_REG:
        PA_SC_WINDOW_SCISSOR_BR <- BR_X = 250 (0x0fa)
                                   BR_Y = 250 (0x0fa)
SET_CONTEXT_REG:
        DB_RENDER_CONTROL <- DEPTH_CLEAR_ENABLE = 0
                             STENCIL_CLEAR_ENABLE = 0
                             DEPTH_COPY = 0
                             STENCIL_COPY = 0
                             RESUMMARIZE_ENABLE = 0
                             STENCIL_COMPRESS_DISABLE = 0
                             DEPTH_COMPRESS_DISABLE = 0
                             COPY_CENTROID = 0
                             COPY_SAMPLE = 0
                             DECOMPRESS_ENABLE = 0
        DB_COUNT_CONTROL <- ZPASS_INCREMENT_DISABLE = 0
                            PERFECT_ZPASS_COUNTS = 0
                            SAMPLE_RATE = 0
                            ZPASS_ENABLE = 0
                            ZFAIL_ENABLE = 0
                            SFAIL_ENABLE = 0
                            DBFAIL_ENABLE = 0
                            SLICE_EVEN_ENABLE = 0
                            SLICE_ODD_ENABLE = 0
SET_CONTEXT_REG:
        DB_RENDER_OVERRIDE2 <- PARTIAL_SQUAD_LAUNCH_CONTROL = PSLC_AUTO
                               PARTIAL_SQUAD_LAUNCH_COUNTDOWN = 0
                               DISABLE_ZMASK_EXPCLEAR_OPTIMIZATION = 0
                               DISABLE_SMEM_EXPCLEAR_OPTIMIZATION = 0
                               DISABLE_COLOR_ON_VALIDATION = 0
                               DECOMPRESS_Z_ON_FLUSH = 0
                               DISABLE_REG_SNOOP = 0
                               DEPTH_BOUNDS_HIER_DEPTH_DISABLE = 0
                               SEPARATE_HIZS_FUNC_ENABLE = 0
                               HIZ_ZFUNC = 0
                               HIS_SFUNC_FF = 0
                               HIS_SFUNC_BF = 0
                               PRESERVE_ZRANGE = 0
                               PRESERVE_SRESULTS = 0
                               DISABLE_FAST_PASS = 0
SET_CONTEXT_REG:
        DB_SHADER_CONTROL <- Z_EXPORT_ENABLE = 0
                             STENCIL_TEST_VAL_EXPORT_ENABLE = 0
                             STENCIL_OP_VAL_EXPORT_ENABLE = 0
                             Z_ORDER = EARLY_Z_THEN_RE_Z
                             KILL_ENABLE = 0
                             COVERAGE_TO_MASK_ENABLE = 0
                             MASK_EXPORT_ENABLE = 0
                             EXEC_ON_HIER_FAIL = 0
                             EXEC_ON_NOOP = 0
                             ALPHA_TO_MASK_DISABLE = 0
                             DEPTH_BEFORE_SHADER = 0
                             CONSERVATIVE_Z_EXPORT = EXPORT_ANY_Z
                             DUAL_QUAD_DISABLE = 0
SET_CONTEXT_REG:
        PA_SC_LINE_CNTL <- EXPAND_LINE_WIDTH = 0
                           LAST_PIXEL = 0
                           PERPENDICULAR_ENDCAP_ENA = 0
                           DX10_DIAMOND_TEST_ENA = 1
        PA_SC_AA_CONFIG <- MSAA_NUM_SAMPLES = 0
                           AA_MASK_CENTROID_DTMN = 0
                           MAX_SAMPLE_DIST = 0
                           MSAA_EXPOSED_SAMPLES = 0
                           DETAIL_TO_EXPOSED_MODE = 0
SET_CONTEXT_REG:
        DB_EQAA <- MAX_ANCHOR_SAMPLES = 0
                   PS_ITER_SAMPLES = 0
                   MASK_EXPORT_NUM_SAMPLES = 0
                   ALPHA_TO_MASK_NUM_SAMPLES = 0
                   HIGH_QUALITY_INTERSECTIONS = 1
                   INCOHERENT_EQAA_READS = 0
                   INTERPOLATE_COMP_Z = 0
                   INTERPOLATE_SRC_Z = 0
                   STATIC_ANCHOR_ASSOCIATIONS = 1
                   ALPHA_TO_MASK_EQAA_DISABLE = 0
                   OVERRASTERIZATION_AMOUNT = 0
                   ENABLE_POSTZ_OVERRASTERIZATION = 0
SET_CONTEXT_REG:
        PA_SC_MODE_CNTL_1 <- WALK_SIZE = 0
                             WALK_ALIGNMENT = 0
                             WALK_ALIGN8_PRIM_FITS_ST = 1
                             WALK_FENCE_ENABLE = 1
                             WALK_FENCE_SIZE = 3
                             SUPERTILE_WALK_ORDER_ENABLE = 1
                             TILE_WALK_ORDER_ENABLE = 1
                             TILE_COVER_DISABLE = 0
                             TILE_COVER_NO_SCISSOR = 0
                             ZMM_LINE_EXTENT = 0
                             ZMM_LINE_OFFSET = 0
                             ZMM_RECT_EXTENT = 0
                             KILL_PIX_POST_HI_Z = 0
                             KILL_PIX_POST_DETAIL_MASK = 0
                             PS_ITER_SAMPLE = 0
                             MULTI_SHADER_ENGINE_PRIM_DISCARD_ENABLE = 1
                             MULTI_GPU_SUPERTILE_ENABLE = 0
                             GPU_ID_OVERRIDE_ENABLE = 0
                             GPU_ID_OVERRIDE = 0
                             MULTI_GPU_PRIM_DISCARD_ENABLE = 0
                             FORCE_EOV_CNTDWN_ENABLE = 1
                             FORCE_EOV_REZ_ENABLE = 1
                             OUT_OF_ORDER_PRIMITIVE_ENABLE = 0
                             OUT_OF_ORDER_WATER_MARK = 0
SET_CONTEXT_REG:
        PA_SC_AA_MASK_X0Y0_X1Y0 <- AA_MASK_X0Y0 = 0xffff
                                   AA_MASK_X1Y0 = 0xffff
        PA_SC_AA_MASK_X0Y1_X1Y1 <- AA_MASK_X0Y1 = 0xffff
                                   AA_MASK_X1Y1 = 0xffff
SET_CONTEXT_REG:
        CB_TARGET_MASK <- TARGET0_ENABLE = 15 (0xf)
                          TARGET1_ENABLE = 0
                          TARGET2_ENABLE = 0
                          TARGET3_ENABLE = 0
                          TARGET4_ENABLE = 0
                          TARGET5_ENABLE = 0
                          TARGET6_ENABLE = 0
                          TARGET7_ENABLE = 0
SET_CONTEXT_REG:
        CB_BLEND_RED <- 0
        CB_BLEND_GREEN <- 0
        CB_BLEND_BLUE <- 0
        CB_BLEND_ALPHA <- 0
SET_CONTEXT_REG:
        PA_CL_VS_OUT_CNTL <- CLIP_DIST_ENA_0 = 0
                             CLIP_DIST_ENA_1 = 0
                             CLIP_DIST_ENA_2 = 0
                             CLIP_DIST_ENA_3 = 0
                             CLIP_DIST_ENA_4 = 0
                             CLIP_DIST_ENA_5 = 0
                             CLIP_DIST_ENA_6 = 0
                             CLIP_DIST_ENA_7 = 0
                             CULL_DIST_ENA_0 = 0
                             CULL_DIST_ENA_1 = 0
                             CULL_DIST_ENA_2 = 0
                             CULL_DIST_ENA_3 = 0
                             CULL_DIST_ENA_4 = 0
                             CULL_DIST_ENA_5 = 0
                             CULL_DIST_ENA_6 = 0
                             CULL_DIST_ENA_7 = 0
                             USE_VTX_POINT_SIZE = 0
                             USE_VTX_EDGE_FLAG = 0
                             USE_VTX_RENDER_TARGET_INDX = 0
                             USE_VTX_VIEWPORT_INDX = 0
                             USE_VTX_KILL_FLAG = 0
                             VS_OUT_MISC_VEC_ENA = 0
                             VS_OUT_CCDIST0_VEC_ENA = 0
                             VS_OUT_CCDIST1_VEC_ENA = 0
                             VS_OUT_MISC_SIDE_BUS_ENA = 1
                             USE_VTX_GS_CUT_FLAG = 0
                             USE_VTX_LINE_WIDTH = 0
SET_CONTEXT_REG:
        PA_CL_CLIP_CNTL <- UCP_ENA_0 = 0
                           UCP_ENA_1 = 0
                           UCP_ENA_2 = 0
                           UCP_ENA_3 = 0
                           UCP_ENA_4 = 0
                           UCP_ENA_5 = 0
                           PS_UCP_Y_SCALE_NEG = 0
                           PS_UCP_MODE = 0
                           CLIP_DISABLE = 0
                           UCP_CULL_ONLY_ENA = 0
                           BOUNDARY_EDGE_FLAG_ENA = 0
                           DX_CLIP_SPACE_DEF = 0
                           DIS_CLIP_ERR_DETECT = 0
                           VTX_KILL_OR = 0
                           DX_RASTERIZATION_KILL = 0
                           DX_LINEAR_ATTR_CLIP_ENA = 1
                           VTE_VPORT_PROVOKE_DISABLE = 0
                           ZCLIP_NEAR_DISABLE = 0
                           ZCLIP_FAR_DISABLE = 0
SET_CONTEXT_REG:
        VGT_REUSE_OFF <- REUSE_OFF = 0
SET_CONTEXT_REG:
        PA_CL_UCP_0_X <- 0
        PA_CL_UCP_0_Y <- 0
        PA_CL_UCP_0_Z <- 0
        PA_CL_UCP_0_W <- 0
        PA_CL_UCP_1_X <- 0
        PA_CL_UCP_1_Y <- 0
        PA_CL_UCP_1_Z <- 0
        PA_CL_UCP_1_W <- 0
        PA_CL_UCP_2_X <- 0
        PA_CL_UCP_2_Y <- 0
        PA_CL_UCP_2_Z <- 0
        PA_CL_UCP_2_W <- 0
        PA_CL_UCP_3_X <- 0
        PA_CL_UCP_3_Y <- 0
        PA_CL_UCP_3_Z <- 0
        PA_CL_UCP_3_W <- 0
        PA_CL_UCP_4_X <- 0
        PA_CL_UCP_4_Y <- 0
        PA_CL_UCP_4_Z <- 0
        PA_CL_UCP_4_W <- 0
        PA_CL_UCP_5_X <- 0
        PA_CL_UCP_5_Y <- 0
        PA_CL_UCP_5_Z <- 0
        PA_CL_UCP_5_W <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_PS_0 <- 0x00878700
        SPI_SHADER_USER_DATA_PS_1 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_0 <- 0x00878700
        SPI_SHADER_USER_DATA_VS_1 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_GS_0 <- 0x00878700
        SPI_SHADER_USER_DATA_GS_1 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_ES_0 <- 0x00878700
        SPI_SHADER_USER_DATA_ES_1 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_HS_0 <- 0x00878700
        SPI_SHADER_USER_DATA_HS_1 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_2 <- 0x0087d300
        SPI_SHADER_USER_DATA_VS_3 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_8 <- 0x00878a00
        SPI_SHADER_USER_DATA_VS_9 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_4 <- 0x00878b00
        SPI_SHADER_USER_DATA_VS_5 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_6 <- 0x00879300
        SPI_SHADER_USER_DATA_VS_7 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_PS_2 <- 0x0087d400
        SPI_SHADER_USER_DATA_PS_3 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_PS_8 <- 0x00879600
        SPI_SHADER_USER_DATA_PS_9 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_PS_4 <- 0x0087d500
        SPI_SHADER_USER_DATA_PS_5 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_PS_6 <- 0x00879f00
        SPI_SHADER_USER_DATA_PS_7 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_GS_2 <- 0x0087a100
        SPI_SHADER_USER_DATA_GS_3 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_GS_8 <- 0x0087a200
        SPI_SHADER_USER_DATA_GS_9 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_GS_4 <- 0x0087a300
        SPI_SHADER_USER_DATA_GS_5 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_GS_6 <- 0x0087ab00
        SPI_SHADER_USER_DATA_GS_7 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_HS_2 <- 0x0087ad00
        SPI_SHADER_USER_DATA_HS_3 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_HS_8 <- 0x0087ae00
        SPI_SHADER_USER_DATA_HS_9 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_HS_4 <- 0x0087af00
        SPI_SHADER_USER_DATA_HS_5 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_HS_6 <- 0x0087b700
        SPI_SHADER_USER_DATA_HS_7 <- 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_10 <- 0x0087dd00
        SPI_SHADER_USER_DATA_VS_11 <- 0
SET_CONTEXT_REG:
        PA_SC_VPORT_SCISSOR_0_TL <- TL_X = 0
                                    TL_Y = 0
                                    WINDOW_OFFSET_DISABLE = 1
        PA_SC_VPORT_SCISSOR_0_BR <- BR_X = 250 (0x0fa)
                                    BR_Y = 250 (0x0fa)
SET_CONTEXT_REG:
        PA_CL_GB_VERT_CLIP_ADJ <- 0x43829168
        PA_CL_GB_VERT_DISC_ADJ <- 1.0f (0x3f800000)
        PA_CL_GB_HORZ_CLIP_ADJ <- 0x43829168
        PA_CL_GB_HORZ_DISC_ADJ <- 1.0f (0x3f800000)
SET_CONTEXT_REG:
        PA_CL_VPORT_XSCALE <- 125.0f (0x42fa0000)
        PA_CL_VPORT_XOFFSET <- 125.0f (0x42fa0000)
        PA_CL_VPORT_YSCALE <- -125.0f (0xc2fa0000)
        PA_CL_VPORT_YOFFSET <- 125.0f (0x42fa0000)
        PA_CL_VPORT_ZSCALE <- 0.5f (0x3f000000)
        PA_CL_VPORT_ZOFFSET <- 0.5f (0x3f000000)
SET_CONTEXT_REG:
        DB_STENCILREFMASK <- STENCILTESTVAL = 0
                             STENCILMASK = 0
                             STENCILWRITEMASK = 0
                             STENCILOPVAL = 1
        DB_STENCILREFMASK_BF <- STENCILTESTVAL_BF = 0
                                STENCILMASK_BF = 0
                                STENCILWRITEMASK_BF = 0
                                STENCILOPVAL_BF = 1
SET_CONTEXT_REG:
        DB_ALPHA_TO_MASK <- ALPHA_TO_MASK_ENABLE = 0
                            ALPHA_TO_MASK_OFFSET0 = 2
                            ALPHA_TO_MASK_OFFSET1 = 2
                            ALPHA_TO_MASK_OFFSET2 = 2
                            ALPHA_TO_MASK_OFFSET3 = 2
                            OFFSET_ROUND = 0
SET_CONTEXT_REG:
        CB_BLEND0_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND1_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND2_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND3_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND4_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND5_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND6_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
        CB_BLEND7_CONTROL <- COLOR_SRCBLEND = BLEND_ZERO
                             COLOR_COMB_FCN = COMB_DST_PLUS_SRC
                             COLOR_DESTBLEND = BLEND_ZERO
                             ALPHA_SRCBLEND = BLEND_ZERO
                             ALPHA_COMB_FCN = COMB_DST_PLUS_SRC
                             ALPHA_DESTBLEND = BLEND_ZERO
                             SEPARATE_ALPHA_BLEND = 0
                             ENABLE = 0
                             DISABLE_ROP3 = 0
SET_CONTEXT_REG:
        CB_COLOR_CONTROL <- DISABLE_DUAL_QUAD = 0
                            DEGAMMA_ENABLE = 0
                            MODE = CB_NORMAL
                            ROP3 = X_0XCC
SET_CONTEXT_REG:
        SPI_INTERP_CONTROL_0 <- FLAT_SHADE_ENA = 1
                                PNT_SPRITE_ENA = 1
                                PNT_SPRITE_OVRD_X = SPI_PNT_SPRITE_SEL_S
                                PNT_SPRITE_OVRD_Y = SPI_PNT_SPRITE_SEL_T
                                PNT_SPRITE_OVRD_Z = SPI_PNT_SPRITE_SEL_0
                                PNT_SPRITE_OVRD_W = SPI_PNT_SPRITE_SEL_1
                                PNT_SPRITE_TOP_1 = 0
SET_CONTEXT_REG:
        PA_SU_POINT_SIZE <- HEIGHT = 8
                            WIDTH = 8
        PA_SU_POINT_MINMAX <- MIN_SIZE = 8
                              MAX_SIZE = 8
        PA_SU_LINE_CNTL <- WIDTH = 8
SET_CONTEXT_REG:
        PA_SC_MODE_CNTL_0 <- MSAA_ENABLE = 0
                             VPORT_SCISSOR_ENABLE = 1
                             LINE_STIPPLE_ENABLE = 0
                             SEND_UNLIT_STILES_TO_PKR = 0
SET_CONTEXT_REG:
        PA_SU_VTX_CNTL <- PIX_CENTER = 1
                          ROUND_MODE = X_TRUNCATE
                          QUANT_MODE = X_16_8_FIXED_POINT_1_256TH
SET_CONTEXT_REG:
        PA_SU_POLY_OFFSET_CLAMP <- 0
SET_CONTEXT_REG:
        PA_SU_SC_MODE_CNTL <- CULL_FRONT = 0
                              CULL_BACK = 0
                              FACE = 0
                              POLY_MODE = X_DISABLE_POLY_MODE
                              POLYMODE_FRONT_PTYPE = X_DRAW_TRIANGLES
                              POLYMODE_BACK_PTYPE = X_DRAW_TRIANGLES
                              POLY_OFFSET_FRONT_ENABLE = 0
                              POLY_OFFSET_BACK_ENABLE = 0
                              POLY_OFFSET_PARA_ENABLE = 0
                              VTX_WINDOW_OFFSET_ENABLE = 0
                              PROVOKING_VTX_LAST = 1
                              PERSP_CORR_DIS = 0
                              MULTI_PRIM_IB_ENA = 0
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_14 <- 1
SET_CONTEXT_REG:
        DB_DEPTH_CONTROL <- STENCIL_ENABLE = 0
                            Z_ENABLE = 0
                            Z_WRITE_ENABLE = 0
                            DEPTH_BOUNDS_ENABLE = 0
                            ZFUNC = FRAG_NEVER
                            BACKFACE_ENABLE = 0
                            STENCILFUNC = REF_NEVER
                            STENCILFUNC_BF = REF_NEVER
                            ENABLE_COLOR_WRITES_ON_DEPTH_FAIL = 0
                            DISABLE_COLOR_WRITES_ON_DEPTH_PASS = 0
SET_CONTEXT_REG:
        DB_STENCIL_CONTROL <- STENCILFAIL = STENCIL_KEEP
                              STENCILZPASS = STENCIL_KEEP
                              STENCILZFAIL = STENCIL_KEEP
                              STENCILFAIL_BF = STENCIL_KEEP
                              STENCILZPASS_BF = STENCIL_KEEP
                              STENCILZFAIL_BF = STENCIL_KEEP
SET_CONTEXT_REG:
        VGT_SHADER_STAGES_EN <- LS_EN = LS_STAGE_OFF
                                HS_EN = 0
                                ES_EN = ES_STAGE_OFF
                                GS_EN = 0
                                VS_EN = VS_STAGE_REAL
                                DYNAMIC_HS = 0
                                DISPATCH_DRAW_EN = 0
                                DIS_DEALLOC_ACCUM_0 = 0
                                DIS_DEALLOC_ACCUM_1 = 0
                                VS_WAVE_ID_EN = 0
SET_CONTEXT_REG:
        VGT_GS_MODE <- MODE = GS_OFF
                       RESERVED_0 = 0
                       CUT_MODE = GS_CUT_1024
                       RESERVED_1 = 0
                       GS_C_PACK_EN = 0
                       RESERVED_2 = 0
                       ES_PASSTHRU = 0
                       COMPUTE_MODE = 0
                       FAST_COMPUTE_MODE = 0
                       ELEMENT_INFO_EN = 0
                       PARTIAL_THD_AT_EOI = 0
                       SUPPRESS_CUTS = 0
                       ES_WRITE_OPTIMIZE = 0
                       GS_WRITE_OPTIMIZE = 0
                       ONCHIP = X_0_OFFCHIP_GS
SET_CONTEXT_REG:
        VGT_PRIMITIVEID_EN <- PRIMITIVEID_EN = 0
                              DISABLE_RESET_ON_EOI = 0
SET_CONTEXT_REG:
        SPI_VS_OUT_CONFIG <- VS_EXPORT_COUNT = 0
                             VS_HALF_PACK = 0
                             VS_EXPORTS_FOG = 0
                             VS_OUT_FOG_VEC_ADDR = 0
SET_CONTEXT_REG:
        SPI_SHADER_POS_FORMAT <- POS0_EXPORT_FORMAT = SPI_SHADER_4COMP
                                 POS1_EXPORT_FORMAT = SPI_SHADER_NONE
                                 POS2_EXPORT_FORMAT = SPI_SHADER_NONE
                                 POS3_EXPORT_FORMAT = SPI_SHADER_NONE
SET_SH_REG:
        SPI_SHADER_PGM_LO_VS <- 0x0000a780
        SPI_SHADER_PGM_HI_VS <- MEM_BASE = 0
        SPI_SHADER_PGM_RSRC1_VS <- VGPRS = 1
                                   SGPRS = 2
                                   PRIORITY = 0
                                   FLOAT_MODE = 192 (0xc0)
                                   PRIV = 0
                                   DX10_CLAMP = 1
                                   DEBUG_MODE = 0
                                   IEEE_MODE = 0
                                   VGPR_COMP_CNT = 0
                                   CU_GROUP_ENABLE = 0
                                   CACHE_CTL = 0
                                   CDBG_USER = 0
        SPI_SHADER_PGM_RSRC2_VS <- SCRATCH_EN = 0
                                   USER_SGPR = 15 (0xf)
                                   TRAP_PRESENT = 0
                                   OC_LDS_EN = 0
                                   SO_BASE0_EN = 0
                                   SO_BASE1_EN = 0
                                   SO_BASE2_EN = 0
                                   SO_BASE3_EN = 0
                                   SO_EN = 0
                                   EXCP_EN = 0
                                   EXCP_EN_CIK = 0
                                   DISPATCH_DRAW_EN = 0
SET_CONTEXT_REG:
        PA_CL_VTE_CNTL <- VPORT_X_SCALE_ENA = 1
                          VPORT_X_OFFSET_ENA = 1
                          VPORT_Y_SCALE_ENA = 1
                          VPORT_Y_OFFSET_ENA = 1
                          VPORT_Z_SCALE_ENA = 1
                          VPORT_Z_OFFSET_ENA = 1
                          VTX_XY_FMT = 0
                          VTX_Z_FMT = 0
                          VTX_W0_FMT = 1
SET_CONTEXT_REG:
        SPI_PS_INPUT_ENA <- PERSP_SAMPLE_ENA = 0
                            PERSP_CENTER_ENA = 0
                            PERSP_CENTROID_ENA = 0
                            PERSP_PULL_MODEL_ENA = 0
                            LINEAR_SAMPLE_ENA = 0
                            LINEAR_CENTER_ENA = 1
                            LINEAR_CENTROID_ENA = 0
                            LINE_STIPPLE_TEX_ENA = 0
                            POS_X_FLOAT_ENA = 0
                            POS_Y_FLOAT_ENA = 0
                            POS_Z_FLOAT_ENA = 0
                            POS_W_FLOAT_ENA = 0
                            FRONT_FACE_ENA = 0
                            ANCILLARY_ENA = 0
                            SAMPLE_COVERAGE_ENA = 0
                            POS_FIXED_PT_ENA = 0
        SPI_PS_INPUT_ADDR <- PERSP_SAMPLE_ENA = 1
                             PERSP_CENTER_ENA = 1
                             PERSP_CENTROID_ENA = 1
                             PERSP_PULL_MODEL_ENA = 0
                             LINEAR_SAMPLE_ENA = 1
                             LINEAR_CENTER_ENA = 1
                             LINEAR_CENTROID_ENA = 1
                             LINE_STIPPLE_TEX_ENA = 0
                             POS_X_FLOAT_ENA = 0
                             POS_Y_FLOAT_ENA = 0
                             POS_Z_FLOAT_ENA = 0
                             POS_W_FLOAT_ENA = 0
                             FRONT_FACE_ENA = 1
                             ANCILLARY_ENA = 0
                             SAMPLE_COVERAGE_ENA = 1
                             POS_FIXED_PT_ENA = 1
SET_CONTEXT_REG:
        SPI_BARYC_CNTL <- PERSP_CENTER_CNTL = 0
                          PERSP_CENTROID_CNTL = 0
                          LINEAR_CENTER_CNTL = 0
                          LINEAR_CENTROID_CNTL = 0
                          POS_FLOAT_LOCATION = 2
                          POS_FLOAT_ULC = 0
                          FRONT_FACE_ALL_BITS = 1
SET_CONTEXT_REG:
        SPI_PS_IN_CONTROL <- NUM_INTERP = 0
                             PARAM_GEN = 0
                             FOG_ADDR = 0
                             BC_OPTIMIZE_DISABLE = 0
                             PASS_FOG_THROUGH_PS = 0
SET_CONTEXT_REG:
        SPI_SHADER_Z_FORMAT <- Z_EXPORT_FORMAT = SPI_SHADER_ZERO
        SPI_SHADER_COL_FORMAT <- COL0_EXPORT_FORMAT = SPI_SHADER_FP16_ABGR
                                 COL1_EXPORT_FORMAT = SPI_SHADER_ZERO
                                 COL2_EXPORT_FORMAT = SPI_SHADER_ZERO
                                 COL3_EXPORT_FORMAT = SPI_SHADER_ZERO
                                 COL4_EXPORT_FORMAT = SPI_SHADER_ZERO
                                 COL5_EXPORT_FORMAT = SPI_SHADER_ZERO
                                 COL6_EXPORT_FORMAT = SPI_SHADER_ZERO
                                 COL7_EXPORT_FORMAT = SPI_SHADER_ZERO
SET_CONTEXT_REG:
        CB_SHADER_MASK <- OUTPUT0_ENABLE = 15 (0xf)
                          OUTPUT1_ENABLE = 0
                          OUTPUT2_ENABLE = 0
                          OUTPUT3_ENABLE = 0
                          OUTPUT4_ENABLE = 0
                          OUTPUT5_ENABLE = 0
                          OUTPUT6_ENABLE = 0
                          OUTPUT7_ENABLE = 0
SET_SH_REG:
        SPI_SHADER_PGM_LO_PS <- 0x0000a790
        SPI_SHADER_PGM_HI_PS <- MEM_BASE = 0
        SPI_SHADER_PGM_RSRC1_PS <- VGPRS = 7
                                   SGPRS = 1
                                   PRIORITY = 0
                                   FLOAT_MODE = FP_64_DENORMS
                                   PRIV = 0
                                   DX10_CLAMP = 1
                                   DEBUG_MODE = 0
                                   IEEE_MODE = 0
                                   CU_GROUP_DISABLE = 0
                                   CACHE_CTL = 0
                                   CDBG_USER = 0
        SPI_SHADER_PGM_RSRC2_PS <- SCRATCH_EN = 0
                                   USER_SGPR = 11 (0xb)
                                   TRAP_PRESENT = 0
                                   WAVE_CNT_EN = 0
                                   EXTRA_LDS_SIZE = 0
                                   EXCP_EN = 0
                                   EXCP_EN_CIK = 0
SET_CONTEXT_REG:
        SPI_TMPRING_SIZE <- WAVES = 256 (0x100)
                            WAVESIZE = 0
SET_CONTEXT_REG:
        0x40028aa8 = 0x0010007f
SET_CONTEXT_REG:
        0x80028b58 = 0x00000000
SET_UCONFIG_REG:
        0x40030908 = 0x00000006
SET_CONTEXT_REG:
        VGT_GS_OUT_PRIM_TYPE <- OUTPRIM_TYPE = OUTPRIM_TYPE_TRISTRIP
                                OUTPRIM_TYPE_1 = 0
                                OUTPRIM_TYPE_2 = 0
                                OUTPRIM_TYPE_3 = 0
                                UNIQUE_TYPE_PER_STREAM = 0
SET_CONTEXT_REG:
        VGT_MULTI_PRIM_IB_RESET_EN <- RESET_EN = 0
NUM_INSTANCES:
        VGT_NUM_INSTANCES <- 1
SET_SH_REG:
        SPI_SHADER_USER_DATA_VS_12 <- 0
        SPI_SHADER_USER_DATA_VS_13 <- 0
DRAW_INDEX_AUTO:
        VGT_NUM_INDICES <- 4
        VGT_DRAW_INITIATOR <- SOURCE_SELECT = DI_SRC_SEL_AUTO_INDEX
                              MAJOR_MODE = DI_MAJOR_MODE_0
                              NOT_EOP = 0
                              USE_OPAQUE = 0
WRITE_DATA:
        CONTROL <- ENGINE_SEL = ME
                   WR_CONFIRM = 1
                   WR_ONE_ADDR = 0
                   DST_SEL = MEMORY_SYNC
        DST_ADDR_LO <- 0x00836000
        DST_ADDR_HI <- 0
        0x00000002
NOP:
        Trace point ID: 2
        !!!!! This is the last trace point that was reached by the CP !!!!!
EVENT_WRITE:
        VGT_EVENT_INITIATOR <- EVENT_TYPE = PS_PARTIAL_FLUSH
        EVENT_INDEX <- 4
        INV_L2 <- 0
EVENT_WRITE:
        VGT_EVENT_INITIATOR <- EVENT_TYPE = CS_PARTIAL_FLUSH
        EVENT_INDEX <- 4
        INV_L2 <- 0
PFP_SYNC_ME:
        0x00000000
WRITE_DATA:
        CONTROL <- ENGINE_SEL = ME
                   WR_CONFIRM = 1
                   WR_ONE_ADDR = 0
                   DST_SEL = MEMORY_SYNC
        DST_ADDR_LO <- 0x00836000
        DST_ADDR_HI <- 0
        0x00000003
NOP:
        Trace point ID: 3
        !!!!! This trace point was NOT reached by the CP !!!!!
------------------- IB end -------------------

Done.


More information about the mesa-dev mailing list