[Mesa-dev] [PATCH v2 22/25] gallium/radeon: transfers and invalidation for sparse buffers

Michel Dänzer michel at daenzer.net
Mon Apr 10 07:37:12 UTC 2017


On 28/03/17 06:12 PM, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> Sparse buffers can never be mapped by the CPU.

This change caused the piglit test

spec@amd_pinned_memory@map-buffer increment-offset

to randomly trigger GPUVM faults and fail on my Kaveri. Example report
from R600_DEBUG=check_vm attached.


-- 
Earthling Michel Dänzer               |               http://www.amd.com
Libre software enthusiast             |             Mesa and X developer
-------------- next part --------------
VM fault report.

Command: /home/daenzer/src/piglit-git/piglit/bin/amd_pinned_memory increment-offset map-buffer -auto -fbo 
Driver vendor: X.Org
Device vendor: AMD
Device name: AMD KAVERI (DRM 2.49.0 / 4.10.9+, LLVM 5.0.0)

Failing VM page: 0x00000f00

Color buffer 0:
  Info: npix_x=600, npix_y=480, npix_z=1, blk_w=1, blk_h=1, array_size=1, last_level=9, bpe=4, nsamples=0, flags=0x2180301, r8g8b8a8_unorm
  Layout: size=2796800, alignment=16384, bankw=1, bankh=1, nbanks=0, mtilea=2, tilesplit=512, pipeconfig=0, scanout=0
  Level[0]: offset=0, slice_size=2097152, npix_x=600, npix_y=480, npix_z=1, nblk_x=1024, nblk_y=512, mode=3, tiling_index = 14
  Level[1]: offset=2097152, slice_size=524288, npix_x=300, npix_y=240, npix_z=1, nblk_x=512, nblk_y=256, mode=3, tiling_index = 14
  Level[2]: offset=2621440, slice_size=131072, npix_x=150, npix_y=120, npix_z=1, nblk_x=256, nblk_y=128, mode=3, tiling_index = 14
  Level[3]: offset=2752512, slice_size=32768, npix_x=75, npix_y=60, npix_z=1, nblk_x=128, nblk_y=64, mode=3, tiling_index = 14
  Level[4]: offset=2785280, slice_size=8192, npix_x=37, npix_y=30, npix_z=1, nblk_x=64, nblk_y=32, mode=2, tiling_index = 13
  Level[5]: offset=2793472, slice_size=2048, npix_x=18, npix_y=15, npix_z=1, nblk_x=32, nblk_y=16, mode=2, tiling_index = 13
  Level[6]: offset=2795520, slice_size=512, npix_x=9, npix_y=7, npix_z=1, nblk_x=16, nblk_y=8, mode=2, tiling_index = 13
  Level[7]: offset=2796032, slice_size=256, npix_x=4, npix_y=3, npix_z=1, nblk_x=8, nblk_y=8, mode=2, tiling_index = 13
  Level[8]: offset=2796288, slice_size=256, npix_x=2, npix_y=1, npix_z=1, nblk_x=8, nblk_y=8, mode=2, tiling_index = 13
  Level[9]: offset=2796544, slice_size=256, npix_x=1, npix_y=1, npix_z=1, nblk_x=8, nblk_y=8, mode=2, tiling_index = 13

SHADER KEY
  part.vs.prolog.instance_divisors = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  part.vs.epilog.export_prim_id = 0
  as_es = 0
  as_ls = 0
  mono.vs.fix_fetch = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  opt.hw_vs.kill_outputs = 0x0
  opt.hw_vs.kill_outputs2 = 0x0
  opt.hw_vs.clip_disable = 0

Vertex Shader as VS - main shader part - LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

define amdgpu_vs <{ float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), i32 inreg, i32 inreg, i32 inreg, i32 inreg, i32, i32, i32, i32, i32) #0 {
main_body:
  %15 = bitcast [16 x <16 x i8>] addrspace(2)* %5 to <4 x i32> addrspace(2)*
  %16 = load <4 x i32>, <4 x i32> addrspace(2)* %15, align 16, !invariant.load !0
  %17 = call nsz <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32> %16, i32 %14, i32 0, i1 false, i1 false) #2
  %18 = extractelement <4 x float> %17, i32 0
  %19 = extractelement <4 x float> %17, i32 1
  %20 = extractelement <4 x float> %17, i32 2
  %21 = extractelement <4 x float> %17, i32 3
  %22 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %23 = load <16 x i8>, <16 x i8> addrspace(2)* %22, align 16, !invariant.load !0
  %24 = call nsz float @llvm.SI.load.const(<16 x i8> %23, i32 0)
  %25 = fmul nsz float %18, %24
  %26 = call nsz float @llvm.SI.load.const(<16 x i8> %23, i32 4)
  %27 = fmul nsz float %18, %26
  %28 = call nsz float @llvm.SI.load.const(<16 x i8> %23, i32 8)
  %29 = fmul nsz float %18, %28
  %30 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %31 = load <16 x i8>, <16 x i8> addrspace(2)* %30, align 16, !invariant.load !0
  %32 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 12)
  %33 = fmul nsz float %18, %32
  %34 = call nsz float @llvm.SI.load.const(<16 x i8> %31, i32 16)
  %35 = fmul nsz float %19, %34
  %36 = fadd nsz float %35, %25
  %37 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %38 = load <16 x i8>, <16 x i8> addrspace(2)* %37, align 16, !invariant.load !0
  %39 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 20)
  %40 = fmul nsz float %19, %39
  %41 = fadd nsz float %40, %27
  %42 = call nsz float @llvm.SI.load.const(<16 x i8> %38, i32 24)
  %43 = fmul nsz float %19, %42
  %44 = fadd nsz float %43, %29
  %45 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %46 = load <16 x i8>, <16 x i8> addrspace(2)* %45, align 16, !invariant.load !0
  %47 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 28)
  %48 = fmul nsz float %19, %47
  %49 = fadd nsz float %48, %33
  %50 = call nsz float @llvm.SI.load.const(<16 x i8> %46, i32 32)
  %51 = fmul nsz float %20, %50
  %52 = fadd nsz float %51, %36
  %53 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %54 = load <16 x i8>, <16 x i8> addrspace(2)* %53, align 16, !invariant.load !0
  %55 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 36)
  %56 = fmul nsz float %20, %55
  %57 = fadd nsz float %56, %41
  %58 = call nsz float @llvm.SI.load.const(<16 x i8> %54, i32 40)
  %59 = fmul nsz float %20, %58
  %60 = fadd nsz float %59, %44
  %61 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %62 = load <16 x i8>, <16 x i8> addrspace(2)* %61, align 16, !invariant.load !0
  %63 = call nsz float @llvm.SI.load.const(<16 x i8> %62, i32 44)
  %64 = fmul nsz float %20, %63
  %65 = fadd nsz float %64, %49
  %66 = call nsz float @llvm.SI.load.const(<16 x i8> %62, i32 48)
  %67 = fmul nsz float %21, %66
  %68 = fadd nsz float %67, %52
  %69 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %70 = load <16 x i8>, <16 x i8> addrspace(2)* %69, align 16, !invariant.load !0
  %71 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 52)
  %72 = fmul nsz float %21, %71
  %73 = fadd nsz float %72, %57
  %74 = call nsz float @llvm.SI.load.const(<16 x i8> %70, i32 56)
  %75 = fmul nsz float %21, %74
  %76 = fadd nsz float %75, %60
  %77 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %78 = load <16 x i8>, <16 x i8> addrspace(2)* %77, align 16, !invariant.load !0
  %79 = call nsz float @llvm.SI.load.const(<16 x i8> %78, i32 60)
  %80 = fmul nsz float %21, %79
  %81 = fadd nsz float %80, %65
  %82 = bitcast i32 %12 to float
  %83 = insertvalue <{ float, float, float }> undef, float %82, 2
  call void @llvm.amdgcn.exp.f32(i32 12, i32 15, float %68, float %73, float %76, float %81, i1 true, i1 false) #3
  ret <{ float, float, float }> %83
}

; Function Attrs: nounwind readonly
declare <4 x float> @llvm.amdgcn.buffer.load.format.v4f32(<4 x i32>, i32, i32, i1, i1) #1

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #2

; Function Attrs: nounwind
declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) #3

attributes #0 = { "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }
attributes #3 = { nounwind }

!0 = !{}


Vertex Shader as VS:
Shader prolog disassembly:
	v_add_i32_e32 v4, vcc, s12, v0 ; 4A08000C
Shader main disassembly:
	s_load_dwordx4 s[4:7], s[10:11], 0x0                ; C0820B00
	s_load_dwordx4 s[0:3], s[2:3], 0x0                  ; C0800300
	s_waitcnt lgkmcnt(0)                                ; BF8C007F
	buffer_load_format_xyzw v[3:6], v4, s[4:7], 0 idxen ; E00C2000 80010304
	s_buffer_load_dword s4, s[0:3], 0x0                 ; C2020100
	s_buffer_load_dword s5, s[0:3], 0x1                 ; C2028101
	s_buffer_load_dword s6, s[0:3], 0x2                 ; C2030102
	s_buffer_load_dword s7, s[0:3], 0x3                 ; C2038103
	s_buffer_load_dword s8, s[0:3], 0x4                 ; C2040104
	s_buffer_load_dword s9, s[0:3], 0x5                 ; C2048105
	s_buffer_load_dword s10, s[0:3], 0x6                ; C2050106
	s_buffer_load_dword s11, s[0:3], 0x7                ; C2058107
	s_buffer_load_dword s12, s[0:3], 0x8                ; C2060108
	s_buffer_load_dword s13, s[0:3], 0x9                ; C2068109
	s_buffer_load_dword s14, s[0:3], 0xa                ; C207010A
	s_buffer_load_dword s15, s[0:3], 0xb                ; C207810B
	s_buffer_load_dword s16, s[0:3], 0xc                ; C208010C
	s_buffer_load_dword s17, s[0:3], 0xd                ; C208810D
	s_buffer_load_dword s18, s[0:3], 0xe                ; C209010E
	s_buffer_load_dword s0, s[0:3], 0xf                 ; C200010F
	s_waitcnt vmcnt(0) lgkmcnt(0)                       ; BF8C0070
	v_mul_f32_e32 v0, s4, v3                            ; 10000604
	v_mul_f32_e32 v1, s5, v3                            ; 10020605
	v_mul_f32_e32 v7, s6, v3                            ; 100E0606
	v_mul_f32_e32 v3, s7, v3                            ; 10060607
	v_mac_f32_e32 v0, s8, v4                            ; 3E000808
	v_mac_f32_e32 v1, s9, v4                            ; 3E020809
	v_mac_f32_e32 v7, s10, v4                           ; 3E0E080A
	v_mac_f32_e32 v3, s11, v4                           ; 3E06080B
	v_mac_f32_e32 v0, s12, v5                           ; 3E000A0C
	v_mac_f32_e32 v1, s13, v5                           ; 3E020A0D
	v_mac_f32_e32 v7, s14, v5                           ; 3E0E0A0E
	v_mac_f32_e32 v3, s15, v5                           ; 3E060A0F
	v_mac_f32_e32 v0, s16, v6                           ; 3E000C10
	v_mac_f32_e32 v1, s17, v6                           ; 3E020C11
	v_mac_f32_e32 v7, s18, v6                           ; 3E0E0C12
	v_mac_f32_e32 v3, s0, v6                            ; 3E060C00
	exp pos0 v0, v1, v7, v3 done                        ; F80008CF 03070100
	s_waitcnt expcnt(0)                                 ; BF8C0F0F
Shader epilog disassembly:
	s_endpgm ; BF810000

*** SHADER STATS ***
SGPRS: 24
VGPRS: 8
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 172 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************


SHADER KEY
  part.ps.prolog.color_two_side = 0
  part.ps.prolog.flatshade_colors = 0
  part.ps.prolog.poly_stipple = 0
  part.ps.prolog.force_persp_sample_interp = 0
  part.ps.prolog.force_linear_sample_interp = 0
  part.ps.prolog.force_persp_center_interp = 0
  part.ps.prolog.force_linear_center_interp = 0
  part.ps.prolog.bc_optimize_for_persp = 0
  part.ps.prolog.bc_optimize_for_linear = 0
  part.ps.epilog.spi_shader_col_format = 0x4
  part.ps.epilog.color_is_int8 = 0x0
  part.ps.epilog.color_is_int10 = 0x0
  part.ps.epilog.last_cbuf = 0
  part.ps.epilog.alpha_func = 7
  part.ps.epilog.alpha_to_one = 0
  part.ps.epilog.poly_line_smoothing = 0
  part.ps.epilog.clamp_color = 0

Pixel Shader - main shader part - LLVM IR:

; ModuleID = 'tgsi'
source_filename = "tgsi"
target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
target triple = "amdgcn--"

define amdgpu_ps <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> @main([13 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <16 x i8>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [32 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <8 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), [16 x <4 x i32>] addrspace(2)* byval noalias dereferenceable(18446744073709551615), float inreg, i32 inreg, <2 x i32>, <2 x i32>, <2 x i32>, <3 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, float, float, float, float, float, i32, i32, float, i32) #0 {
main_body:
  %23 = getelementptr [16 x <16 x i8>], [16 x <16 x i8>] addrspace(2)* %1, i64 0, i64 0, !amdgpu.uniform !0
  %24 = load <16 x i8>, <16 x i8> addrspace(2)* %23, align 16, !invariant.load !0
  %25 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 48)
  %26 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 52)
  %27 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 56)
  %28 = call nsz float @llvm.SI.load.const(<16 x i8> %24, i32 60)
  %29 = bitcast float %5 to i32
  %30 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> undef, i32 %29, 10
  %31 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %30, float %25, 11
  %32 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %31, float %26, 12
  %33 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %32, float %27, 13
  %34 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %33, float %28, 14
  %35 = insertvalue <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %34, float %21, 24
  ret <{ i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float, float, float }> %35
}

; Function Attrs: nounwind readnone
declare float @llvm.SI.load.const(<16 x i8>, i32) #1

attributes #0 = { "InitialPSInputAddr"="36983" "no-signed-zeros-fp-math"="true" }
attributes #1 = { nounwind readnone }

!0 = !{}


Pixel Shader:
Shader main disassembly:
	s_load_dwordx4 s[0:3], s[2:3], 0x0  ; C0800300
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	s_buffer_load_dword s4, s[0:3], 0xc ; C202010C
	s_buffer_load_dword s5, s[0:3], 0xd ; C202810D
	s_buffer_load_dword s6, s[0:3], 0xe ; C203010E
	s_buffer_load_dword s0, s[0:3], 0xf ; C200010F
	s_waitcnt lgkmcnt(0)                ; BF8C007F
	v_mov_b32_e32 v0, s4                ; 7E000204
	v_mov_b32_e32 v1, s5                ; 7E020205
	v_mov_b32_e32 v2, s6                ; 7E040206
	v_mov_b32_e32 v3, s0                ; 7E060200
Shader epilog disassembly:
	v_cvt_pkrtz_f16_f32_e32 v0, v0, v1    ; 5E000300
	v_cvt_pkrtz_f16_f32_e32 v1, v2, v3    ; 5E020702
	exp mrt0 v0, v0, v1, v1 done compr vm ; F8001C0F 00000100
	s_endpgm                              ; BF810000

*** SHADER CONFIG ***
SPI_PS_INPUT_ADDR = 0xd077
SPI_PS_INPUT_ENA  = 0x0020
*** SHADER STATS ***
SGPRS: 16
VGPRS: 16
Spilled SGPRs: 0
Spilled VGPRs: 0
Private memory VGPRs: 0
Code Size: 64 bytes
LDS: 0 blocks
Scratch: 0 bytes per wave
Max Waves: 10
********************


RW buffers slot 0 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 1 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 2 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 3 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 4 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 5 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 6 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 7 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 8 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 9 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0x00d0ee80
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 32 (0x00000020)
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_X
                             DST_SEL_Y = SQ_SEL_Y
                             DST_SEL_Z = SQ_SEL_Z
                             DST_SEL_W = SQ_SEL_W
                             NUM_FORMAT = BUF_NUM_FORMAT_FLOAT
                             DATA_FORMAT = BUF_DATA_FORMAT_32
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 10 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0x00820600
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 16 (0x00000010)
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_X
                             DST_SEL_Y = SQ_SEL_Y
                             DST_SEL_Z = SQ_SEL_Z
                             DST_SEL_W = SQ_SEL_W
                             NUM_FORMAT = BUF_NUM_FORMAT_FLOAT
                             DATA_FORMAT = BUF_DATA_FORMAT_32
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 11 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0x00d0ed80
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 128 (0x00000080)
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_X
                             DST_SEL_Y = SQ_SEL_Y
                             DST_SEL_Z = SQ_SEL_Z
                             DST_SEL_W = SQ_SEL_W
                             NUM_FORMAT = BUF_NUM_FORMAT_FLOAT
                             DATA_FORMAT = BUF_DATA_FORMAT_32
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

RW buffers slot 12 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0x00d0b000
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 8
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_X
                             DST_SEL_Y = SQ_SEL_Y
                             DST_SEL_Z = SQ_SEL_Z
                             DST_SEL_W = SQ_SEL_W
                             NUM_FORMAT = BUF_NUM_FORMAT_FLOAT
                             DATA_FORMAT = BUF_DATA_FORMAT_32
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

VS - Vertex buffer slot 0 (CPU list):
        SQ_BUF_RSRC_WORD0 <- 0
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 0
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_0
                             DST_SEL_Y = SQ_SEL_0
                             DST_SEL_Z = SQ_SEL_0
                             DST_SEL_W = SQ_SEL_0
                             NUM_FORMAT = BUF_NUM_FORMAT_UNORM
                             DATA_FORMAT = BUF_DATA_FORMAT_INVALID
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

VS - Constant buffer slot 0 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0x00d0ee00
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 64 (0x00000040)
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_X
                             DST_SEL_Y = SQ_SEL_Y
                             DST_SEL_Z = SQ_SEL_Z
                             DST_SEL_W = SQ_SEL_W
                             NUM_FORMAT = BUF_NUM_FORMAT_FLOAT
                             DATA_FORMAT = BUF_DATA_FORMAT_32
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

PS - Constant buffer slot 0 (GPU list):
        SQ_BUF_RSRC_WORD0 <- 0x00d0ee40
        SQ_BUF_RSRC_WORD1 <- BASE_ADDRESS_HI = 0
                             STRIDE = 0
                             CACHE_SWIZZLE = 0
                             SWIZZLE_ENABLE = 0
        SQ_BUF_RSRC_WORD2 <- 64 (0x00000040)
        SQ_BUF_RSRC_WORD3 <- DST_SEL_X = SQ_SEL_X
                             DST_SEL_Y = SQ_SEL_Y
                             DST_SEL_Z = SQ_SEL_Z
                             DST_SEL_W = SQ_SEL_W
                             NUM_FORMAT = BUF_NUM_FORMAT_FLOAT
                             DATA_FORMAT = BUF_DATA_FORMAT_32
                             ELEMENT_SIZE = 0
                             INDEX_STRIDE = 0
                             ADD_TID_ENABLE = 0
                             ATC = 0
                             HASH_ENABLE = 0
                             HEAP = 0
                             MTYPE = 0
                             TYPE = SQ_RSRC_BUF
                             USER_VM_ENABLE = 0
                             USER_VM_MODE = 0
                             NV = 0

Buffer list (in units of pages = 4kB):
        Size    VM start page         VM end page           Usage
          16    0x0000000000820       0x0000000000830       IB2, CP_DMA, CONST_BUFFER, SHADER_RINGS
          64    -- hole --
          16    0x0000000000870       0x0000000000880       TRACE, CP_DMA
          64    -- hole --
          16    0x00000000008C0       0x00000000008D0       BORDER_COLORS
        1083    -- hole --
          32    0x0000000000D0B       0x0000000000D2B       CONST_BUFFER, DESCRIPTORS, SHADER_RINGS
         469    -- hole --
           5    0x0000000000F00       0x0000000000F05       CP_DMA

Note: The holes represent memory not used by the IB.
      Other buffers can still be allocated there.

------------------ IB2: Init config begin ------------------
CONTEXT_CONTROL:
        0x80000000
        0x80000000
SET_CONTEXT_REG:
        VGT_HOS_MAX_TESS_LEVEL <- 64.0f (0x42800000)
        VGT_HOS_MIN_TESS_LEVEL <- 0
SET_CONTEXT_REG:
        VGT_GS_PER_ES <- GS_PER_ES = 128 (0x80)
        VGT_ES_PER_GS <- ES_PER_GS = 64 (0x40)
        VGT_GS_PER_VS <- GS_PER_VS = 2
SET_CONTEXT_REG:
        VGT_PRIMITIVEID_RESET <- 0
SET_CONTEXT_REG:
        VGT_STRMOUT_DRAW_OPAQUE_OFFSET <- 0
SET_CONTEXT_REG:
        VGT_STRMOUT_BUFFER_CONFIG <- STREAM_0_BUFFER_EN = 0
                                     STREAM_1_BUFFER_EN = 0
                                     STREAM_2_BUFFER_EN = 0
                                     STREAM_3_BUFFER_EN = 0
SET_CONTEXT_REG:
        VGT_VTX_CNT_EN <- VTX_CNT_EN = 0
SET_CONTEXT_REG:
        PA_SC_CENTROID_PRIORITY_0 <- DISTANCE_0 = 0
                                     DISTANCE_1 = 1
                                     DISTANCE_2 = 2
                                     DISTANCE_3 = 3
                                     DISTANCE_4 = 4
                                     DISTANCE_5 = 5
                                     DISTANCE_6 = 6
                                     DISTANCE_7 = 7
        PA_SC_CENTROID_PRIORITY_1 <- DISTANCE_8 = 8
                                     DISTANCE_9 = 9
                                     DISTANCE_10 = 10 (0xa)
                                     DISTANCE_11 = 11 (0xb)
                                     DISTANCE_12 = 12 (0xc)
                                     DISTANCE_13 = 13 (0xd)
                                     DISTANCE_14 = 14 (0xe)
                                     DISTANCE_15 = 15 (0xf)
SET_CONTEXT_REG:
        PA_SU_PRIM_FILTER_CNTL <- TRIANGLE_FILTER_DISABLE = 0
                                  LINE_FILTER_DISABLE = 0
                                  POINT_FILTER_DISABLE = 0
                                  RECTANGLE_FILTER_DISABLE = 0
                                  TRIANGLE_EXPAND_ENA = 0
                                  LINE_EXPAND_ENA = 0
                                  POINT_EXPAND_ENA = 0
                                  RECTANGLE_EXPAND_ENA = 0
                                  PRIM_EXPAND_CONSTANT = 0
                                  XMAX_RIGHT_EXCLUSION = 0
                                  YMAX_BOTTOM_EXCLUSION = 0
SET_CONTEXT_REG:
        PA_SC_RASTER_CONFIG <- RB_MAP_PKR0 = RASTER_CONFIG_RB_MAP_0
                               RB_MAP_PKR1 = RASTER_CONFIG_RB_MAP_0
                               RB_XSEL2 = RASTER_CONFIG_RB_XSEL2_0
                               RB_XSEL = 0
                               RB_YSEL = 0
                               PKR_MAP = RASTER_CONFIG_PKR_MAP_0
                               PKR_XSEL = RASTER_CONFIG_PKR_XSEL_0
                               PKR_YSEL = RASTER_CONFIG_PKR_YSEL_0
                               PKR_XSEL2 = RASTER_CONFIG_PKR_XSEL2_0
                               SC_MAP = RASTER_CONFIG_SC_MAP_0
                               SC_XSEL = RASTER_CONFIG_SC_XSEL_8_WIDE_TILE
                               SC_YSEL = RASTER_CONFIG_SC_YSEL_8_WIDE_TILE
                               SE_MAP = RASTER_CONFIG_SE_MAP_0
                               SE_XSEL_GFX6 = RASTER_CONFIG_SE_XSEL_8_WIDE_TILE
                               SE_YSEL_GFX6 = RASTER_CONFIG_SE_YSEL_8_WIDE_TILE
                               SE_XSEL_GFX9 = 0
                               SE_YSEL_GFX9 = 0
        PA_SC_RASTER_CONFIG_1 <- SE_PAIR_MAP = RASTER_CONFIG_SE_PAIR_MAP_0
                                 SE_PAIR_XSEL_GFX6 = RASTER_CONFIG_SE_PAIR_XSEL_8_WIDE_TILE
                                 SE_PAIR_YSEL_GFX6 = RASTER_CONFIG_SE_PAIR_YSEL_8_WIDE_TILE
                                 SE_PAIR_XSEL_GFX9 = 0
                                 SE_PAIR_YSEL_GFX9 = 0
SET_CONTEXT_REG:
        PA_SC_WINDOW_SCISSOR_TL <- TL_X = 0
                                   TL_Y = 0
                                   WINDOW_OFFSET_DISABLE = 1
SET_CONTEXT_REG:
        PA_SC_GENERIC_SCISSOR_TL <- TL_X = 0
                                    TL_Y = 0
                                    WINDOW_OFFSET_DISABLE = 1
        PA_SC_GENERIC_SCISSOR_BR <- BR_X = 16384 (0x4000)
                                    BR_Y = 16384 (0x4000)
SET_CONTEXT_REG:
        PA_SC_SCREEN_SCISSOR_TL <- TL_X = 0
                                   TL_Y = 0
        PA_SC_SCREEN_SCISSOR_BR <- BR_X = 16384 (0x4000)
                                   BR_Y = 16384 (0x4000)
SET_CONTEXT_REG:
        PA_SC_CLIPRECT_RULE <- CLIP_RULE = 0xffff
SET_CONTEXT_REG:
        PA_SC_EDGERULE <- ER_TRI = 10 (0xa)
                          ER_POINT = 10 (0xa)
                          ER_RECT = 10 (0xa)
                          ER_LINE_LR = 26 (0x1a)
                          ER_LINE_RL = 38 (0x26)
                          ER_LINE_TB = 10 (0xa)
                          ER_LINE_BT = 10 (0xa)
        PA_SU_HARDWARE_SCREEN_OFFSET <- HW_SCREEN_OFFSET_X = 0
                                        HW_SCREEN_OFFSET_Y = 0
SET_CONTEXT_REG:
        PA_CL_NANINF_CNTL <- VTE_XY_INF_DISCARD = 0
                             VTE_Z_INF_DISCARD = 0
                             VTE_W_INF_DISCARD = 0
                             VTE_0XNANINF_IS_0 = 0
                             VTE_XY_NAN_RETAIN = 0
                             VTE_Z_NAN_RETAIN = 0
                             VTE_W_NAN_RETAIN = 0
                             VTE_W_RECIP_NAN_IS_0 = 0
                             VS_XY_NAN_TO_INF = 0
                             VS_XY_INF_RETAIN = 0
                             VS_Z_NAN_TO_INF = 0
                             VS_Z_INF_RETAIN = 0
                             VS_W_NAN_TO_INF = 0
                             VS_W_INF_RETAIN = 0
                             VS_CLIP_DIST_INF_DISCARD = 0
                             VTE_NO_OUTPUT_NEG_0 = 0
SET_CONTEXT_REG:
        DB_SRESULTS_COMPARE_STATE0 <- COMPAREFUNC0 = REF_NEVER
                                      COMPAREVALUE0 = 0
                                      COMPAREMASK0 = 0
                                      ENABLE0 = 0
        DB_SRESULTS_COMPARE_STATE1 <- COMPAREFUNC1 = REF_NEVER
                                      COMPAREVALUE1 = 0
                                      COMPAREMASK1 = 0
                                      ENABLE1 = 0
        DB_PRELOAD_CONTROL <- START_X = 0
                              START_Y = 0
                              MAX_X = 0
                              MAX_Y = 0
SET_CONTEXT_REG:
        DB_RENDER_OVERRIDE <- FORCE_HIZ_ENABLE = FORCE_OFF
                              FORCE_HIS_ENABLE0 = FORCE_OFF
                              FORCE_HIS_ENABLE1 = FORCE_OFF
                              FORCE_SHADER_Z_ORDER = 0
                              FAST_Z_DISABLE = 0
                              FAST_STENCIL_DISABLE = 0
                              NOOP_CULL_DISABLE = 0
                              FORCE_COLOR_KILL = 0
                              FORCE_Z_READ = 0
                              FORCE_STENCIL_READ = 0
                              FORCE_FULL_Z_RANGE = FORCE_OFF
                              FORCE_QC_SMASK_CONFLICT = 0
                              DISABLE_VIEWPORT_CLAMP = 0
                              IGNORE_SC_ZRANGE = 0
                              DISABLE_FULLY_COVERED = 0
                              FORCE_Z_LIMIT_SUMM = FORCE_SUMM_OFF
                              MAX_TILES_IN_DTT = 0
                              DISABLE_TILE_RATE_TILES = 0
                              FORCE_Z_DIRTY = 0
                              FORCE_STENCIL_DIRTY = 0
                              FORCE_Z_VALID = 0
                              FORCE_STENCIL_VALID = 0
                              PRESERVE_COMPRESSION = 0
SET_CONTEXT_REG:
        VGT_MAX_VTX_INDX <- 0xffffffff
        VGT_MIN_VTX_INDX <- 0
        VGT_INDX_OFFSET <- 0
SET_CONTEXT_REG:
        VGT_GS_ONCHIP_CNTL <- ES_VERTS_PER_SUBGRP = 64 (0x40)
                              GS_PRIMS_PER_SUBGRP = 4
                              GS_INST_PRIMS_IN_SUBGRP = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_LS <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_HS <- WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
                                   SIMD_DISABLE = 0
                                   CU_EN = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_ES <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_GS <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   GROUP_FIFO_DEPTH = 0
                                   SIMD_DISABLE = 0
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_VS <- CU_EN = 0xfffe
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   SIMD_DISABLE = 0
        SPI_SHADER_LATE_ALLOC_VS <- LIMIT = 31 (0x1f)
SET_SH_REG:
        SPI_SHADER_PGM_RSRC3_PS <- CU_EN = 0xffff
                                   WAVE_LIMIT = 0
                                   LOCK_LOW_THRESHOLD = 0
                                   SIMD_DISABLE = 0
SET_CONTEXT_REG:
        VGT_VERTEX_REUSE_BLOCK_CNTL <- VTX_REUSE_DEPTH = 14 (0x0e)
        VGT_OUT_DEALLOC_CNTL <- DEALLOC_DIST = 16 (0x10)
SET_CONTEXT_REG:
        TA_BC_BASE_ADDR <- 0x00008c00
        TA_BC_BASE_ADDR_HI <- ADDRESS = 0
------------------- IB2: Init config end -------------------

------------------ IB begin ------------------
WRITE_DATA:
        CONTROL <- ENGINE_SEL = ME
                   WR_CONFIRM = 1
                   WR_ONE_ADDR = 0
                   DST_SEL = MEMORY_SYNC
        DST_ADDR_LO <- 0x00870600
        DST_ADDR_HI <- 0
        0x00000001
NOP:
        Trace point ID: 1
        This trace point was reached by the CP.
INDIRECT_BUFFER_CIK:
        IB_BASE_LO <- 0x00820400
        IB_BASE_HI <- 0
        CONTROL <- IB_SIZE = 104 (0x00068)
                   CHAIN = 0
                   VALID = 0
PFP_SYNC_ME:
SURFACE_SYNC:
        CP_COHER_CNTL <- DEST_BASE_0_ENA = 0
                         DEST_BASE_1_ENA = 0
                         TC_SD_ACTION_ENA = 0
                         TC_NC_ACTION_ENA = 0
                         CB0_DEST_BASE_ENA = 0
                         CB1_DEST_BASE_ENA = 0
                         CB2_DEST_BASE_ENA = 0
                         CB3_DEST_BASE_ENA = 0
                         CB4_DEST_BASE_ENA = 0
                         CB5_DEST_BASE_ENA = 0
                         CB6_DEST_BASE_ENA = 0
                         CB7_DEST_BASE_ENA = 0
                         DB_DEST_BASE_ENA = 0
                         TCL1_VOL_ACTION_ENA = 0
                         TC_VOL_ACTION_ENA = 0
                         TC_WB_ACTION_ENA = 0
                         DEST_BASE_2_ENA = 0
                         DEST_BASE_3_ENA = 0
                         TCL1_ACTION_ENA = 0
                         TC_ACTION_ENA = 0
                         CB_ACTION_ENA = 0
                         DB_ACTION_ENA = 0
                         SH_KCACHE_ACTION_ENA = 1
                         SH_KCACHE_VOL_ACTION_ENA = 0
                         SH_ICACHE_ACTION_ENA = 1
                         SH_KCACHE_WB_ACTION_ENA = 0
                         SH_SD_ACTION_ENA = 0
                         TC_WC_ACTION_ENA = 0
                         TC_INV_METADATA_ACTION_ENA = 0
        CP_COHER_SIZE <- 0xffffffff
        CP_COHER_BASE <- 0
        POLL_INTERVAL <- 10 (0x000a)
EVENT_WRITE:
        VGT_EVENT_INITIATOR <- EVENT_TYPE = PIPELINESTAT_START
        EVENT_INDEX <- 0
        INV_L2 <- 0
DMA_DATA:
        DMA_DATA_WORD0 <- CP_SYNC = 0
                          SRC_SEL = SRC_ADDR_TC_L2
                          DSL_SEL = DST_ADDR_TC_L2
                          ENGINE = ME
        SRC_ADDR_LO <- 0x00870400
        SRC_ADDR_HI <- 0
        DST_ADDR_LO <- 0x00f00000
        DST_ADDR_HI <- 0
        COMMAND <- BYTE_COUNT_GFX6 = 24 (0x00018)
                   BYTE_COUNT_GFX9 = 0x200018
                   DISABLE_WR_CONFIRM_GFX6 = 1
                   SRC_SWAP = NONE
                   DST_SWAP = NONE
                   SAS = MEMORY
                   DAS = MEMORY
                   SAIC = INCREMENT
                   DAIC = INCREMENT
                   RAW_WAIT = 1
                   DISABLE_WR_CONFIRM_GFX9 = 0
DMA_DATA:
        DMA_DATA_WORD0 <- CP_SYNC = 1
                          SRC_SEL = SRC_ADDR
                          DSL_SEL = DST_ADDR
                          ENGINE = ME
        SRC_ADDR_LO <- 0x00820c20
        SRC_ADDR_HI <- 0
        DST_ADDR_LO <- 0x00820c00
        DST_ADDR_HI <- 0
        COMMAND <- BYTE_COUNT_GFX6 = 8
                   BYTE_COUNT_GFX9 = 8
                   DISABLE_WR_CONFIRM_GFX6 = 0
                   SRC_SWAP = NONE
                   DST_SWAP = NONE
                   SAS = MEMORY
                   DAS = MEMORY
                   SAIC = INCREMENT
                   DAIC = INCREMENT
                   RAW_WAIT = 0
                   DISABLE_WR_CONFIRM_GFX9 = 0
PFP_SYNC_ME:
EVENT_WRITE:
        VGT_EVENT_INITIATOR <- EVENT_TYPE = PS_PARTIAL_FLUSH
        EVENT_INDEX <- 4
        INV_L2 <- 0
PFP_SYNC_ME:
WRITE_DATA:
        CONTROL <- ENGINE_SEL = ME
                   WR_CONFIRM = 1
                   WR_ONE_ADDR = 0
                   DST_SEL = MEMORY_SYNC
        DST_ADDR_LO <- 0x00870600
        DST_ADDR_HI <- 0
        0x00000002
NOP:
        Trace point ID: 2
        !!!!! This is the last trace point that was reached by the CP !!!!!
------------------- IB end -------------------

Done.


More information about the mesa-dev mailing list