[PATCH umr 1/4] Fix wave SGPR reading

Tom St Denis tom.stdenis at amd.com
Mon Sep 11 13:31:00 UTC 2017


Hi Nicolai,

I don't quite follow this patch: 'x' starts at 0 and runs up to the 
allocated SGPR size, but that doesn't include the offset into the SGPR 
space, right?

I mean, I get the change in umr_read_sgprs(), but in umr_print_waves() 
won't that mean you're printing out SGPRS[0..size]?

Or are you saying that having the base added to the printout is confusing 
for UMD (user-mode driver) debugging, since the shader you're debugging 
probably doesn't have the offsets explicitly stated?

Cheers,
Tom

On 09/09/17 06:55 AM, Nicolai Hähnle wrote:
> From: Nicolai Hähnle <nicolai.haehnle at amd.com>
> 
> The hardware adds the alloc base already, no need to do it in the tool.
> 
> Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
> ---
>   src/app/print_waves.c | 8 ++++----
>   src/lib/read_sgpr.c   | 5 +++--
>   2 files changed, 7 insertions(+), 6 deletions(-)
> 
> diff --git a/src/app/print_waves.c b/src/app/print_waves.c
> index 1efd8a1..a9aaf39 100644
> --- a/src/app/print_waves.c
> +++ b/src/app/print_waves.c
> @@ -75,22 +75,22 @@ void umr_print_waves(struct umr_asic *asic)
>   "\n",
>   (unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id,
>   (unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned long)ws.pc_lo,
>   (unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned long)ws.exec_hi, (unsigned long)ws.exec_lo,
>   (unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned long)ws.ib_sts.value,
>   (unsigned long)ws.tba_hi, (unsigned long)ws.tba_lo, (unsigned long)ws.tma_hi, (unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0
>   );
>   						if (ws.wave_status.halt)
>   							for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
>   								printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
> -									(unsigned)((ws.gpr_alloc.sgpr_base << shift) + x),
> -									(unsigned)((ws.gpr_alloc.sgpr_base << shift) + x + 3),
> +									(unsigned)(x),
> +									(unsigned)(x + 3),
>   									(unsigned long)sgprs[x],
>   									(unsigned long)sgprs[x+1],
>   									(unsigned long)sgprs[x+2],
>   									(unsigned long)sgprs[x+3]);
>   
>   						pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2);
>   						umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
>   						for (x = 0; x < sizeof(opcodes)/4; x++) {
>   							printf(">pgm[%lu@%llx] = %08lx\n",
>   								(unsigned long)ws.hw_id.vm_id,
> @@ -156,22 +156,22 @@ void umr_print_waves(struct umr_asic *asic)
>   						Hv("GPR_ALLOC", ws.gpr_alloc.value);
>   						PP(gpr_alloc, vgpr_base);
>   						PP(gpr_alloc, vgpr_size);
>   						PP(gpr_alloc, sgpr_base);
>   						PP(gpr_alloc, sgpr_size);
>   
>   						if (ws.wave_status.halt) {
>   							printf("\n\nSGPRS:\n");
>   							for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
>   								printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
> -									(unsigned)((ws.gpr_alloc.sgpr_base << shift) + x),
> -									(unsigned)((ws.gpr_alloc.sgpr_base << shift) + x + 3),
> +									(unsigned)(x),
> +									(unsigned)(x + 3),
>   									(unsigned long)sgprs[x],
>   									(unsigned long)sgprs[x+1],
>   									(unsigned long)sgprs[x+2],
>   									(unsigned long)sgprs[x+3]);
>   						}
>   
>   						printf("\n\nPGM_MEM:\n");
>   						pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2);
>   						umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
>   						for (x = 0; x < sizeof(opcodes)/4; x++) {
> diff --git a/src/lib/read_sgpr.c b/src/lib/read_sgpr.c
> index cceb189..427cfc5 100644
> --- a/src/lib/read_sgpr.c
> +++ b/src/lib/read_sgpr.c
> @@ -56,27 +56,28 @@ int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *
>   	uint64_t addr, shift;
>   
>   	if (asic->family <= FAMILY_CIK)
>   		shift = 3;  // on SI..CIK allocations were done in 8-dword blocks
>   	else
>   		shift = 4;  // on VI allocations are in 16-dword blocks
>   
>   	if (!asic->options.no_kernel) {
>   		addr =
>   			(1ULL << 60)                             | // reading SGPRs
> -			((uint64_t)ws->gpr_alloc.sgpr_base << shift) | // starting address to read from
> +			((uint64_t)0)                            | // starting address to read from
>   			((uint64_t)ws->hw_id.se_id << 12)        |
>   			((uint64_t)ws->hw_id.sh_id << 20)        |
>   			((uint64_t)ws->hw_id.cu_id << 28)        |
>   			((uint64_t)ws->hw_id.wave_id << 36)      |
>   			((uint64_t)ws->hw_id.simd_id << 44)      |
>   			(0ULL << 52); // thread_id
>   
>   		lseek(asic->fd.gpr, addr, SEEK_SET);
>   		return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 1) << shift));
>   	} else {
>   		umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id);
> -		wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, ws->gpr_alloc.sgpr_base << shift, 0, (ws->gpr_alloc.sgpr_size + 1) << shift, dst);
> +		wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, 0, 0,
> +					(ws->gpr_alloc.sgpr_size + 1) << shift, dst);
>   		umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
>   		return 0;
>   	}
>   }
> 



More information about the amd-gfx mailing list