[PATCH umr 1/4] Fix wave SGPR reading
Nicolai Hähnle
nhaehnle at gmail.com
Sat Sep 9 10:55:27 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
The hardware already adds the SGPR allocation base (sgpr_base) when reading wave SGPRs, so the tool does not need to add it again.
Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/app/print_waves.c | 8 ++++----
src/lib/read_sgpr.c | 5 +++--
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 1efd8a1..a9aaf39 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -75,22 +75,22 @@ void umr_print_waves(struct umr_asic *asic)
"\n",
(unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id,
(unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned long)ws.pc_lo,
(unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned long)ws.exec_hi, (unsigned long)ws.exec_lo,
(unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned long)ws.ib_sts.value,
(unsigned long)ws.tba_hi, (unsigned long)ws.tba_lo, (unsigned long)ws.tma_hi, (unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0
);
if (ws.wave_status.halt)
for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
- (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x),
- (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x + 3),
+ (unsigned)(x),
+ (unsigned)(x + 3),
(unsigned long)sgprs[x],
(unsigned long)sgprs[x+1],
(unsigned long)sgprs[x+2],
(unsigned long)sgprs[x+3]);
pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2);
umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
for (x = 0; x < sizeof(opcodes)/4; x++) {
printf(">pgm[%lu@%llx] = %08lx\n",
(unsigned long)ws.hw_id.vm_id,
@@ -156,22 +156,22 @@ void umr_print_waves(struct umr_asic *asic)
Hv("GPR_ALLOC", ws.gpr_alloc.value);
PP(gpr_alloc, vgpr_base);
PP(gpr_alloc, vgpr_size);
PP(gpr_alloc, sgpr_base);
PP(gpr_alloc, sgpr_size);
if (ws.wave_status.halt) {
printf("\n\nSGPRS:\n");
for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
- (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x),
- (unsigned)((ws.gpr_alloc.sgpr_base << shift) + x + 3),
+ (unsigned)(x),
+ (unsigned)(x + 3),
(unsigned long)sgprs[x],
(unsigned long)sgprs[x+1],
(unsigned long)sgprs[x+2],
(unsigned long)sgprs[x+3]);
}
printf("\n\nPGM_MEM:\n");
pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2);
umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
for (x = 0; x < sizeof(opcodes)/4; x++) {
diff --git a/src/lib/read_sgpr.c b/src/lib/read_sgpr.c
index cceb189..427cfc5 100644
--- a/src/lib/read_sgpr.c
+++ b/src/lib/read_sgpr.c
@@ -56,27 +56,28 @@ int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *
uint64_t addr, shift;
if (asic->family <= FAMILY_CIK)
shift = 3; // on SI..CIK allocations were done in 8-dword blocks
else
shift = 4; // on VI allocations are in 16-dword blocks
if (!asic->options.no_kernel) {
addr =
(1ULL << 60) | // reading SGPRs
- ((uint64_t)ws->gpr_alloc.sgpr_base << shift) | // starting address to read from
+ ((uint64_t)0) | // starting address to read from
((uint64_t)ws->hw_id.se_id << 12) |
((uint64_t)ws->hw_id.sh_id << 20) |
((uint64_t)ws->hw_id.cu_id << 28) |
((uint64_t)ws->hw_id.wave_id << 36) |
((uint64_t)ws->hw_id.simd_id << 44) |
(0ULL << 52); // thread_id
lseek(asic->fd.gpr, addr, SEEK_SET);
return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 1) << shift));
} else {
umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id);
- wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, ws->gpr_alloc.sgpr_base << shift, 0, (ws->gpr_alloc.sgpr_size + 1) << shift, dst);
+ wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, 0, 0,
+ (ws->gpr_alloc.sgpr_size + 1) << shift, dst);
umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
return 0;
}
}
--
2.11.0
More information about the amd-gfx mailing list