[PATCH umr 3/4] Read VGPRs of halted waves on gfx9
Nicolai Hähnle
nhaehnle at gmail.com
Sat Sep 9 10:55:29 UTC 2017
From: Nicolai Hähnle <nicolai.haehnle at amd.com>
Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/app/print_waves.c | 40 +++++++++++++++++++++++++++++++++++++++-
src/lib/read_gpr.c | 30 ++++++++++++++++++++++++++++++
src/umr.h | 1 +
3 files changed, 70 insertions(+), 1 deletion(-)
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index a9aaf39..a72d224 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -29,20 +29,22 @@
#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)ws.x);
#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)ws.x);
#define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x);
#define Hv(x, y) if (col) { printf("\n"); }; col = 0; printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y);
void umr_print_waves(struct umr_asic *asic)
{
uint32_t x, se, sh, cu, simd, wave, sgprs[1024], shift, opcodes[8];
+ uint32_t vgprs[64 * 256];
+ uint32_t thread;
uint64_t pgm_addr;
struct umr_wave_status ws;
int first = 1, col = 0;
if (asic->options.halt_waves)
umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
if (asic->family <= FAMILY_CIK)
shift = 3; // on SI..CIK allocations were done in 8-dword blocks
else
@@ -50,24 +52,36 @@ void umr_print_waves(struct umr_asic *asic)
for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
umr_get_wave_sq_info(asic, se, sh, cu, &ws);
if (ws.sq_info.busy) {
for (simd = 0; simd < 4; simd++)
for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
umr_get_wave_status(asic, se, sh, cu, simd, wave, &ws);
if (ws.wave_status.halt || ws.wave_status.valid) {
+ unsigned have_vgprs = 0;
+
// grab sgprs..
- if (ws.wave_status.halt)
+ if (ws.wave_status.halt) {
umr_read_sgprs(asic, &ws, &sgprs[0]);
+ if (options.bitfields) {
+ have_vgprs = 1;
+ for (thread = 0; thread < 64; ++thread) {
+ if (umr_read_vgprs(asic, &ws, thread,
+ &vgprs[256 * thread]) < 0)
+ have_vgprs = 0;
+ }
+ }
+ }
+
if (!options.bitfields && first) {
first = 0;
printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
}
if (!options.bitfields) {
printf(
"%u %u %u %u %u " // se/sh/cu/simd/wave
"%08lx %08lx %08lx " // wave_status pc/hi/lo
"%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
"%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
@@ -164,20 +178,44 @@ void umr_print_waves(struct umr_asic *asic)
for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
(unsigned)(x),
(unsigned)(x + 3),
(unsigned long)sgprs[x],
(unsigned long)sgprs[x+1],
(unsigned long)sgprs[x+2],
(unsigned long)sgprs[x+3]);
}
+
+ if (have_vgprs) {
+ printf("\n");
+ for (x = 0; x < ((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
+ if (x % 16 == 0) {
+ if (x == 0)
+ printf("VGPRS: ");
+ else
+ printf(" ");
+ for (thread = 0; thread < 64; ++thread) {
+ unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread))
+ : (ws.exec_hi & (1u << (thread - 32)));
+ printf(live ? " t%02u " : " (t%02u) ", thread);
+ }
+ printf("\n");
+ }
+
+ printf(" [%3u] = {", x);
+ for (thread = 0; thread < 64; ++thread)
+ printf(" %08x", vgprs[thread * 256 + x]);
+ printf(" }\n");
+ }
+ }
+
printf("\n\nPGM_MEM:\n");
pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2);
umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
for (x = 0; x < sizeof(opcodes)/4; x++) {
if (x == (sizeof(opcodes)/8))
printf("*\t");
else
printf("\t");
printf("pgm[%lu@%llx] = %08lx\n",
(unsigned long)ws.hw_id.vm_id,
diff --git a/src/lib/read_gpr.c b/src/lib/read_gpr.c
index 427cfc5..669a49b 100644
--- a/src/lib/read_gpr.c
+++ b/src/lib/read_gpr.c
@@ -74,10 +74,40 @@ int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *
lseek(asic->fd.gpr, addr, SEEK_SET);
return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 1) << shift));
} else {
umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id);
wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, 0, 0,
(ws->gpr_alloc.sgpr_size + 1) << shift, dst);
umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
return 0;
}
}
+// Read the VGPRs of one thread of the wave described by ws into dst; returns -1 if asic->family is below FAMILY_AI.
+// Requests (vgpr_size + 1) << 2 dwords. Returns read()'s result on the fd.gpr path, 0 on the no_kernel (MMIO) path.
+int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst)
+{
+ uint64_t addr;
+
+ if (asic->family < FAMILY_AI) // families older than FAMILY_AI are rejected up front
+ return -1;
+
+ if (!asic->options.no_kernel) { // default path: read through the asic->fd.gpr file
+ addr = // file offset that packs the wave/thread selection into bit fields
+ (0ULL << 60) | // reading VGPRs
+ ((uint64_t)0) | // starting address to read from
+ ((uint64_t)ws->hw_id.se_id << 12) |
+ ((uint64_t)ws->hw_id.sh_id << 20) |
+ ((uint64_t)ws->hw_id.cu_id << 28) |
+ ((uint64_t)ws->hw_id.wave_id << 36) |
+ ((uint64_t)ws->hw_id.simd_id << 44) |
+ ((uint64_t)thread << 52);
+
+ lseek(asic->fd.gpr, addr, SEEK_SET); // NOTE(review): the lseek() result is not checked
+ return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.vgpr_size + 1) << 2)); // length in bytes (4 per dword); NOTE(review): a short read is returned as-is
+ } else { // options.no_kernel set: read the registers via MMIO instead of the file
+ umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id); // select the wave's se/sh/cu before reading
+ wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, thread, 0x400, // 0x400: presumably the VGPR base in the indexed register space — TODO confirm
+ (ws->gpr_alloc.vgpr_size + 1) << 2, dst);
+ umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); // reset the selection (presumably back to broadcast — TODO confirm)
+ return 0; // this path always reports 0; no error is propagated from the MMIO helper
+ }
+}
diff --git a/src/umr.h b/src/umr.h
index e49c80c..3d2252e 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -494,20 +494,21 @@ void umr_free_asic(struct umr_asic *asic);
void umr_free_maps(struct umr_asic *asic);
void umr_close_asic(struct umr_asic *asic); // call this to close a fully open asic
int umr_query_drm(struct umr_asic *asic, int field, void *ret, int size);
void umr_enumerate_devices(void);
int umr_update(struct umr_asic *asic, char *script);
/* lib helpers */
int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws);
int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws);
int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst);
+int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst);
int umr_read_sensor(struct umr_asic *asic, int sensor, void *dst, int *size);
/* mmio helpers */
// init the mmio lookup table
int umr_create_mmio_accel(struct umr_asic *asic);
// find the word address of a register
uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
// find the register data for a register
--
2.11.0
More information about the amd-gfx mailing list