[PATCH umr 3/4] Read VGPRs of halted waves on gfx9

Nicolai Hähnle nhaehnle at gmail.com
Sat Sep 9 10:55:29 UTC 2017


From: Nicolai Hähnle <nicolai.haehnle at amd.com>

Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
 src/app/print_waves.c | 40 +++++++++++++++++++++++++++++++++++++++-
 src/lib/read_gpr.c    | 30 ++++++++++++++++++++++++++++++
 src/umr.h             |  1 +
 3 files changed, 70 insertions(+), 1 deletion(-)

diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index a9aaf39..a72d224 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -29,20 +29,22 @@
 
 #define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)ws.x); 
 #define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)ws.x);
 
 #define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x);
 #define Hv(x, y) if (col) { printf("\n"); }; col = 0; printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y);
 
 void umr_print_waves(struct umr_asic *asic)
 {
 	uint32_t x, se, sh, cu, simd, wave, sgprs[1024], shift, opcodes[8];
+	uint32_t vgprs[64 * 256];
+	uint32_t thread;
 	uint64_t pgm_addr;
 	struct umr_wave_status ws;
 	int first = 1, col = 0;
 
 	if (asic->options.halt_waves)
 		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_HALT);
 
 	if (asic->family <= FAMILY_CIK)
 		shift = 3;  // on SI..CIK allocations were done in 8-dword blocks
 	else
@@ -50,24 +52,36 @@ void umr_print_waves(struct umr_asic *asic)
 
 	for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
 	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
 	for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
 		umr_get_wave_sq_info(asic, se, sh, cu, &ws);
 		if (ws.sq_info.busy) {
 			for (simd = 0; simd < 4; simd++)
 			for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
 				umr_get_wave_status(asic, se, sh, cu, simd, wave, &ws);
 				if (ws.wave_status.halt || ws.wave_status.valid) {
+					unsigned have_vgprs = 0;
+
 					// grab sgprs..
-					if (ws.wave_status.halt)
+					if (ws.wave_status.halt) {
 						umr_read_sgprs(asic, &ws, &sgprs[0]);
 
+						if (options.bitfields) {
+							have_vgprs = 1;
+							for (thread = 0; thread < 64; ++thread) {
+								if (umr_read_vgprs(asic, &ws, thread,
+										   &vgprs[256 * thread]) < 0)
+									have_vgprs = 0;
+							}
+						}
+					}
+
 					if (!options.bitfields && first) {
 						first = 0;
 						printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
 					}
 					if (!options.bitfields) {
 					printf(
 "%u %u %u %u %u " // se/sh/cu/simd/wave
 "%08lx %08lx %08lx " // wave_status pc/hi/lo
 "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
 "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
@@ -164,20 +178,44 @@ void umr_print_waves(struct umr_asic *asic)
 							for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
 								printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
 									(unsigned)(x),
 									(unsigned)(x + 3),
 									(unsigned long)sgprs[x],
 									(unsigned long)sgprs[x+1],
 									(unsigned long)sgprs[x+2],
 									(unsigned long)sgprs[x+3]);
 						}
 
+
+						if (have_vgprs) {
+							printf("\n");
+							for (x = 0; x < ((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
+								if (x % 16 == 0) {
+									if (x == 0)
+										printf("VGPRS:       ");
+									else
+										printf("             ");
+									for (thread = 0; thread < 64; ++thread) {
+										unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread))
+													    : (ws.exec_hi & (1u << (thread - 32)));
+										printf(live ? " t%02u     " : " (t%02u)   ", thread);
+									}
+									printf("\n");
+								}
+
+								printf("    [%3u] = {", x);
+								for (thread = 0; thread < 64; ++thread)
+									printf(" %08x", vgprs[thread * 256 + x]);
+								printf(" }\n");
+							}
+						}
+
 						printf("\n\nPGM_MEM:\n");
 						pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (sizeof(opcodes)/2);
 						umr_read_vram(asic, ws.hw_id.vm_id, pgm_addr, sizeof(opcodes), opcodes);
 						for (x = 0; x < sizeof(opcodes)/4; x++) {
 							if (x == (sizeof(opcodes)/8))
 								printf("*\t");
 							else
 								printf("\t");
 							printf("pgm[%lu@%llx] = %08lx\n",
 								(unsigned long)ws.hw_id.vm_id,
diff --git a/src/lib/read_gpr.c b/src/lib/read_gpr.c
index 427cfc5..669a49b 100644
--- a/src/lib/read_gpr.c
+++ b/src/lib/read_gpr.c
@@ -74,10 +74,40 @@ int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *
 		lseek(asic->fd.gpr, addr, SEEK_SET);
 		return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.sgpr_size + 1) << shift));
 	} else {
 		umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id);
 		wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, 0, 0,
 					(ws->gpr_alloc.sgpr_size + 1) << shift, dst);
 		umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
 		return 0;
 	}
 }
+
+/* Read the VGPRs of one thread of wave `ws` into dst: -1 if family < FAMILY_AI, read()'s result via the gpr fd, else 0 (MMIO path). */
+int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst)
+{
+	uint64_t addr;
+
+	if (asic->family < FAMILY_AI) // families ordered before FAMILY_AI are rejected outright
+		return -1;
+
+	if (!asic->options.no_kernel) { // kernel path: the whole request is encoded in the file offset of fd.gpr
+		addr =
+			(0ULL << 60)                             | // reading VGPRs
+			((uint64_t)0)                            | // starting address to read from
+			((uint64_t)ws->hw_id.se_id << 12)        | // wave location fields, all taken from ws->hw_id
+			((uint64_t)ws->hw_id.sh_id << 20)        |
+			((uint64_t)ws->hw_id.cu_id << 28)        |
+			((uint64_t)ws->hw_id.wave_id << 36)      |
+			((uint64_t)ws->hw_id.simd_id << 44)      |
+			((uint64_t)thread << 52); // thread index within the wave
+
+		lseek(asic->fd.gpr, addr, SEEK_SET); // NOTE(review): the lseek() result is not checked
+		return read(asic->fd.gpr, dst, 4 * ((ws->gpr_alloc.vgpr_size + 1) << 2)); // dst must hold (vgpr_size + 1) << 2 dwords; returns read()'s value (byte count, or negative on error)
+	} else { // no_kernel path: fetch the registers through MMIO instead of the gpr fd
+		umr_grbm_select_index(asic, ws->hw_id.se_id, ws->hw_id.sh_id, ws->hw_id.cu_id); // select the wave's SE/SH/CU before reading
+		wave_read_regs_via_mmio(asic, ws->hw_id.simd_id, ws->hw_id.wave_id, thread, 0x400, // NOTE(review): 0x400 is presumably the VGPR register offset (umr_read_sgprs passes 0) — confirm
+					(ws->gpr_alloc.vgpr_size + 1) << 2, dst);
+		umr_grbm_select_index(asic, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); // NOTE(review): presumably resets the selection to broadcast — confirm
+		return 0; // this path never reports a failure
+	}
+}
diff --git a/src/umr.h b/src/umr.h
index e49c80c..3d2252e 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -494,20 +494,21 @@ void umr_free_asic(struct umr_asic *asic);
 void umr_free_maps(struct umr_asic *asic);
 void umr_close_asic(struct umr_asic *asic); // call this to close a fully open asic
 int umr_query_drm(struct umr_asic *asic, int field, void *ret, int size);
 void umr_enumerate_devices(void);
 int umr_update(struct umr_asic *asic, char *script);
 
 /* lib helpers */
 int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws);
 int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws);
 int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst);
+int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst);
 int umr_read_sensor(struct umr_asic *asic, int sensor, void *dst, int *size);
 
 /* mmio helpers */
 // init the mmio lookup table
 int umr_create_mmio_accel(struct umr_asic *asic);
 
 // find the word address of a register
 uint32_t umr_find_reg(struct umr_asic *asic, char *regname);
 
 // find the register data for a register
-- 
2.11.0



More information about the amd-gfx mailing list