[PATCH umr] refactor out wave scanning to new function umr_scan_wave_data()

Tom St Denis tom.stdenis at amd.com
Fri Apr 20 18:06:56 UTC 2018


Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
 doc/sphinx/source/libwave_status.rst |  28 +++
 src/app/print_waves.c                | 379 +++++++++++++++++------------------
 src/lib/CMakeLists.txt               |   1 +
 src/lib/scan_waves.c                 |  97 +++++++++
 src/umr.h                            |   9 +
 5 files changed, 314 insertions(+), 200 deletions(-)
 create mode 100644 src/lib/scan_waves.c

diff --git a/doc/sphinx/source/libwave_status.rst b/doc/sphinx/source/libwave_status.rst
index 0f58a3c91855..2e16a4ac0cb6 100644
--- a/doc/sphinx/source/libwave_status.rst
+++ b/doc/sphinx/source/libwave_status.rst
@@ -43,6 +43,34 @@ can be read with the following function:
 This will populate many of the fields of the structure 'umr_wave_status'.  An
 example of reading them can be found in src/app/print_waves.c.
 
+---------------------
+Scanning Halted Waves
+---------------------
+
+If the waves have been halted (for example with umr_sq_cmd_halt_waves()), then
+a list of the halted or valid waves can be built with the following function:
+
+
+::
+
+	struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
+
+This returns NULL if the first allocation fails or if no waves were captured;
+otherwise it returns a pointer to the first node of the following structure:
+
+::
+
+	struct umr_wave_data {
+		uint32_t vgprs[64 * 256], sgprs[1024];
+		int se, sh, cu, simd, wave, have_vgprs;
+		struct umr_wave_status ws;
+		struct umr_wave_thread *threads;
+		struct umr_wave_data *next;
+	};
+
+The waves are stored as a singly linked list; the last node has 'next' set
+to NULL.  The caller owns the list and must free() every node when done.
+
 ------------
 Reading GPRs
 ------------
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 563fc65bedb7..d901bc902ff3 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -24,11 +24,11 @@
  */
 #include "umrapp.h"
 
-#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #y, (unsigned)ws.x.y); 
-#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #y, (unsigned long)ws.x.y);
+#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #y, (unsigned)wd->ws.x.y);
+#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #y, (unsigned long)wd->ws.x.y);
 
-#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)ws.x); 
-#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)ws.x);
+#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)wd->ws.x);
+#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)wd->ws.x);
 
 #define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x);
 #define Hv(x, y) if (col) { printf("\n"); }; col = 0; printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y);
@@ -37,11 +37,9 @@
 
 void umr_print_waves(struct umr_asic *asic)
 {
-	uint32_t x, y, se, sh, cu, simd, wave, sgprs[1024], shift;
-	uint32_t vgprs[64 * 256];
-	uint32_t thread;
+	uint32_t x, y, shift, thread;
 	uint64_t pgm_addr;
-	struct umr_wave_status ws;
+	struct umr_wave_data *wd, *owd;
 	int first = 1, col = 0;
 
 	if (asic->options.halt_waves)
@@ -52,228 +50,209 @@ void umr_print_waves(struct umr_asic *asic)
 	else
 		shift = 4;  // on VI allocations are in 16-dword blocks
 
-	for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
-	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
-	for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
-		umr_get_wave_sq_info(asic, se, sh, cu, &ws);
-		if (ws.sq_info.busy) {
-			for (simd = 0; simd < 4; simd++)
-			for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
-				umr_get_wave_status(asic, se, sh, cu, simd, wave, &ws);
-				if (ws.wave_status.halt || ws.wave_status.valid) {
-					unsigned have_vgprs = 0;
-
-					// grab sgprs..
-					if (ws.wave_status.halt) {
-						umr_read_sgprs(asic, &ws, &sgprs[0]);
-
-						if (asic->options.bitfields) {
-							have_vgprs = 1;
-							for (thread = 0; thread < 64; ++thread) {
-								if (umr_read_vgprs(asic, &ws, thread,
-										   &vgprs[256 * thread]) < 0)
-									have_vgprs = 0;
-							}
-						}
-					}
-
-					if (!asic->options.bitfields && first) {
-						first = 0;
-						printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
-					}
-					if (!asic->options.bitfields) {
-					printf(
+	owd = wd = umr_scan_wave_data(asic);
+	while (wd) {
+		if (!asic->options.bitfields && first) {
+			first = 0;
+			printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
+		}
+		if (!asic->options.bitfields) {
+		printf(
 "%u %u %u %u %u " // se/sh/cu/simd/wave
 "%08lx %08lx %08lx " // wave_status pc/hi/lo
 "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
 "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
 "%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
 "\n",
-(unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id,
-(unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned long)ws.pc_lo,
-(unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned long)ws.exec_hi, (unsigned long)ws.exec_lo,
-(unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned long)ws.ib_sts.value,
-(unsigned long)ws.tba_hi, (unsigned long)ws.tba_lo, (unsigned long)ws.tma_hi, (unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0
+(unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id,
+(unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
+(unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
+(unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value,
+(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0
 );
-						if (ws.wave_status.halt) {
-							for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
-								printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
-									(unsigned)(x),
-									(unsigned)(x + 3),
-									(unsigned long)sgprs[x],
-									(unsigned long)sgprs[x+1],
-									(unsigned long)sgprs[x+2],
-									(unsigned long)sgprs[x+3]);
-
-							if (ws.wave_status.trap_en || ws.wave_status.priv) {
-								for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
-									printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
-										(x < (0x6C + 4) && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP",
-										(unsigned)(y),
-										(unsigned)(y + 3),
-										(unsigned long)sgprs[x],
-										(unsigned long)sgprs[x+1],
-										(unsigned long)sgprs[x+2],
-										(unsigned long)sgprs[x+3]);
+			if (wd->ws.wave_status.halt) {
+				for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
+					printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
+						(unsigned)(x),
+						(unsigned)(x + 3),
+						(unsigned long)wd->sgprs[x],
+						(unsigned long)wd->sgprs[x+1],
+						(unsigned long)wd->sgprs[x+2],
+						(unsigned long)wd->sgprs[x+3]);
 
-									// restart numbering on SI..VI with TTMP0
-									y += 4;
-									if (x == 0x6C && asic->family <= FAMILY_VI)
-										y = 0;
-								}
-							}
-						}
+				if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
+					for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
+						printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
+							(x < (0x6C + 4) && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP",
+							(unsigned)(y),
+							(unsigned)(y + 3),
+							(unsigned long)wd->sgprs[x],
+							(unsigned long)wd->sgprs[x+1],
+							(unsigned long)wd->sgprs[x+2],
+							(unsigned long)wd->sgprs[x+3]);
 
-						pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
-						umr_vm_disasm(asic, ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), NUM_OPCODE_WORDS*4);
-					} else {
-						first = 0;
-						printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n",
-						(unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id);
+						// restart numbering on SI..VI with TTMP0
+						y += 4;
+						if (x == 0x6C && asic->family <= FAMILY_VI)
+							y = 0;
+					}
+				}
+			}
 
-						H("Main Registers");
-						X(pc_hi);
-						X(pc_lo);
-						X(wave_inst_dw0);
-						X(wave_inst_dw1);
-						X(exec_hi);
-						X(exec_lo);
-						X(tba_hi);
-						X(tba_lo);
-						X(tma_hi);
-						X(tma_lo);
-						X(m0);
-						X(ib_dbg0);
+			pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+			umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4);
+		} else {
+			first = 0;
+			printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n",
+			(unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id);
 
-						Hv("Wave_Status", ws.wave_status.value);
-						PP(wave_status, scc);
-						PP(wave_status, execz);
-						PP(wave_status, vccz);
-						PP(wave_status, in_tg);
-						PP(wave_status, halt);
-						PP(wave_status, valid);
-						PP(wave_status, spi_prio);
-						PP(wave_status, wave_prio);
-						PP(wave_status, priv);
-						PP(wave_status, trap_en);
-						PP(wave_status, trap);
-						PP(wave_status, ttrace_en);
-						PP(wave_status, export_rdy);
-						PP(wave_status, in_barrier);
-						PP(wave_status, ecc_err);
-						PP(wave_status, skip_export);
-						PP(wave_status, perf_en);
-						PP(wave_status, cond_dbg_user);
-						PP(wave_status, cond_dbg_sys);
-						PP(wave_status, data_atc);
-						PP(wave_status, inst_atc);
-						PP(wave_status, dispatch_cache_ctrl);
-						PP(wave_status, must_export);
+			H("Main Registers");
+			X(pc_hi);
+			X(pc_lo);
+			X(wave_inst_dw0);
+			X(wave_inst_dw1);
+			X(exec_hi);
+			X(exec_lo);
+			X(tba_hi);
+			X(tba_lo);
+			X(tma_hi);
+			X(tma_lo);
+			X(m0);
+			X(ib_dbg0);
 
-						Hv("HW_ID", ws.hw_id.value);
-						PP(hw_id, wave_id);
-						PP(hw_id, simd_id);
-						PP(hw_id, pipe_id);
-						PP(hw_id, cu_id);
-						PP(hw_id, sh_id);
-						PP(hw_id, se_id);
-						PP(hw_id, tg_id);
-						PP(hw_id, vm_id);
-						PP(hw_id, queue_id);
-						PP(hw_id, state_id);
-						PP(hw_id, me_id);
+			Hv("Wave_Status", wd->ws.wave_status.value);
+			PP(wave_status, scc);
+			PP(wave_status, execz);
+			PP(wave_status, vccz);
+			PP(wave_status, in_tg);
+			PP(wave_status, halt);
+			PP(wave_status, valid);
+			PP(wave_status, spi_prio);
+			PP(wave_status, wave_prio);
+			PP(wave_status, priv);
+			PP(wave_status, trap_en);
+			PP(wave_status, trap);
+			PP(wave_status, ttrace_en);
+			PP(wave_status, export_rdy);
+			PP(wave_status, in_barrier);
+			PP(wave_status, ecc_err);
+			PP(wave_status, skip_export);
+			PP(wave_status, perf_en);
+			PP(wave_status, cond_dbg_user);
+			PP(wave_status, cond_dbg_sys);
+			PP(wave_status, data_atc);
+			PP(wave_status, inst_atc);
+			PP(wave_status, dispatch_cache_ctrl);
+			PP(wave_status, must_export);
 
-						Hv("GPR_ALLOC", ws.gpr_alloc.value);
-						PP(gpr_alloc, vgpr_base);
-						PP(gpr_alloc, vgpr_size);
-						PP(gpr_alloc, sgpr_base);
-						PP(gpr_alloc, sgpr_size);
+			Hv("HW_ID", wd->ws.hw_id.value);
+			PP(hw_id, wave_id);
+			PP(hw_id, simd_id);
+			PP(hw_id, pipe_id);
+			PP(hw_id, cu_id);
+			PP(hw_id, sh_id);
+			PP(hw_id, se_id);
+			PP(hw_id, tg_id);
+			PP(hw_id, vm_id);
+			PP(hw_id, queue_id);
+			PP(hw_id, state_id);
+			PP(hw_id, me_id);
 
-						if (ws.wave_status.halt) {
-							printf("\n\nSGPRS:\n");
-							for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
-								printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
-									(unsigned)(x),
-									(unsigned)(x + 3),
-									(unsigned long)sgprs[x],
-									(unsigned long)sgprs[x+1],
-									(unsigned long)sgprs[x+2],
-									(unsigned long)sgprs[x+3]);
+			Hv("GPR_ALLOC", wd->ws.gpr_alloc.value);
+			PP(gpr_alloc, vgpr_base);
+			PP(gpr_alloc, vgpr_size);
+			PP(gpr_alloc, sgpr_base);
+			PP(gpr_alloc, sgpr_size);
 
-							if (ws.wave_status.trap_en || ws.wave_status.priv) {
-								for (y  = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
-									// only print label once each
-									if ((asic->family <= FAMILY_VI && x < 0x6C + 8) ||
-										(asic->family > FAMILY_VI && x < 0x6C + 4))
-										printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP");
-									printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
-										(unsigned)(y),
-										(unsigned)(y + 3),
-										(unsigned long)sgprs[x],
-										(unsigned long)sgprs[x+1],
-										(unsigned long)sgprs[x+2],
-										(unsigned long)sgprs[x+3]);
+			if (wd->ws.wave_status.halt) {
+				printf("\n\nSGPRS:\n");
+				for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
+					printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
+						(unsigned)(x),
+						(unsigned)(x + 3),
+						(unsigned long)wd->sgprs[x],
+						(unsigned long)wd->sgprs[x+1],
+						(unsigned long)wd->sgprs[x+2],
+						(unsigned long)wd->sgprs[x+3]);
 
-									// reset count on SI..VI
-									y += 4;
-									if (x == 0x6C && asic->family <= FAMILY_VI)
-										y = 0;
-								}
-							}
-						}
+				if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
+					for (y  = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
+						// only print label once each
+						if ((asic->family <= FAMILY_VI && x < 0x6C + 8) ||
+							(asic->family > FAMILY_VI && x < 0x6C + 4))
+							printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP");
+						printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
+							(unsigned)(y),
+							(unsigned)(y + 3),
+							(unsigned long)wd->sgprs[x],
+							(unsigned long)wd->sgprs[x+1],
+							(unsigned long)wd->sgprs[x+2],
+							(unsigned long)wd->sgprs[x+3]);
 
-						if (have_vgprs) {
-							printf("\n");
-							for (x = 0; x < ((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
-								if (x % 16 == 0) {
-									if (x == 0)
-										printf("VGPRS:       ");
-									else
-										printf("             ");
-									for (thread = 0; thread < 64; ++thread) {
-										unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread))
-													    : (ws.exec_hi & (1u << (thread - 32)));
-										printf(live ? " t%02u     " : " (t%02u)   ", thread);
-									}
-									printf("\n");
-								}
+						// reset count on SI..VI
+						y += 4;
+						if (x == 0x6C && asic->family <= FAMILY_VI)
+							y = 0;
+					}
+				}
+			}
 
-								printf("    [%3u] = {", x);
-								for (thread = 0; thread < 64; ++thread)
-									printf(" %08x", vgprs[thread * 256 + x]);
-								printf(" }\n");
-							}
+			if (wd->have_vgprs) {
+				printf("\n");
+				for (x = 0; x < ((wd->ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
+					if (x % 16 == 0) {
+						if (x == 0)
+							printf("VGPRS:       ");
+						else
+							printf("             ");
+						for (thread = 0; thread < 64; ++thread) {
+							unsigned live = thread < 32 ? (wd->ws.exec_lo & (1u << thread))
+											: (wd->ws.exec_hi & (1u << (thread - 32)));
+							printf(live ? " t%02u     " : " (t%02u)   ", thread);
 						}
+						printf("\n");
+					}
 
-						printf("\n\nPGM_MEM:\n");
-						pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
-						umr_vm_disasm(asic, ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), NUM_OPCODE_WORDS*4);
+					printf("    [%3u] = {", x);
+					for (thread = 0; thread < 64; ++thread)
+						printf(" %08x", wd->vgprs[thread * 256 + x]);
+					printf(" }\n");
+				}
+			}
 
-						Hv("LDS_ALLOC", ws.lds_alloc.value);
-						PP(lds_alloc, lds_base);
-						PP(lds_alloc, lds_size);
+			printf("\n\nPGM_MEM:\n");
+			pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+			umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4);
 
-						Hv("IB_STS", ws.ib_sts.value);
-						PP(ib_sts, vm_cnt);
-						PP(ib_sts, exp_cnt);
-						PP(ib_sts, lgkm_cnt);
-						PP(ib_sts, valu_cnt);
+			Hv("LDS_ALLOC", wd->ws.lds_alloc.value);
+			PP(lds_alloc, lds_base);
+			PP(lds_alloc, lds_size);
 
-						Hv("TRAPSTS", ws.trapsts.value);
-						PP(trapsts, excp);
-						PP(trapsts, excp_cycle);
-						PP(trapsts, dp_rate);
+			Hv("IB_STS", wd->ws.ib_sts.value);
+			PP(ib_sts, vm_cnt);
+			PP(ib_sts, exp_cnt);
+			PP(ib_sts, lgkm_cnt);
+			PP(ib_sts, valu_cnt);
 
-						printf("\n"); col = 0;
-					}
+			Hv("TRAPSTS", wd->ws.trapsts.value);
+			PP(trapsts, excp);
+			PP(trapsts, excp_cycle);
+			PP(trapsts, dp_rate);
 
-				}
-			}
+			printf("\n"); col = 0;
 		}
+		wd = wd->next;
 	}
 	if (first)
 		printf("No active waves!\n");
 
+	wd = owd;
+	while (wd) {
+		owd = wd->next;
+		free(wd);
+		wd = owd;
+	}
+
 	if (asic->options.halt_waves)
 		umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
 }
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index c028c550fa3e..8d5427d63aae 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(umrcore STATIC
   read_vram.c
   ring_decode.c
   scan_config.c
+  scan_waves.c
   sq_cmd_halt_waves.c
   transfer_soc15.c
   wave_status.c
diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
new file mode 100644
index 000000000000..e3e7e131c888
--- /dev/null
+++ b/src/lib/scan_waves.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
+{
+	uint32_t se, sh, cu, simd, wave, thread;
+	struct umr_wave_data *opwd, *ppwd, *pwd;
+
+	ppwd = opwd = pwd = calloc(1, sizeof *pwd);
+	if (!pwd) {
+		fprintf(stderr, "[ERROR]: Out of memory\n");
+		return NULL;
+	}
+
+	for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
+	for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
+	for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+		// ensure the wave data is zeroed out if it was forwarded
+		// from a previous iteration
+		memset(&pwd->ws, 0, sizeof(pwd->ws));
+
+		pwd->se = se;
+		pwd->sh = sh;
+		pwd->cu = cu;
+		umr_get_wave_sq_info(asic, se, sh, cu, &pwd->ws);
+		if (pwd->ws.sq_info.busy) {
+			for (simd = 0; simd < 4; simd++)
+			for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
+				pwd->simd = simd;
+				pwd->wave = wave;
+				umr_get_wave_status(asic, se, sh, cu, simd, wave, &pwd->ws);
+				if (pwd->ws.wave_status.halt || pwd->ws.wave_status.valid) {
+					// grab sgprs..
+					if (pwd->ws.wave_status.halt) {
+						umr_read_sgprs(asic, &pwd->ws, &pwd->sgprs[0]);
+
+						pwd->have_vgprs = 1;
+						for (thread = 0; thread < 64; ++thread) {
+							if (umr_read_vgprs(asic, &pwd->ws, thread,
+									   &pwd->vgprs[256 * thread]) < 0) {
+								pwd->have_vgprs = 0;
+								break;
+							}
+						}
+					}
+
+					pwd->next = calloc(1, sizeof(*pwd));
+					if (!pwd->next) {
+						fprintf(stderr, "[ERROR]: Out of memory\n");
+						return opwd;
+					}
+					pwd->next->se = pwd->se;
+					pwd->next->sh = pwd->sh;
+					pwd->next->cu = pwd->cu;
+					pwd->next->ws = pwd->ws;
+					ppwd = pwd;
+					pwd = pwd->next;
+				}
+			}
+		}
+	}
+
+	// no waves to capture
+	if (opwd == pwd) {
+		free(pwd);
+		return NULL;
+	}
+
+	// drop tail node
+	free(ppwd->next);
+	ppwd->next = NULL;
+
+	return opwd;
+}
diff --git a/src/umr.h b/src/umr.h
index 7154db7bb2c3..e99ee965527e 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -362,6 +362,14 @@ struct umr_wave_status {
 	} trapsts;
 };
 
+struct umr_wave_data {
+	uint32_t vgprs[64 * 256], sgprs[1024];
+	int se, sh, cu, simd, wave, have_vgprs;
+	struct umr_wave_status ws;
+	struct umr_wave_thread *threads;
+	struct umr_wave_data *next;
+};
+
 struct umr_shaders_pgm {
 	// VMID and length in bytes
 	uint32_t
@@ -561,6 +569,7 @@ int umr_update(struct umr_asic *asic, char *script);
 
 /* lib helpers */
 int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws);
+struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic);
 int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws);
 int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst);
 int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst);
-- 
2.14.3



More information about the amd-gfx mailing list