[PATCH umr] refactor out wave scanning to new function umr_scan_wave_data()
Tom St Denis
tom.stdenis at amd.com
Fri Apr 20 18:06:56 UTC 2018
Signed-off-by: Tom St Denis <tom.stdenis at amd.com>
---
doc/sphinx/source/libwave_status.rst | 28 +++
src/app/print_waves.c | 379 +++++++++++++++++------------------
src/lib/CMakeLists.txt | 1 +
src/lib/scan_waves.c | 97 +++++++++
src/umr.h | 9 +
5 files changed, 314 insertions(+), 200 deletions(-)
create mode 100644 src/lib/scan_waves.c
diff --git a/doc/sphinx/source/libwave_status.rst b/doc/sphinx/source/libwave_status.rst
index 0f58a3c91855..2e16a4ac0cb6 100644
--- a/doc/sphinx/source/libwave_status.rst
+++ b/doc/sphinx/source/libwave_status.rst
@@ -43,6 +43,34 @@ can be read with the following function:
This will populate many of the fields of the structure 'umr_wave_status'. An
example of reading them can be found in src/app/print_waves.c.
+---------------------
+Scanning Halted Waves
+---------------------
+
+If the waves have been halted (for example, with the function umr_sq_cmd_halt_waves()),
+a list of the waves that are halted or valid can be built with the following function:
+
+
+::
+
+ struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
+
+This returns NULL on error (or if no waves were captured); otherwise it
+returns a pointer to the first node of a list of the following structure:
+
+::
+
+ struct umr_wave_data {
+ uint32_t vgprs[64 * 256], sgprs[1024];
+ int se, sh, cu, simd, wave, have_vgprs;
+ struct umr_wave_status ws;
+ struct umr_wave_thread *threads;
+ struct umr_wave_data *next;
+ };
+
+The waves are stored as a linked list, which is terminated by the
+last node having 'next' set to NULL.
+
------------
Reading GPRs
------------
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 563fc65bedb7..d901bc902ff3 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -24,11 +24,11 @@
*/
#include "umrapp.h"
-#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #y, (unsigned)ws.x.y);
-#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #y, (unsigned long)ws.x.y);
+#define PP(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #y, (unsigned)wd->ws.x.y);
+#define PX(x, y) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #y, (unsigned long)wd->ws.x.y);
-#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)ws.x);
-#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)ws.x);
+#define P(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %8u | ", #x, (unsigned)wd->ws.x);
+#define X(x) if (col++ == 4) { col = 1; printf("\n\t"); } printf("%20s: %08lx | ", #x, (unsigned long)wd->ws.x);
#define H(x) if (col) { printf("\n"); }; col = 0; printf("\n\n%s:\n\t", x);
#define Hv(x, y) if (col) { printf("\n"); }; col = 0; printf("\n\n%s[%08lx]:\n\t", x, (unsigned long)y);
@@ -37,11 +37,9 @@
void umr_print_waves(struct umr_asic *asic)
{
- uint32_t x, y, se, sh, cu, simd, wave, sgprs[1024], shift;
- uint32_t vgprs[64 * 256];
- uint32_t thread;
+ uint32_t x, y, shift, thread;
uint64_t pgm_addr;
- struct umr_wave_status ws;
+ struct umr_wave_data *wd, *owd;
int first = 1, col = 0;
if (asic->options.halt_waves)
@@ -52,228 +50,209 @@ void umr_print_waves(struct umr_asic *asic)
else
shift = 4; // on VI allocations are in 16-dword blocks
- for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
- for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
- for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
- umr_get_wave_sq_info(asic, se, sh, cu, &ws);
- if (ws.sq_info.busy) {
- for (simd = 0; simd < 4; simd++)
- for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
- umr_get_wave_status(asic, se, sh, cu, simd, wave, &ws);
- if (ws.wave_status.halt || ws.wave_status.valid) {
- unsigned have_vgprs = 0;
-
- // grab sgprs..
- if (ws.wave_status.halt) {
- umr_read_sgprs(asic, &ws, &sgprs[0]);
-
- if (asic->options.bitfields) {
- have_vgprs = 1;
- for (thread = 0; thread < 64; ++thread) {
- if (umr_read_vgprs(asic, &ws, thread,
- &vgprs[256 * thread]) < 0)
- have_vgprs = 0;
- }
- }
- }
-
- if (!asic->options.bitfields && first) {
- first = 0;
- printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
- }
- if (!asic->options.bitfields) {
- printf(
+ owd = wd = umr_scan_wave_data(asic);
+ while (wd) {
+ if (!asic->options.bitfields && first) {
+ first = 0;
+ printf("SE SH CU SIMD WAVE# WAVE_STATUS PC_HI PC_LO INST_DW0 INST_DW1 EXEC_HI EXEC_LO HW_ID GPRALLOC LDSALLOC TRAPSTS IBSTS TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
+ }
+ if (!asic->options.bitfields) {
+ printf(
"%u %u %u %u %u " // se/sh/cu/simd/wave
"%08lx %08lx %08lx " // wave_status pc/hi/lo
"%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
"%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
"%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
"\n",
-(unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id,
-(unsigned long)ws.wave_status.value, (unsigned long)ws.pc_hi, (unsigned long)ws.pc_lo,
-(unsigned long)ws.wave_inst_dw0, (unsigned long)ws.wave_inst_dw1, (unsigned long)ws.exec_hi, (unsigned long)ws.exec_lo,
-(unsigned long)ws.hw_id.value, (unsigned long)ws.gpr_alloc.value, (unsigned long)ws.lds_alloc.value, (unsigned long)ws.trapsts.value, (unsigned long)ws.ib_sts.value,
-(unsigned long)ws.tba_hi, (unsigned long)ws.tba_lo, (unsigned long)ws.tma_hi, (unsigned long)ws.tma_lo, (unsigned long)ws.ib_dbg0, (unsigned long)ws.m0
+(unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id,
+(unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
+(unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
+(unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value,
+(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0
);
- if (ws.wave_status.halt) {
- for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
- printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
- (unsigned)(x),
- (unsigned)(x + 3),
- (unsigned long)sgprs[x],
- (unsigned long)sgprs[x+1],
- (unsigned long)sgprs[x+2],
- (unsigned long)sgprs[x+3]);
-
- if (ws.wave_status.trap_en || ws.wave_status.priv) {
- for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
- printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
- (x < (0x6C + 4) && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP",
- (unsigned)(y),
- (unsigned)(y + 3),
- (unsigned long)sgprs[x],
- (unsigned long)sgprs[x+1],
- (unsigned long)sgprs[x+2],
- (unsigned long)sgprs[x+3]);
+ if (wd->ws.wave_status.halt) {
+ for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
+ printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
+ (unsigned)(x),
+ (unsigned)(x + 3),
+ (unsigned long)wd->sgprs[x],
+ (unsigned long)wd->sgprs[x+1],
+ (unsigned long)wd->sgprs[x+2],
+ (unsigned long)wd->sgprs[x+3]);
- // restart numbering on SI..VI with TTMP0
- y += 4;
- if (x == 0x6C && asic->family <= FAMILY_VI)
- y = 0;
- }
- }
- }
+ if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
+ for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
+ printf(">%s[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
+ (x < (0x6C + 4) && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP",
+ (unsigned)(y),
+ (unsigned)(y + 3),
+ (unsigned long)wd->sgprs[x],
+ (unsigned long)wd->sgprs[x+1],
+ (unsigned long)wd->sgprs[x+2],
+ (unsigned long)wd->sgprs[x+3]);
- pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
- umr_vm_disasm(asic, ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), NUM_OPCODE_WORDS*4);
- } else {
- first = 0;
- printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n",
- (unsigned)se, (unsigned)sh, (unsigned)cu, (unsigned)ws.hw_id.simd_id, (unsigned)ws.hw_id.wave_id);
+ // restart numbering on SI..VI with TTMP0
+ y += 4;
+ if (x == 0x6C && asic->family <= FAMILY_VI)
+ y = 0;
+ }
+ }
+ }
- H("Main Registers");
- X(pc_hi);
- X(pc_lo);
- X(wave_inst_dw0);
- X(wave_inst_dw1);
- X(exec_hi);
- X(exec_lo);
- X(tba_hi);
- X(tba_lo);
- X(tma_hi);
- X(tma_lo);
- X(m0);
- X(ib_dbg0);
+ pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+ umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4);
+ } else {
+ first = 0;
+ printf("\n------------------------------------------------------\nse%u.sh%u.cu%u.simd%u.wave%u\n",
+ (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id);
- Hv("Wave_Status", ws.wave_status.value);
- PP(wave_status, scc);
- PP(wave_status, execz);
- PP(wave_status, vccz);
- PP(wave_status, in_tg);
- PP(wave_status, halt);
- PP(wave_status, valid);
- PP(wave_status, spi_prio);
- PP(wave_status, wave_prio);
- PP(wave_status, priv);
- PP(wave_status, trap_en);
- PP(wave_status, trap);
- PP(wave_status, ttrace_en);
- PP(wave_status, export_rdy);
- PP(wave_status, in_barrier);
- PP(wave_status, ecc_err);
- PP(wave_status, skip_export);
- PP(wave_status, perf_en);
- PP(wave_status, cond_dbg_user);
- PP(wave_status, cond_dbg_sys);
- PP(wave_status, data_atc);
- PP(wave_status, inst_atc);
- PP(wave_status, dispatch_cache_ctrl);
- PP(wave_status, must_export);
+ H("Main Registers");
+ X(pc_hi);
+ X(pc_lo);
+ X(wave_inst_dw0);
+ X(wave_inst_dw1);
+ X(exec_hi);
+ X(exec_lo);
+ X(tba_hi);
+ X(tba_lo);
+ X(tma_hi);
+ X(tma_lo);
+ X(m0);
+ X(ib_dbg0);
- Hv("HW_ID", ws.hw_id.value);
- PP(hw_id, wave_id);
- PP(hw_id, simd_id);
- PP(hw_id, pipe_id);
- PP(hw_id, cu_id);
- PP(hw_id, sh_id);
- PP(hw_id, se_id);
- PP(hw_id, tg_id);
- PP(hw_id, vm_id);
- PP(hw_id, queue_id);
- PP(hw_id, state_id);
- PP(hw_id, me_id);
+ Hv("Wave_Status", wd->ws.wave_status.value);
+ PP(wave_status, scc);
+ PP(wave_status, execz);
+ PP(wave_status, vccz);
+ PP(wave_status, in_tg);
+ PP(wave_status, halt);
+ PP(wave_status, valid);
+ PP(wave_status, spi_prio);
+ PP(wave_status, wave_prio);
+ PP(wave_status, priv);
+ PP(wave_status, trap_en);
+ PP(wave_status, trap);
+ PP(wave_status, ttrace_en);
+ PP(wave_status, export_rdy);
+ PP(wave_status, in_barrier);
+ PP(wave_status, ecc_err);
+ PP(wave_status, skip_export);
+ PP(wave_status, perf_en);
+ PP(wave_status, cond_dbg_user);
+ PP(wave_status, cond_dbg_sys);
+ PP(wave_status, data_atc);
+ PP(wave_status, inst_atc);
+ PP(wave_status, dispatch_cache_ctrl);
+ PP(wave_status, must_export);
- Hv("GPR_ALLOC", ws.gpr_alloc.value);
- PP(gpr_alloc, vgpr_base);
- PP(gpr_alloc, vgpr_size);
- PP(gpr_alloc, sgpr_base);
- PP(gpr_alloc, sgpr_size);
+ Hv("HW_ID", wd->ws.hw_id.value);
+ PP(hw_id, wave_id);
+ PP(hw_id, simd_id);
+ PP(hw_id, pipe_id);
+ PP(hw_id, cu_id);
+ PP(hw_id, sh_id);
+ PP(hw_id, se_id);
+ PP(hw_id, tg_id);
+ PP(hw_id, vm_id);
+ PP(hw_id, queue_id);
+ PP(hw_id, state_id);
+ PP(hw_id, me_id);
- if (ws.wave_status.halt) {
- printf("\n\nSGPRS:\n");
- for (x = 0; x < ((ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
- printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
- (unsigned)(x),
- (unsigned)(x + 3),
- (unsigned long)sgprs[x],
- (unsigned long)sgprs[x+1],
- (unsigned long)sgprs[x+2],
- (unsigned long)sgprs[x+3]);
+ Hv("GPR_ALLOC", wd->ws.gpr_alloc.value);
+ PP(gpr_alloc, vgpr_base);
+ PP(gpr_alloc, vgpr_size);
+ PP(gpr_alloc, sgpr_base);
+ PP(gpr_alloc, sgpr_size);
- if (ws.wave_status.trap_en || ws.wave_status.priv) {
- for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
- // only print label once each
- if ((asic->family <= FAMILY_VI && x < 0x6C + 8) ||
- (asic->family > FAMILY_VI && x < 0x6C + 4))
- printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP");
- printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
- (unsigned)(y),
- (unsigned)(y + 3),
- (unsigned long)sgprs[x],
- (unsigned long)sgprs[x+1],
- (unsigned long)sgprs[x+2],
- (unsigned long)sgprs[x+3]);
+ if (wd->ws.wave_status.halt) {
+ printf("\n\nSGPRS:\n");
+ for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
+ printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
+ (unsigned)(x),
+ (unsigned)(x + 3),
+ (unsigned long)wd->sgprs[x],
+ (unsigned long)wd->sgprs[x+1],
+ (unsigned long)wd->sgprs[x+2],
+ (unsigned long)wd->sgprs[x+3]);
- // reset count on SI..VI
- y += 4;
- if (x == 0x6C && asic->family <= FAMILY_VI)
- y = 0;
- }
- }
- }
+ if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
+ for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4) {
+ // only print label once each
+ if ((asic->family <= FAMILY_VI && x < 0x6C + 8) ||
+ (asic->family > FAMILY_VI && x < 0x6C + 4))
+ printf("\n%s:\n", (x < 0x6C + 4 && asic->family <= FAMILY_VI) ? "TBA/TMA" : "TTMP");
+ printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
+ (unsigned)(y),
+ (unsigned)(y + 3),
+ (unsigned long)wd->sgprs[x],
+ (unsigned long)wd->sgprs[x+1],
+ (unsigned long)wd->sgprs[x+2],
+ (unsigned long)wd->sgprs[x+3]);
- if (have_vgprs) {
- printf("\n");
- for (x = 0; x < ((ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
- if (x % 16 == 0) {
- if (x == 0)
- printf("VGPRS: ");
- else
- printf(" ");
- for (thread = 0; thread < 64; ++thread) {
- unsigned live = thread < 32 ? (ws.exec_lo & (1u << thread))
- : (ws.exec_hi & (1u << (thread - 32)));
- printf(live ? " t%02u " : " (t%02u) ", thread);
- }
- printf("\n");
- }
+ // reset count on SI..VI
+ y += 4;
+ if (x == 0x6C && asic->family <= FAMILY_VI)
+ y = 0;
+ }
+ }
+ }
- printf(" [%3u] = {", x);
- for (thread = 0; thread < 64; ++thread)
- printf(" %08x", vgprs[thread * 256 + x]);
- printf(" }\n");
- }
+ if (wd->have_vgprs) {
+ printf("\n");
+ for (x = 0; x < ((wd->ws.gpr_alloc.vgpr_size + 1) << 2); ++x) {
+ if (x % 16 == 0) {
+ if (x == 0)
+ printf("VGPRS: ");
+ else
+ printf(" ");
+ for (thread = 0; thread < 64; ++thread) {
+ unsigned live = thread < 32 ? (wd->ws.exec_lo & (1u << thread))
+ : (wd->ws.exec_hi & (1u << (thread - 32)));
+ printf(live ? " t%02u " : " (t%02u) ", thread);
}
+ printf("\n");
+ }
- printf("\n\nPGM_MEM:\n");
- pgm_addr = (((uint64_t)ws.pc_hi << 32) | ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
- umr_vm_disasm(asic, ws.hw_id.vm_id, pgm_addr, (((uint64_t)ws.pc_hi << 32) | ws.pc_lo), NUM_OPCODE_WORDS*4);
+ printf(" [%3u] = {", x);
+ for (thread = 0; thread < 64; ++thread)
+ printf(" %08x", wd->vgprs[thread * 256 + x]);
+ printf(" }\n");
+ }
+ }
- Hv("LDS_ALLOC", ws.lds_alloc.value);
- PP(lds_alloc, lds_base);
- PP(lds_alloc, lds_size);
+ printf("\n\nPGM_MEM:\n");
+ pgm_addr = (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo) - (NUM_OPCODE_WORDS*4)/2;
+ umr_vm_disasm(asic, wd->ws.hw_id.vm_id, pgm_addr, (((uint64_t)wd->ws.pc_hi << 32) | wd->ws.pc_lo), NUM_OPCODE_WORDS*4);
- Hv("IB_STS", ws.ib_sts.value);
- PP(ib_sts, vm_cnt);
- PP(ib_sts, exp_cnt);
- PP(ib_sts, lgkm_cnt);
- PP(ib_sts, valu_cnt);
+ Hv("LDS_ALLOC", wd->ws.lds_alloc.value);
+ PP(lds_alloc, lds_base);
+ PP(lds_alloc, lds_size);
- Hv("TRAPSTS", ws.trapsts.value);
- PP(trapsts, excp);
- PP(trapsts, excp_cycle);
- PP(trapsts, dp_rate);
+ Hv("IB_STS", wd->ws.ib_sts.value);
+ PP(ib_sts, vm_cnt);
+ PP(ib_sts, exp_cnt);
+ PP(ib_sts, lgkm_cnt);
+ PP(ib_sts, valu_cnt);
- printf("\n"); col = 0;
- }
+ Hv("TRAPSTS", wd->ws.trapsts.value);
+ PP(trapsts, excp);
+ PP(trapsts, excp_cycle);
+ PP(trapsts, dp_rate);
- }
- }
+ printf("\n"); col = 0;
}
+ wd = wd->next;
}
if (first)
printf("No active waves!\n");
+ wd = owd;
+ while (wd) {
+ owd = wd->next;
+ free(wd);
+ wd = owd;
+ }
+
if (asic->options.halt_waves)
umr_sq_cmd_halt_waves(asic, UMR_SQ_CMD_RESUME);
}
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
index c028c550fa3e..8d5427d63aae 100644
--- a/src/lib/CMakeLists.txt
+++ b/src/lib/CMakeLists.txt
@@ -21,6 +21,7 @@ add_library(umrcore STATIC
read_vram.c
ring_decode.c
scan_config.c
+ scan_waves.c
sq_cmd_halt_waves.c
transfer_soc15.c
wave_status.c
diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
new file mode 100644
index 000000000000..e3e7e131c888
--- /dev/null
+++ b/src/lib/scan_waves.c
@@ -0,0 +1,97 @@
+/*
+ * Copyright 2018 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Tom St Denis <tom.stdenis at amd.com>
+ *
+ */
+#include "umr.h"
+
+struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic) // builds a calloc()'d linked list of waves that are halted or valid; NULL if none were captured or the first allocation fails
+{
+ uint32_t se, sh, cu, simd, wave, thread;
+ struct umr_wave_data *opwd, *ppwd, *pwd; // opwd: list head, pwd: scratch node currently being filled, ppwd: last node that holds a captured wave
+
+ ppwd = opwd = pwd = calloc(1, sizeof *pwd); // the scan always fills a pre-allocated, zeroed scratch node in place
+ if (!pwd) {
+ fprintf(stderr, "[ERROR]: Out of memory\n");
+ return NULL;
+ }
+
+ for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
+ for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
+ for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+ // the scratch node inherits 'ws' from the previously captured wave (see the
+ // copy into pwd->next below), so clear it before probing a new CU
+ memset(&pwd->ws, 0, sizeof(pwd->ws));
+
+ pwd->se = se;
+ pwd->sh = sh;
+ pwd->cu = cu;
+ umr_get_wave_sq_info(asic, se, sh, cu, &pwd->ws);
+ if (pwd->ws.sq_info.busy) { // only CUs reported busy are scanned wave by wave
+ for (simd = 0; simd < 4; simd++)
+ for (wave = 0; wave < 10; wave++) { //both simd/wave are hard coded at the moment...
+ pwd->simd = simd;
+ pwd->wave = wave;
+ umr_get_wave_status(asic, se, sh, cu, simd, wave, &pwd->ws);
+ if (pwd->ws.wave_status.halt || pwd->ws.wave_status.valid) {
+ // SGPRs and VGPRs are only read for halted waves; otherwise both arrays keep their calloc() zeros
+ if (pwd->ws.wave_status.halt) {
+ umr_read_sgprs(asic, &pwd->ws, &pwd->sgprs[0]);
+
+ pwd->have_vgprs = 1; // assume success until a per-thread read fails
+ for (thread = 0; thread < 64; ++thread) { // each thread gets its own 256-dword slot in vgprs[]
+ if (umr_read_vgprs(asic, &pwd->ws, thread,
+ &pwd->vgprs[256 * thread]) < 0) {
+ pwd->have_vgprs = 0; // a single failed read marks the whole VGPR capture as unavailable
+ break;
+ }
+ }
+ }
+
+ pwd->next = calloc(1, sizeof(*pwd)); // this node is now a captured wave; allocate the next scratch node
+ if (!pwd->next) {
+ fprintf(stderr, "[ERROR]: Out of memory\n");
+ return opwd; // partial result: the waves captured so far, with this node as the NULL-terminated tail
+ }
+ pwd->next->se = pwd->se; // carry the current SE/SH/CU and status block forward into the new scratch node
+ pwd->next->sh = pwd->sh;
+ pwd->next->cu = pwd->cu;
+ pwd->next->ws = pwd->ws;
+ ppwd = pwd; // remember the last node that holds real data
+ pwd = pwd->next;
+ }
+ }
+ }
+ }
+
+ // the scratch node is still the head, so nothing was captured
+ if (opwd == pwd) {
+ free(pwd);
+ return NULL;
+ }
+
+ // drop the unused scratch node that follows the last captured wave
+ free(ppwd->next);
+ ppwd->next = NULL;
+
+ return opwd; // every node came from calloc(); the caller releases each one with free()
+}
diff --git a/src/umr.h b/src/umr.h
index 7154db7bb2c3..e99ee965527e 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -362,6 +362,14 @@ struct umr_wave_status {
} trapsts;
};
+struct umr_wave_data { // one captured wave; nodes are chained into the list returned by umr_scan_wave_data()
+ uint32_t vgprs[64 * 256], sgprs[1024]; // vgprs is laid out as a 256-dword slot per thread for 64 threads, i.e. [thread * 256 + reg]
+ int se, sh, cu, simd, wave, have_vgprs; // loop position the wave was scanned at; have_vgprs is 1 only if every thread's VGPR read succeeded
+ struct umr_wave_status ws; // status block filled by umr_get_wave_sq_info()/umr_get_wave_status()
+ struct umr_wave_thread *threads; // NOTE(review): never assigned by umr_scan_wave_data() (stays NULL from calloc) - intended use TODO confirm
+ struct umr_wave_data *next; // next captured wave, NULL on the last node
+};
+
struct umr_shaders_pgm {
// VMID and length in bytes
uint32_t
@@ -561,6 +569,7 @@ int umr_update(struct umr_asic *asic, char *script);
/* lib helpers */
int umr_get_wave_status(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, unsigned simd, unsigned wave, struct umr_wave_status *ws);
+struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic);
int umr_get_wave_sq_info(struct umr_asic *asic, unsigned se, unsigned sh, unsigned cu, struct umr_wave_status *ws);
int umr_read_sgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t *dst);
int umr_read_vgprs(struct umr_asic *asic, struct umr_wave_status *ws, uint32_t thread, uint32_t *dst);
--
2.14.3
More information about the amd-gfx
mailing list