[PATCH] umr: print MODE register as part of wave state

Joseph Greathouse Joseph.Greathouse at amd.com
Mon Jun 6 21:04:32 UTC 2022


The MODE register holds detailed per-wave state, but UMR did not print
it. This patch reads each wave's MODE register as part of the wave scan
operation and prints its raw value alongside the other wave registers;
the MODE register's sub-fields are printed by the more detailed print option.

Signed-off-by: Joseph Greathouse <Joseph.Greathouse at amd.com>
---
 src/app/print_waves.c | 41 ++++++++++++++++++++++++++++++++++------
 src/lib/scan_waves.c  | 44 +++++++++++++++++++++++++++++++++++++++++++
 src/umr.h             | 19 +++++++++++++++++++
 3 files changed, 98 insertions(+), 6 deletions(-)

diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index 07dae2d..daedc24 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -91,7 +91,7 @@ static void umr_print_waves_si_ai(struct umr_asic *asic)
 		if (!asic->options.bitfields && first) {
 			static const char* titles[] = {
 				"WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "INST_DW1", "EXEC_HI", "EXEC_LO", "HW_ID", "GPRALLOC",
-				"LDSALLOC", "TRAPSTS", "IBSTS", "TBA_HI", "TBA_LO", "TMA_HI", "TMA_LO", "IB_DBG0", "M0", NULL
+				"LDSALLOC", "TRAPSTS", "IBSTS", "TBA_HI", "TBA_LO", "TMA_HI", "TMA_LO", "IB_DBG0", "M0", "MODE", NULL
 			};
 			first = 0;
 			printf("SE SH CU SIMD WAVE# ");
@@ -106,13 +106,13 @@ static void umr_print_waves_si_ai(struct umr_asic *asic)
 "   %08lx %08lx %08lx " // wave_status pc/hi/lo
 "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
 "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
-"%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
+"%08lx %08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0 MODE\n");
 "\n",
 (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id,
 (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
 (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
 (unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value,
-(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0
+(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value
 );
 			if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
 				for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
@@ -318,6 +318,24 @@ static void umr_print_waves_si_ai(struct umr_asic *asic)
 			PP(trapsts, excp_cycle);
 			PP(trapsts, dp_rate);
 
+			Hv("MODE", wd->ws.mode.value);
+			PP(mode, fp_round);
+			PP(mode, fp_denorm);
+			PP(mode, dx10_clamp);
+			PP(mode, ieee);
+			PP(mode, lod_clamped);
+			PP(mode, debug_en);
+			PP(mode, excp_en);
+			if (asic->family > FAMILY_VI)
+				PP(mode, fp16_ovfl);
+			PP(mode, pops_packer0);
+			PP(mode, pops_packer1);
+			if (asic->family > FAMILY_VI)
+				PP(mode, disable_perf);
+			PP(mode, gpr_idx_en);
+			PP(mode, vskip);
+			PP(mode, csp);
+
 			printf("\n"); col = 0;
 		}
 		wd = wd->next;
@@ -393,7 +411,7 @@ static void umr_print_waves_nv(struct umr_asic *asic)
 	while (wd) {
 		if (!asic->options.bitfields && first) {
 			static const char* titles[] = {
-				"WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "EXEC_HI", "EXEC_LO", "HW_ID1", "HW_ID2", "GPRALLOC", "LDSALLOC", "TRAPSTS", "IBSTS1", "IBSTS2", "IB_DBG1", "M0", NULL
+				"WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "EXEC_HI", "EXEC_LO", "HW_ID1", "HW_ID2", "GPRALLOC", "LDSALLOC", "TRAPSTS", "IBSTS1", "IBSTS2", "IB_DBG1", "M0", "MODE", NULL
 			};
 			first = 0;
 			printf("SE SA WGP SIMD WAVE# ");
@@ -408,13 +426,13 @@ static void umr_print_waves_nv(struct umr_asic *asic)
 "   %08lx %08lx %08lx " // wave_status pc/hi/lo
 "%08lx %08lx %08lx " // inst0 exec hi/lo
 "%08lx %08lx %08lx %08lx %08lx %08lx %08lx " // HW_ID1 HW_ID2 GPR/LDSALLOC TRAP/IB STS
-"%08lx %08lx " // IB_DBG1 M0\n");
+"%08lx %08lx %08lx " // IB_DBG1 M0 MODE\n");
 "\n",
 (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id1.simd_id, (unsigned)wd->ws.hw_id1.wave_id, // TODO: wgp printed out won't match geometry for now w.r.t. to SPI
 (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
 (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
 (unsigned long)wd->ws.hw_id1.value, (unsigned long)wd->ws.hw_id2.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value,
-(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0);
+(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value);
 			if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
 				for (x = 0; x < 112; x += 4)
 					printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
@@ -627,6 +645,17 @@ static void umr_print_waves_nv(struct umr_asic *asic)
 			PP(trapsts, excp_group_mask);
 			PP(trapsts, utc_error);
 
+			Hv("MODE", wd->ws.mode.value);
+			PP(mode, fp_round);
+			PP(mode, fp_denorm);
+			PP(mode, dx10_clamp);
+			PP(mode, ieee);
+			PP(mode, lod_clamped);
+			PP(mode, debug_en);
+			PP(mode, excp_en);
+			PP(mode, fp16_ovfl);
+			PP(mode, disable_perf);
+
 			printf("\n"); col = 0;
 		}
 		wd = wd->next;
diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
index dae858e..71e595f 100644
--- a/src/lib/scan_waves.c
+++ b/src/lib/scan_waves.c
@@ -139,6 +139,7 @@ int umr_read_wave_status_via_mmio_gfx8_9(struct umr_asic *asic, uint32_t simd, u
 	}
 	dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_DBG0")->addr);
 	dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_M0")->addr);
+	dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_MODE")->addr);
 
 	return 0;
 }
@@ -163,6 +164,7 @@ int umr_read_wave_status_via_mmio_gfx10(struct umr_asic *asic, uint32_t wave, ui
 	dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_STS2")->addr);
 	dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_DBG1")->addr);
 	dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_M0")->addr);
+	dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_MODE")->addr);
 
 	return 0;
 }
@@ -258,6 +260,19 @@ static int umr_parse_wave_data_gfx_8(struct umr_asic *asic, struct umr_wave_stat
 	ws->tma_hi = buf[x++];
 	ws->ib_dbg0 = buf[x++];
 	ws->m0 = buf[x++];
+
+	ws->mode.value = value = buf[x++];
+		reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE");
+		ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value);
+		ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value);
+		ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value);
+		ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value);
+		ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value);
+		ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value);
+		ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value);
+		ws->mode.gpr_idx_en = umr_bitslice_reg(asic, reg, "GPR_IDX_EN", value);
+		ws->mode.vskip = umr_bitslice_reg(asic, reg, "VSKIP", value);
+		ws->mode.csp = umr_bitslice_reg(asic, reg, "CSP", value);
 	return 0;
 }
 
@@ -348,6 +363,23 @@ static int umr_parse_wave_data_gfx_9(struct umr_asic *asic, struct umr_wave_stat
 
 	ws->ib_dbg0 = buf[x++];
 	ws->m0 = buf[x++];
+
+	ws->mode.value = value = buf[x++];
+		reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE");
+		ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value);
+		ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value);
+		ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value);
+		ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value);
+		ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value);
+		ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value);
+		ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value);
+		ws->mode.fp16_ovfl = umr_bitslice_reg(asic, reg, "FP16_OVFL", value);
+		ws->mode.pops_packer0 = umr_bitslice_reg(asic, reg, "POPS_PACKER0", value);
+		ws->mode.pops_packer1 = umr_bitslice_reg(asic, reg, "POPS_PACKER1", value);
+		ws->mode.disable_perf = umr_bitslice_reg(asic, reg, "DISABLE_PERF", value);
+		ws->mode.gpr_idx_en = umr_bitslice_reg(asic, reg, "GPR_IDX_EN", value);
+		ws->mode.vskip = umr_bitslice_reg(asic, reg, "VSKIP", value);
+		ws->mode.csp = umr_bitslice_reg(asic, reg, "CSP", value);
 	return 0;
 }
 
@@ -468,6 +500,18 @@ static int umr_parse_wave_data_gfx_10(struct umr_asic *asic, struct umr_wave_sta
 
 	ws->ib_dbg1 = buf[x++];
 	ws->m0 = buf[x++];
+
+	ws->mode.value = value = buf[x++];
+		reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE");
+		ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value);
+		ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value);
+		ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value);
+		ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value);
+		ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value);
+		ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value);
+		ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value);
+		ws->mode.fp16_ovfl = umr_bitslice_reg(asic, reg, "FP16_OVFL", value);
+		ws->mode.disable_perf = umr_bitslice_reg(asic, reg, "DISABLE_PERF", value);
 	return 0;
 }
 
diff --git a/src/umr.h b/src/umr.h
index ca62560..f541dd3 100644
--- a/src/umr.h
+++ b/src/umr.h
@@ -713,6 +713,25 @@ struct umr_wave_status {
 			excp_group_mask,
 			utc_error;
 	} trapsts;
+
+	struct {
+		uint32_t
+			value,
+			fp_round,
+			fp_denorm,
+			dx10_clamp,
+			ieee,
+			lod_clamped,
+			debug_en,
+			excp_en,
+			fp16_ovfl,
+			pops_packer0,
+			pops_packer1,
+			disable_perf,
+			gpr_idx_en,
+			vskip,
+			csp;
+	} mode;
 };
 
 struct umr_wave_data {
-- 
2.25.1



More information about the amd-gfx mailing list