[PATCH umr 05/17] gfx10+: fix SGPR counts

Tue Jun 6 09:17:13 UTC 2023

On gfx10+, every wave has 106 regular SGPRs followed immediately by VCC
(two dwords, VCC_LO and VCC_HI), meaning we should show 108 SGPRs by
default.

They are followed by 16 TTMPs (trap temporary SGPRs), for 124 registers
in total.

Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
 src/app/gui/commands.c                | 16 ++++++++--------
 src/app/print_waves.c                 |  4 ++--
 src/lib/lowlevel/linux/read_gprwave.c |  2 +-
 3 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/src/app/gui/commands.c b/src/app/gui/commands.c
index 45bb9d4..b7b28a7 100644
--- a/src/app/gui/commands.c
+++ b/src/app/gui/commands.c
@@ -1626,29 +1626,29 @@ static void wave_to_json(struct umr_asic *asic, int is_halted, int include_shade
 		json_object_set_value(json_object(wave), "hw_id", hw_id);
 
 		JSON_Value *gpr_alloc = json_value_init_object();
 		json_object_set_number(json_object(gpr_alloc), "vgpr_base", wd->ws.gpr_alloc.vgpr_base);
 		json_object_set_number(json_object(gpr_alloc), "vgpr_size", wd->ws.gpr_alloc.vgpr_size);
 		json_object_set_number(json_object(gpr_alloc), "sgpr_base", wd->ws.gpr_alloc.sgpr_base);
 		json_object_set_number(json_object(gpr_alloc), "sgpr_size", wd->ws.gpr_alloc.sgpr_size);
 		json_object_set_value(json_object(wave), "gpr_alloc", gpr_alloc);
 
 		if (is_halted && wd->ws.gpr_alloc.value != 0xbebebeef) {
-			int shift;
-			if (asic->family <= FAMILY_CIK || asic->family >= FAMILY_NV)
-				shift = 3;
-			else
-				shift = 4;
-
-			int spgr_count = (wd->ws.gpr_alloc.sgpr_size + 1) << shift;
+			int sgpr_count;
+			if (asic->family <= FAMILY_AI) {
+				int shift = asic->family <= FAMILY_CIK ? 3 : 4;
+				sgpr_count = (wd->ws.gpr_alloc.sgpr_size + 1) << shift;
+			} else {
+				sgpr_count = 108; // regular SGPRs and VCC
+			}
 			JSON_Value *sgpr = json_value_init_array();
-			for (int x = 0; x < spgr_count; x++) {
+			for (int x = 0; x < sgpr_count; x++) {
 				json_array_append_number(json_array(sgpr), wd->sgprs[x]);
 			}
 			json_object_set_value(json_object(wave), "sgpr", sgpr);
 
 			JSON_Value *threads = json_value_init_array();
 			int num_threads = wd->num_threads;
 			for (int thread = 0; thread < num_threads; thread++) {
 				unsigned live = thread < 32 ? (wd->ws.exec_lo & (1u << thread))	: (wd->ws.exec_hi & (1u << (thread - 32)));
 				json_array_append_boolean(json_array(threads), live ? 1 : 0);
 			}
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index de93f93..04a4447 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -467,21 +467,21 @@ static void umr_print_waves_gfx_10_11(struct umr_asic *asic)
 					(unsigned)wd->ws.hw_id1.wave_id, // TODO: wgp printed out won't match geometry for now w.r.t. to SPI
 					(unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
 					(unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
 					(unsigned long)wd->ws.hw_id1.value, (unsigned long)wd->ws.hw_id2.value, (unsigned long)wd->ws.gpr_alloc.value,
 					(unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value,
 					(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1,
 					(unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value);
 			}
 
 			if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
-				for (x = 0; x < 112; x += 4)
+				for (x = 0; x < 108; x += 4)
 					printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
 						(unsigned)(x),
 						(unsigned)(x + 3),
 						(unsigned long)wd->sgprs[x],
 						(unsigned long)wd->sgprs[x+1],
 						(unsigned long)wd->sgprs[x+2],
 						(unsigned long)wd->sgprs[x+3]);
 
 
 				if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
@@ -567,21 +567,21 @@ static void umr_print_waves_gfx_10_11(struct umr_asic *asic)
 			PP(hw_id2, vm_id);
 
 			Hv("GPR_ALLOC", wd->ws.gpr_alloc.value);
 			PP(gpr_alloc, vgpr_base);
 			PP(gpr_alloc, vgpr_size);
 			PP(gpr_alloc, sgpr_base);
 			PP(gpr_alloc, sgpr_size);
 
 			if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
 				printf("\n\nSGPRS:\n");
-				for (x = 0; x < 112; x += 4)
+				for (x = 0; x < 108; x += 4)
 					printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
 						(unsigned)(x),
 						(unsigned)(x + 3),
 						(unsigned long)wd->sgprs[x],
 						(unsigned long)wd->sgprs[x+1],
 						(unsigned long)wd->sgprs[x+2],
 						(unsigned long)wd->sgprs[x+3]);
 
 				if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
 					for (y  = 0, x = 0x6C; x < (16 + 0x6C); x += 4, y += 4) {
diff --git a/src/lib/lowlevel/linux/read_gprwave.c b/src/lib/lowlevel/linux/read_gprwave.c
index e861ee4..6d68b7e 100644
--- a/src/lib/lowlevel/linux/read_gprwave.c
+++ b/src/lib/lowlevel/linux/read_gprwave.c
@@ -99,21 +99,21 @@ static int read_gpr_gprwave(struct umr_asic *asic, int v_or_s, uint32_t thread,
 			id.gpr.thread = thread;
 			size = 4 * ((ws->gpr_alloc.vgpr_size + 1) << asic->parameters.vgpr_granularity);
 		}
 	} else {
 		id.se = ws->hw_id1.se_id;
 		id.sh = ws->hw_id1.sa_id;
 		id.cu = ((ws->hw_id1.wgp_id << 2) | ws->hw_id1.simd_id);
 		id.wave = ws->hw_id1.wave_id;
 		if (v_or_s == 0) {
 			id.gpr.thread = 0;
-			size = 4 * 112;
+			size = 4 * 124; // regular SGPRs, VCC, and TTMPs
 		} else {
 			id.gpr.thread = thread;
 			size = 4 * ((ws->gpr_alloc.vgpr_size + 1) << asic->parameters.vgpr_granularity);
 		}
 	}
 	id.gpr.vpgr_or_sgpr = v_or_s;
 	id.xcc_id = asic->options.vm_partition == -1 ? 0 : asic->options.vm_partition;
 
 	r = ioctl(asic->fd.gprwave, AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE, &id);
 	if (r)
-- 
2.40.0