[PATCH umr 05/17] gfx10+: fix SGPR counts
Nicolai Hähnle
nicolai.haehnle at amd.com
Tue Jun 6 09:17:13 UTC 2023
On gfx10+, every wave has 106 regular SGPRs followed immediately by VCC
(two dwords, VCC_LO and VCC_HI), meaning we should show 108 SGPRs by default.
They are followed by 16 TTMPs, for 124 in total.
Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/app/gui/commands.c | 16 ++++++++--------
src/app/print_waves.c | 4 ++--
src/lib/lowlevel/linux/read_gprwave.c | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/src/app/gui/commands.c b/src/app/gui/commands.c
index 45bb9d4..b7b28a7 100644
--- a/src/app/gui/commands.c
+++ b/src/app/gui/commands.c
@@ -1626,29 +1626,29 @@ static void wave_to_json(struct umr_asic *asic, int is_halted, int include_shade
json_object_set_value(json_object(wave), "hw_id", hw_id);
JSON_Value *gpr_alloc = json_value_init_object();
json_object_set_number(json_object(gpr_alloc), "vgpr_base", wd->ws.gpr_alloc.vgpr_base);
json_object_set_number(json_object(gpr_alloc), "vgpr_size", wd->ws.gpr_alloc.vgpr_size);
json_object_set_number(json_object(gpr_alloc), "sgpr_base", wd->ws.gpr_alloc.sgpr_base);
json_object_set_number(json_object(gpr_alloc), "sgpr_size", wd->ws.gpr_alloc.sgpr_size);
json_object_set_value(json_object(wave), "gpr_alloc", gpr_alloc);
if (is_halted && wd->ws.gpr_alloc.value != 0xbebebeef) {
- int shift;
- if (asic->family <= FAMILY_CIK || asic->family >= FAMILY_NV)
- shift = 3;
- else
- shift = 4;
-
- int spgr_count = (wd->ws.gpr_alloc.sgpr_size + 1) << shift;
+ int sgpr_count;
+ if (asic->family <= FAMILY_AI) {
+ int shift = asic->family <= FAMILY_CIK ? 3 : 4;
+ sgpr_count = (wd->ws.gpr_alloc.sgpr_size + 1) << shift;
+ } else {
+ sgpr_count = 108; // regular SGPRs and VCC
+ }
JSON_Value *sgpr = json_value_init_array();
- for (int x = 0; x < spgr_count; x++) {
+ for (int x = 0; x < sgpr_count; x++) {
json_array_append_number(json_array(sgpr), wd->sgprs[x]);
}
json_object_set_value(json_object(wave), "sgpr", sgpr);
JSON_Value *threads = json_value_init_array();
int num_threads = wd->num_threads;
for (int thread = 0; thread < num_threads; thread++) {
unsigned live = thread < 32 ? (wd->ws.exec_lo & (1u << thread)) : (wd->ws.exec_hi & (1u << (thread - 32)));
json_array_append_boolean(json_array(threads), live ? 1 : 0);
}
diff --git a/src/app/print_waves.c b/src/app/print_waves.c
index de93f93..04a4447 100644
--- a/src/app/print_waves.c
+++ b/src/app/print_waves.c
@@ -467,21 +467,21 @@ static void umr_print_waves_gfx_10_11(struct umr_asic *asic)
(unsigned)wd->ws.hw_id1.wave_id, // TODO: wgp printed out won't match geometry for now w.r.t. to SPI
(unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
(unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
(unsigned long)wd->ws.hw_id1.value, (unsigned long)wd->ws.hw_id2.value, (unsigned long)wd->ws.gpr_alloc.value,
(unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value,
(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1,
(unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value);
}
if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
- for (x = 0; x < 112; x += 4)
+ for (x = 0; x < 108; x += 4)
printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
(unsigned)(x),
(unsigned)(x + 3),
(unsigned long)wd->sgprs[x],
(unsigned long)wd->sgprs[x+1],
(unsigned long)wd->sgprs[x+2],
(unsigned long)wd->sgprs[x+3]);
if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
@@ -567,21 +567,21 @@ static void umr_print_waves_gfx_10_11(struct umr_asic *asic)
PP(hw_id2, vm_id);
Hv("GPR_ALLOC", wd->ws.gpr_alloc.value);
PP(gpr_alloc, vgpr_base);
PP(gpr_alloc, vgpr_size);
PP(gpr_alloc, sgpr_base);
PP(gpr_alloc, sgpr_size);
if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
printf("\n\nSGPRS:\n");
- for (x = 0; x < 112; x += 4)
+ for (x = 0; x < 108; x += 4)
printf("\t[%4u..%4u] = { %08lx, %08lx, %08lx, %08lx }\n",
(unsigned)(x),
(unsigned)(x + 3),
(unsigned long)wd->sgprs[x],
(unsigned long)wd->sgprs[x+1],
(unsigned long)wd->sgprs[x+2],
(unsigned long)wd->sgprs[x+3]);
if (wd->ws.wave_status.trap_en || wd->ws.wave_status.priv) {
for (y = 0, x = 0x6C; x < (16 + 0x6C); x += 4, y += 4) {
diff --git a/src/lib/lowlevel/linux/read_gprwave.c b/src/lib/lowlevel/linux/read_gprwave.c
index e861ee4..6d68b7e 100644
--- a/src/lib/lowlevel/linux/read_gprwave.c
+++ b/src/lib/lowlevel/linux/read_gprwave.c
@@ -99,21 +99,21 @@ static int read_gpr_gprwave(struct umr_asic *asic, int v_or_s, uint32_t thread,
id.gpr.thread = thread;
size = 4 * ((ws->gpr_alloc.vgpr_size + 1) << asic->parameters.vgpr_granularity);
}
} else {
id.se = ws->hw_id1.se_id;
id.sh = ws->hw_id1.sa_id;
id.cu = ((ws->hw_id1.wgp_id << 2) | ws->hw_id1.simd_id);
id.wave = ws->hw_id1.wave_id;
if (v_or_s == 0) {
id.gpr.thread = 0;
- size = 4 * 112;
+ size = 4 * 124; // regular SGPRs, VCC, and TTMPs
} else {
id.gpr.thread = thread;
size = 4 * ((ws->gpr_alloc.vgpr_size + 1) << asic->parameters.vgpr_granularity);
}
}
id.gpr.vpgr_or_sgpr = v_or_s;
id.xcc_id = asic->options.vm_partition == -1 ? 0 : asic->options.vm_partition;
r = ioctl(asic->fd.gprwave, AMDGPU_DEBUGFS_GPRWAVE_IOC_SET_STATE, &id);
if (r)
--
2.40.0
More information about the amd-gfx mailing list