[PATCH] umr: print MODE register as part of wave state
Alex Deucher
alexdeucher at gmail.com
Tue Jun 7 18:34:41 UTC 2022
On Mon, Jun 6, 2022 at 5:04 PM Joseph Greathouse
<Joseph.Greathouse at amd.com> wrote:
>
> The MODE register contains detailed per-wave information, but UMR
> did not print it. This patch adds the ability to print each wave's
> MODE register as part of the wave scan operation, and to print the
> register's sub-fields as part of the more detailed print option.
>
> Signed-off-by: Joseph Greathouse <Joseph.Greathouse at amd.com>
Acked-by: Alex Deucher <alexander.deucher at amd.com>
> ---
> src/app/print_waves.c | 41 ++++++++++++++++++++++++++++++++++------
> src/lib/scan_waves.c | 44 +++++++++++++++++++++++++++++++++++++++++++
> src/umr.h | 19 +++++++++++++++++++
> 3 files changed, 98 insertions(+), 6 deletions(-)
>
> diff --git a/src/app/print_waves.c b/src/app/print_waves.c
> index 07dae2d..daedc24 100644
> --- a/src/app/print_waves.c
> +++ b/src/app/print_waves.c
> @@ -91,7 +91,7 @@ static void umr_print_waves_si_ai(struct umr_asic *asic)
> if (!asic->options.bitfields && first) {
> static const char* titles[] = {
> "WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "INST_DW1", "EXEC_HI", "EXEC_LO", "HW_ID", "GPRALLOC",
> - "LDSALLOC", "TRAPSTS", "IBSTS", "TBA_HI", "TBA_LO", "TMA_HI", "TMA_LO", "IB_DBG0", "M0", NULL
> + "LDSALLOC", "TRAPSTS", "IBSTS", "TBA_HI", "TBA_LO", "TMA_HI", "TMA_LO", "IB_DBG0", "M0", "MODE", NULL
> };
> first = 0;
> printf("SE SH CU SIMD WAVE# ");
> @@ -106,13 +106,13 @@ static void umr_print_waves_si_ai(struct umr_asic *asic)
> " %08lx %08lx %08lx " // wave_status pc/hi/lo
> "%08lx %08lx %08lx %08lx " // inst0/1 exec hi/lo
> "%08lx %08lx %08lx %08lx %08lx " // HW_ID GPR/LDSALLOC TRAP/IB STS
> -"%08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0\n");
> +"%08lx %08lx %08lx %08lx %08lx %08lx %08lx " // TBA_HI TBA_LO TMA_HI TMA_LO IB_DBG0 M0 MODE\n");
> "\n",
> (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id.simd_id, (unsigned)wd->ws.hw_id.wave_id,
> (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
> (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.wave_inst_dw1, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
> (unsigned long)wd->ws.hw_id.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value, (unsigned long)wd->ws.ib_sts.value,
> -(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0
> +(unsigned long)wd->ws.tba_hi, (unsigned long)wd->ws.tba_lo, (unsigned long)wd->ws.tma_hi, (unsigned long)wd->ws.tma_lo, (unsigned long)wd->ws.ib_dbg0, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value
> );
> if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
> for (x = 0; x < ((wd->ws.gpr_alloc.sgpr_size + 1) << shift); x += 4)
> @@ -318,6 +318,24 @@ static void umr_print_waves_si_ai(struct umr_asic *asic)
> PP(trapsts, excp_cycle);
> PP(trapsts, dp_rate);
>
> + Hv("MODE", wd->ws.mode.value);
> + PP(mode, fp_round);
> + PP(mode, fp_denorm);
> + PP(mode, dx10_clamp);
> + PP(mode, ieee);
> + PP(mode, lod_clamped);
> + PP(mode, debug_en);
> + PP(mode, excp_en);
> + if (asic->family > FAMILY_VI)
> + PP(mode, fp16_ovfl);
> + PP(mode, pops_packer0);
> + PP(mode, pops_packer1);
> + if (asic->family > FAMILY_VI)
> + PP(mode, disable_perf);
> + PP(mode, gpr_idx_en);
> + PP(mode, vskip);
> + PP(mode, csp);
> +
> printf("\n"); col = 0;
> }
> wd = wd->next;
> @@ -393,7 +411,7 @@ static void umr_print_waves_nv(struct umr_asic *asic)
> while (wd) {
> if (!asic->options.bitfields && first) {
> static const char* titles[] = {
> - "WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "EXEC_HI", "EXEC_LO", "HW_ID1", "HW_ID2", "GPRALLOC", "LDSALLOC", "TRAPSTS", "IBSTS1", "IBSTS2", "IB_DBG1", "M0", NULL
> + "WAVE_STATUS", "PC_HI", "PC_LO", "INST_DW0", "EXEC_HI", "EXEC_LO", "HW_ID1", "HW_ID2", "GPRALLOC", "LDSALLOC", "TRAPSTS", "IBSTS1", "IBSTS2", "IB_DBG1", "M0", "MODE", NULL
> };
> first = 0;
> printf("SE SA WGP SIMD WAVE# ");
> @@ -408,13 +426,13 @@ static void umr_print_waves_nv(struct umr_asic *asic)
> " %08lx %08lx %08lx " // wave_status pc/hi/lo
> "%08lx %08lx %08lx " // inst0 exec hi/lo
> "%08lx %08lx %08lx %08lx %08lx %08lx %08lx " // HW_ID1 HW_ID2 GPR/LDSALLOC TRAP/IB STS
> -"%08lx %08lx " // IB_DBG1 M0\n");
> +"%08lx %08lx %08lx " // IB_DBG1 M0 MODE\n");
> "\n",
> (unsigned)wd->se, (unsigned)wd->sh, (unsigned)wd->cu, (unsigned)wd->ws.hw_id1.simd_id, (unsigned)wd->ws.hw_id1.wave_id, // TODO: wgp printed out won't match geometry for now w.r.t. to SPI
> (unsigned long)wd->ws.wave_status.value, (unsigned long)wd->ws.pc_hi, (unsigned long)wd->ws.pc_lo,
> (unsigned long)wd->ws.wave_inst_dw0, (unsigned long)wd->ws.exec_hi, (unsigned long)wd->ws.exec_lo,
> (unsigned long)wd->ws.hw_id1.value, (unsigned long)wd->ws.hw_id2.value, (unsigned long)wd->ws.gpr_alloc.value, (unsigned long)wd->ws.lds_alloc.value, (unsigned long)wd->ws.trapsts.value,
> -(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0);
> +(unsigned long)wd->ws.ib_sts.value, (unsigned long)wd->ws.ib_sts2.value, (unsigned long)wd->ws.ib_dbg1, (unsigned long)wd->ws.m0, (unsigned long)wd->ws.mode.value);
> if (wd->ws.wave_status.halt || wd->ws.wave_status.fatal_halt) {
> for (x = 0; x < 112; x += 4)
> printf(">SGPRS[%u..%u] = { %08lx, %08lx, %08lx, %08lx }\n",
> @@ -627,6 +645,17 @@ static void umr_print_waves_nv(struct umr_asic *asic)
> PP(trapsts, excp_group_mask);
> PP(trapsts, utc_error);
>
> + Hv("MODE", wd->ws.mode.value);
> + PP(mode, fp_round);
> + PP(mode, fp_denorm);
> + PP(mode, dx10_clamp);
> + PP(mode, ieee);
> + PP(mode, lod_clamped);
> + PP(mode, debug_en);
> + PP(mode, excp_en);
> + PP(mode, fp16_ovfl);
> + PP(mode, disable_perf);
> +
> printf("\n"); col = 0;
> }
> wd = wd->next;
> diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
> index dae858e..71e595f 100644
> --- a/src/lib/scan_waves.c
> +++ b/src/lib/scan_waves.c
> @@ -139,6 +139,7 @@ int umr_read_wave_status_via_mmio_gfx8_9(struct umr_asic *asic, uint32_t simd, u
> }
> dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_DBG0")->addr);
> dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_M0")->addr);
> + dst[(*no_fields)++] = wave_read_ind(asic, simd, wave, umr_find_reg_data(asic, "ixSQ_WAVE_MODE")->addr);
>
> return 0;
> }
> @@ -163,6 +164,7 @@ int umr_read_wave_status_via_mmio_gfx10(struct umr_asic *asic, uint32_t wave, ui
> dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_STS2")->addr);
> dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_IB_DBG1")->addr);
> dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_M0")->addr);
> + dst[(*no_fields)++] = wave_read_ind_nv(asic, wave, umr_find_reg_data(asic, "ixSQ_WAVE_MODE")->addr);
>
> return 0;
> }
> @@ -258,6 +260,19 @@ static int umr_parse_wave_data_gfx_8(struct umr_asic *asic, struct umr_wave_stat
> ws->tma_hi = buf[x++];
> ws->ib_dbg0 = buf[x++];
> ws->m0 = buf[x++];
> +
> + ws->mode.value = value = buf[x++];
> + reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE");
> + ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value);
> + ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value);
> + ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value);
> + ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value);
> + ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value);
> + ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value);
> + ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value);
> + ws->mode.gpr_idx_en = umr_bitslice_reg(asic, reg, "GPR_IDX_EN", value);
> + ws->mode.vskip = umr_bitslice_reg(asic, reg, "VSKIP", value);
> + ws->mode.csp = umr_bitslice_reg(asic, reg, "CSP", value);
> return 0;
> }
>
> @@ -348,6 +363,23 @@ static int umr_parse_wave_data_gfx_9(struct umr_asic *asic, struct umr_wave_stat
>
> ws->ib_dbg0 = buf[x++];
> ws->m0 = buf[x++];
> +
> + ws->mode.value = value = buf[x++];
> + reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE");
> + ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value);
> + ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value);
> + ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value);
> + ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value);
> + ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value);
> + ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value);
> + ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value);
> + ws->mode.fp16_ovfl = umr_bitslice_reg(asic, reg, "FP16_OVFL", value);
> + ws->mode.pops_packer0 = umr_bitslice_reg(asic, reg, "POPS_PACKER0", value);
> + ws->mode.pops_packer1 = umr_bitslice_reg(asic, reg, "POPS_PACKER1", value);
> + ws->mode.disable_perf = umr_bitslice_reg(asic, reg, "DISABLE_PERF", value);
> + ws->mode.gpr_idx_en = umr_bitslice_reg(asic, reg, "GPR_IDX_EN", value);
> + ws->mode.vskip = umr_bitslice_reg(asic, reg, "VSKIP", value);
> + ws->mode.csp = umr_bitslice_reg(asic, reg, "CSP", value);
> return 0;
> }
>
> @@ -468,6 +500,18 @@ static int umr_parse_wave_data_gfx_10(struct umr_asic *asic, struct umr_wave_sta
>
> ws->ib_dbg1 = buf[x++];
> ws->m0 = buf[x++];
> +
> + ws->mode.value = value = buf[x++];
> + reg = umr_find_reg_data(asic, "ixSQ_WAVE_MODE");
> + ws->mode.fp_round = umr_bitslice_reg(asic, reg, "FP_ROUND", value);
> + ws->mode.fp_denorm = umr_bitslice_reg(asic, reg, "FP_DENORM", value);
> + ws->mode.dx10_clamp = umr_bitslice_reg(asic, reg, "DX10_CLAMP", value);
> + ws->mode.ieee = umr_bitslice_reg(asic, reg, "IEEE", value);
> + ws->mode.lod_clamped = umr_bitslice_reg(asic, reg, "LOD_CLAMPED", value);
> + ws->mode.debug_en = umr_bitslice_reg(asic, reg, "DEBUG_EN", value);
> + ws->mode.excp_en = umr_bitslice_reg(asic, reg, "EXCP_EN", value);
> + ws->mode.fp16_ovfl = umr_bitslice_reg(asic, reg, "FP16_OVFL", value);
> + ws->mode.disable_perf = umr_bitslice_reg(asic, reg, "DISABLE_PERF", value);
> return 0;
> }
>
> diff --git a/src/umr.h b/src/umr.h
> index ca62560..f541dd3 100644
> --- a/src/umr.h
> +++ b/src/umr.h
> @@ -713,6 +713,25 @@ struct umr_wave_status {
> excp_group_mask,
> utc_error;
> } trapsts;
> +
> + struct {
> + uint32_t
> + value,
> + fp_round,
> + fp_denorm,
> + dx10_clamp,
> + ieee,
> + lod_clamped,
> + debug_en,
> + excp_en,
> + fp16_ovfl,
> + pops_packer0,
> + pops_packer1,
> + disable_perf,
> + gpr_idx_en,
> + vskip,
> + csp;
> + } mode;
> };
>
> struct umr_wave_data {
> --
> 2.25.1
>
More information about the amd-gfx mailing list