[PATCH umr 04/17] gfx10+: iterate only over existing WGPs when scanning waves
Nicolai Hähnle
nicolai.haehnle at amd.com
Tue Jun 6 09:17:12 UTC 2023
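We overload "cu" to mean "wgp" in a bunch of places, but max_cu_per_sh
is always in terms of CUs. On gfx10+ the wave scan indexes by WGP, and
each WGP covers two CUs, so looping up to max_cu_per_sh visits WGP
indices that do not exist. Iterate over max_cu_per_sh / 2 WGPs on that
path instead; the FAMILY_AI-and-older path keeps iterating over CUs.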
Signed-off-by: Nicolai Hähnle <nicolai.haehnle at amd.com>
---
src/lib/scan_waves.c | 24 +++++++++++++-----------
1 file changed, 13 insertions(+), 11 deletions(-)
diff --git a/src/lib/scan_waves.c b/src/lib/scan_waves.c
index 767520c..3279cc2 100644
--- a/src/lib/scan_waves.c
+++ b/src/lib/scan_waves.c
@@ -618,48 +618,50 @@ static int umr_scan_wave_simd(struct umr_asic *asic, uint32_t se, uint32_t sh, u
return 0;
}
/**
* umr_scan_wave_data - Scan for any halted valid waves
*
* Returns NULL on error (or no waves found).
*/
struct umr_wave_data *umr_scan_wave_data(struct umr_asic *asic)
{
- uint32_t se, sh, cu, simd;
+ uint32_t se, sh, simd;
struct umr_wave_data *ohead, *head, **ptail;
int r;
ohead = head = calloc(1, sizeof *head);
if (!head) {
asic->err_msg("[ERROR]: Out of memory\n");
return NULL;
}
ptail = &head;
for (se = 0; se < asic->config.gfx.max_shader_engines; se++)
- for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++)
- for (cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+ for (sh = 0; sh < asic->config.gfx.max_sh_per_se; sh++) {
if (asic->family <= FAMILY_AI) {
- asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, &(*ptail)->ws);
- if ((*ptail)->ws.sq_info.busy) {
- for (simd = 0; simd < 4; simd++) {
- r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
- if (r < 0)
- goto error;
+ for (uint32_t cu = 0; cu < asic->config.gfx.max_cu_per_sh; cu++) {
+ asic->wave_funcs.get_wave_sq_info(asic, se, sh, cu, &(*ptail)->ws);
+ if ((*ptail)->ws.sq_info.busy) {
+ for (simd = 0; simd < 4; simd++) {
+ r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
+ if (r < 0)
+ goto error;
+ }
}
}
} else {
+ for (uint32_t wgp = 0; wgp < asic->config.gfx.max_cu_per_sh / 2; wgp++)
for (simd = 0; simd < 4; simd++) {
- asic->wave_funcs.get_wave_sq_info(asic, se, sh, MANY_TO_INSTANCE(cu, simd), &(*ptail)->ws);
+ asic->wave_funcs.get_wave_sq_info(asic, se, sh, MANY_TO_INSTANCE(wgp, simd), &(*ptail)->ws);
if ((*ptail)->ws.sq_info.busy) {
- r = umr_scan_wave_simd(asic, se, sh, cu, simd, &ptail);
+ r = umr_scan_wave_simd(asic, se, sh, wgp, simd, &ptail);
if (r < 0)
goto error;
}
}
}
}
// drop the pre-allocated tail node
free(*ptail);
*ptail = NULL;
--
2.40.0
More information about the amd-gfx mailing list