[PATCH] drm/amdkfd: Fix instruction hazard in gfx12 trap handler
Jay Cornwall
jay.cornwall at amd.com
Thu Feb 6 21:53:32 UTC 2025
VALU instructions with SGPR source need wait states to avoid hazard
with later instructions.
Signed-off-by: Jay Cornwall <jay.cornwall at amd.com>
Cc: Lancelot Six <lancelot.six at amd.com>
---
.../gpu/drm/amd/amdkfd/cwsr_trap_handler.h | 406 ++++++++++++++++--
.../amd/amdkfd/cwsr_trap_handler_gfx12.asm | 13 +
2 files changed, 392 insertions(+), 27 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
index 651660958e5b..112f28ff0018 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler.h
@@ -3644,7 +3644,7 @@ static const uint32_t cwsr_trap_gfx9_4_3_hex[] = {
};
static const uint32_t cwsr_trap_gfx12_hex[] = {
- 0xbfa00001, 0xbfa0024b,
+ 0xbfa00001, 0xbfa0050b,
0xb0804009, 0xb8f8f804,
0x9178ff78, 0x00008c00,
0xb8fbf811, 0x8b6eff78,
@@ -3718,7 +3718,15 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0x00011677, 0xd7610000,
0x00011a79, 0xd7610000,
0x00011c7e, 0xd7610000,
- 0x00011e7f, 0xbefe00ff,
+ 0x00011e7f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xbefe00ff,
0x00003fff, 0xbeff0080,
0xee0a407a, 0x000c0000,
0x00004000, 0xd760007a,
@@ -3759,33 +3767,153 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0xbf94fffe, 0xb8faf804,
0x8b7a847a, 0x91788478,
0x8c787a78, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0x0000fa71, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xd7610002, 0x0000fa6c,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x807d817d, 0x917aff6d,
0x80000000, 0xd7610002,
- 0x0000fa7a, 0x807d817d,
+ 0x0000fa7a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xd7610002, 0x0000fa6e,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x807d817d, 0xd7610002,
- 0x0000fa6f, 0x807d817d,
+ 0x0000fa6f, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xd7610002, 0x0000fa78,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x807d817d, 0xb8faf811,
0xd7610002, 0x0000fa7a,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x807d817d, 0xd7610002,
- 0x0000fa7b, 0x807d817d,
+ 0x0000fa7b, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xb8f1f801, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0x0000fa71, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xb8f1f814, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0x0000fa71, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xb8f1f815, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0x0000fa71, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xb8f1f812, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0x0000fa71, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xb8f1f813, 0xd7610002,
- 0x0000fa71, 0x807d817d,
+ 0x0000fa71, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xb8faf802, 0xd7610002,
- 0x0000fa7a, 0x807d817d,
+ 0x0000fa7a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x807d817d,
0xbefa50c1, 0xbfc70000,
0xd7610002, 0x0000fa7a,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x807d817d, 0xbefe00ff,
0x0000ffff, 0xbeff0080,
0xc4068070, 0x008ce802,
@@ -3802,57 +3930,281 @@ static const uint32_t cwsr_trap_gfx12_hex[] = {
0xbe864106, 0xbe884108,
0xbe8a410a, 0xbe8c410c,
0xbe8e410e, 0xd7610002,
- 0x0000f200, 0x80798179,
+ 0x0000f200, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f201,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f202, 0x80798179,
+ 0x0000f202, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f203,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f204, 0x80798179,
+ 0x0000f204, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f205,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f206, 0x80798179,
+ 0x0000f206, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f207,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f208, 0x80798179,
+ 0x0000f208, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f209,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f20a, 0x80798179,
+ 0x0000f20a, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f20b,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f20c, 0x80798179,
+ 0x0000f20c, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f20d,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f20e, 0x80798179,
+ 0x0000f20e, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f20f,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xbf06a079,
0xbfa10007, 0xc4068070,
0x008ce802, 0x00000000,
0x8070ff70, 0x00000080,
0xbef90080, 0x7e040280,
0x807d907d, 0xbf0aff7d,
- 0x00000060, 0xbfa2ffbb,
+ 0x00000060, 0xbfa2febb,
0xbe804100, 0xbe824102,
0xbe844104, 0xbe864106,
0xbe884108, 0xbe8a410a,
0xd7610002, 0x0000f200,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f201, 0x80798179,
+ 0x0000f201, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f202,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f203, 0x80798179,
+ 0x0000f203, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f204,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f205, 0x80798179,
+ 0x0000f205, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f206,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f207, 0x80798179,
+ 0x0000f207, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f208,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f209, 0x80798179,
+ 0x0000f209, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xd7610002, 0x0000f20a,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
+ 0xd8500000, 0x00000000,
0x80798179, 0xd7610002,
- 0x0000f20b, 0x80798179,
+ 0x0000f20b, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0xd8500000,
+ 0x00000000, 0x80798179,
0xc4068070, 0x008ce802,
0x00000000, 0xbefe00c1,
0x857d9973, 0x8b7d817d,
diff --git a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
index 7b9d36e5fa43..a952d4d4eeaa 100644
--- a/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
+++ b/drivers/gpu/drm/amd/amdkfd/cwsr_trap_handler_gfx12.asm
@@ -30,6 +30,7 @@
#define CHIP_GFX12 37
#define SINGLE_STEP_MISSED_WORKAROUND 1 //workaround for lost TRAP_AFTER_INST exception when SAVECTX raised
+#define HAVE_VALU_SGPR_HAZARD (ASIC_FAMILY == CHIP_GFX12)
var SQ_WAVE_STATE_PRIV_BARRIER_COMPLETE_MASK = 0x4
var SQ_WAVE_STATE_PRIV_SCC_SHIFT = 9
@@ -351,6 +352,7 @@ L_HAVE_VGPRS:
v_writelane_b32 v0, ttmp13, 0xD
v_writelane_b32 v0, exec_lo, 0xE
v_writelane_b32 v0, exec_hi, 0xF
+ valu_sgpr_hazard()
s_mov_b32 exec_lo, 0x3FFF
s_mov_b32 exec_hi, 0x0
@@ -1059,6 +1061,7 @@ end
function write_hwreg_to_v2(s)
// Copy into VGPR for later TCP store.
v_writelane_b32 v2, s, m0
+ valu_sgpr_hazard()
s_add_u32 m0, m0, 0x1
end
@@ -1067,6 +1070,7 @@ function write_16sgpr_to_v2(s)
// Copy into VGPR for later TCP store.
for var sgpr_idx = 0; sgpr_idx < 16; sgpr_idx ++
v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ valu_sgpr_hazard()
s_add_u32 ttmp13, ttmp13, 0x1
end
end
@@ -1075,6 +1079,7 @@ function write_12sgpr_to_v2(s)
// Copy into VGPR for later TCP store.
for var sgpr_idx = 0; sgpr_idx < 12; sgpr_idx ++
v_writelane_b32 v2, s[sgpr_idx], ttmp13
+ valu_sgpr_hazard()
s_add_u32 ttmp13, ttmp13, 0x1
end
end
@@ -1128,3 +1133,11 @@ function get_wave_size2(s_reg)
s_getreg_b32 s_reg, hwreg(HW_REG_WAVE_STATUS,SQ_WAVE_STATUS_WAVE64_SHIFT,SQ_WAVE_STATUS_WAVE64_SIZE)
s_lshl_b32 s_reg, s_reg, S_WAVE_SIZE
end
+
+function valu_sgpr_hazard
+#if HAVE_VALU_SGPR_HAZARD
+ for var rep = 0; rep < 8; rep ++
+ ds_nop
+ end
+#endif
+end
--
2.34.1
More information about the amd-gfx
mailing list