[PATCH 1/2] lib/amdgpu: add ASIC-aware gfx hooks and introduce amd_ip_blocks_ex
vitaly.prosyak at amd.com
vitaly.prosyak at amd.com
Thu Aug 14 03:18:06 UTC 2025
From: Vitaly Prosyak <vitaly.prosyak at amd.com>
Add ASIC-aware gfx hooks; introduce amd_ip_blocks_ex.c and initialize it
from amd_ip_blocks.c. The amdgpu IGT test suite currently hard-codes
PACKET3 programming for basic compute bring-up (state setup,
DISPATCH_DIRECT, and WR_CONFIRM write). Those sequences vary by GFX
family (e.g., dispatch flags, TMPRING handling), so baking them into
tests makes the code brittle and difficult to extend to new ASICs.
This change introduces three small, ASIC-aware helpers on
the amdgpu_ip_funcs table:
  - gfx_program_compute(...) – programs a minimal compute pipeline (PGM
    pointers, resources, user data, thread dims).
  - gfx_dispatch_direct(...) – emits a direct compute dispatch
    (family-specific tail flags handled inside).
  - gfx_write_confirm(...) – emits a confirmed WRITE_DATA to memory for
    simple dependency validation.
Implementations live in a new file, lib/amdgpu/amd_ip_blocks_ex.c, which provides
safe defaults and light per-family overrides (gfx9/gfx10/gfx11). We
keep all ASIC knowledge in the ASIC layer (amdgpu_ip_funcs) rather
than polluting the generic PM4 builder (amdgpu_cmd_base), which remains
a packet emitter (attach/emit helpers only).
amd_ip_blocks.c now calls amd_ip_blocks_ex_init() after populating the
device’s amdgpu_ip_funcs, so callers get the correct per-family
hooks without changing their code paths.
Cc: Christian König <christian.koenig at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
Cc: Jesse Zhang <jesse.zhang at amd.com>
Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>
---
lib/amdgpu/amd_ip_blocks.c | 7 ++
lib/amdgpu/amd_ip_blocks.h | 31 ++++++
lib/amdgpu/amd_ip_blocks_ex.c | 181 ++++++++++++++++++++++++++++++++++
lib/meson.build | 1 +
4 files changed, 220 insertions(+)
create mode 100644 lib/amdgpu/amd_ip_blocks_ex.c
diff --git a/lib/amdgpu/amd_ip_blocks.c b/lib/amdgpu/amd_ip_blocks.c
index 80cd89f7c..d8d7efbef 100644
--- a/lib/amdgpu/amd_ip_blocks.c
+++ b/lib/amdgpu/amd_ip_blocks.c
@@ -1031,6 +1031,13 @@ amdgpu_device_ip_block_add(struct amdgpu_ip_block_version *ip_block_version)
amdgpu_ips.ip_blocks[amdgpu_ips.num_ip_blocks++] = ip_block_version;
+ if (ip_block_version->funcs &&
+ (!ip_block_version->funcs->gfx_program_compute ||
+ !ip_block_version->funcs->gfx_dispatch_direct ||
+ !ip_block_version->funcs->gfx_write_confirm )) {
+ amd_ip_blocks_ex_init(ip_block_version->funcs);
+ }
+
return 0;
}
diff --git a/lib/amdgpu/amd_ip_blocks.h b/lib/amdgpu/amd_ip_blocks.h
index b5731e13b..7154b45ba 100644
--- a/lib/amdgpu/amd_ip_blocks.h
+++ b/lib/amdgpu/amd_ip_blocks.h
@@ -206,6 +206,7 @@ struct amdgpu_ring_context {
struct drm_amdgpu_info_uq_fw_areas info;
};
+struct amdgpu_cmd_base;
struct amdgpu_ip_funcs {
uint32_t family_id;
@@ -231,6 +232,32 @@ struct amdgpu_ip_funcs {
void (*userq_create)(amdgpu_device_handle device_handle, struct amdgpu_ring_context *ctxt, unsigned int type);
void (*userq_submit)(amdgpu_device_handle device, struct amdgpu_ring_context *ring_context, unsigned int ip_type, uint64_t mc_address);
void (*userq_destroy)(amdgpu_device_handle device_handle, struct amdgpu_ring_context *ctxt, unsigned int type);
+
+ /* program minimal compute pipeline for a raw PM4 launch */
+ void (*gfx_program_compute)(
+ const struct amdgpu_ip_funcs *funcs,
+ struct amdgpu_cmd_base *base,
+ uint64_t code_addr,
+ uint64_t user_data0_addr,
+ uint32_t rsrc1_dw,
+ uint32_t rsrc2_or_tmp,
+ uint32_t thr_x, uint32_t thr_y, uint32_t thr_z
+ );
+
+ /* launch a direct dispatch (grid + family flags) */
+ void (*gfx_dispatch_direct)(
+ const struct amdgpu_ip_funcs *funcs,
+ struct amdgpu_cmd_base *base,
+ uint32_t grid_x, uint32_t grid_y, uint32_t grid_z,
+ uint32_t flags
+ );
+
+ /* WRITE_DATA with WR_CONFIRM (or family equivalent) */
+ void (*gfx_write_confirm)(
+ const struct amdgpu_ip_funcs *funcs,
+ struct amdgpu_cmd_base *base,
+ uint64_t dst_addr, uint32_t value
+ );
};
extern const struct amdgpu_ip_block_version gfx_v6_0_ip_block;
@@ -320,4 +347,8 @@ amdgpu_bo_alloc_and_map_uq(amdgpu_device_handle device_handle, unsigned int size
int
amdgpu_timeline_syncobj_wait(amdgpu_device_handle device_handle,
uint32_t timeline_syncobj_handle, uint64_t point);
+
+void
+amd_ip_blocks_ex_init(struct amdgpu_ip_funcs *funcs);
+
#endif
diff --git a/lib/amdgpu/amd_ip_blocks_ex.c b/lib/amdgpu/amd_ip_blocks_ex.c
new file mode 100644
index 000000000..21c4d952c
--- /dev/null
+++ b/lib/amdgpu/amd_ip_blocks_ex.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright 2025 Advanced Micro Devices, Inc.
+ */
+
+#include <stdint.h>
+#include "amd_ip_blocks.h"
+#include "amd_PM4.h"
+#include "amdgpu_asic_addr.h"
+
+/*
+ * Default gfx_program_compute hook.
+ *
+ * Emits, in order: CONTEXT_CONTROL, CLEAR_STATE, then SET_SH_REG writes
+ * starting at COMPUTE_PGM_LO (code_addr >> 8, code_addr >> 40),
+ * COMPUTE_PGM_RSRC1 (rsrc1_dw followed by rsrc2_or_tmp),
+ * COMPUTE_TMPRING_SIZE (0x100), COMPUTE_USER_DATA_0 (low and high halves
+ * of user_data0_addr), COMPUTE_RESOURCE_LIMITS (0) and
+ * COMPUTE_NUM_THREAD_X (thr_x, thr_y, thr_z).
+ *
+ * Register offsets are resolved through f->get_reg_offset(); all dwords
+ * are appended to @base via base->emit().
+ */
+static void
+gfx_program_compute_default(const struct amdgpu_ip_funcs *f,
+			    struct amdgpu_cmd_base *base,
+			    uint64_t code_addr,
+			    uint64_t user_data0_addr,
+			    uint32_t rsrc1_dw,
+			    uint32_t rsrc2_or_tmp,
+			    uint32_t thr_x, uint32_t thr_y, uint32_t thr_z)
+{
+	/* The program address is emitted shifted right by 8 bits. */
+	const uint32_t pgm_lo = (uint32_t)(code_addr >> 8);
+	const uint32_t pgm_hi = (uint32_t)(code_addr >> 40);
+	const uint32_t udata_lo = (uint32_t)user_data0_addr;
+	const uint32_t udata_hi = (uint32_t)(user_data0_addr >> 32);
+
+	/* Context control followed by clear state. */
+	base->emit(base, PACKET3(PKT3_CONTEXT_CONTROL, 1));
+	base->emit(base, 0x80000000);
+	base->emit(base, 0x80000000);
+	base->emit(base, PACKET3(PKT3_CLEAR_STATE, 0));
+	base->emit(base, 0x80000000);
+
+	/* Shader program address. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+	base->emit(base, f->get_reg_offset(COMPUTE_PGM_LO));
+	base->emit(base, pgm_lo);
+	base->emit(base, pgm_hi);
+
+	/* Two dwords starting at COMPUTE_PGM_RSRC1. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+	base->emit(base, f->get_reg_offset(COMPUTE_PGM_RSRC1));
+	base->emit(base, rsrc1_dw);
+	base->emit(base, rsrc2_or_tmp);
+
+	/* Fixed TMPRING size value. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+	base->emit(base, f->get_reg_offset(COMPUTE_TMPRING_SIZE));
+	base->emit(base, 0x00000100);
+
+	/* 64-bit user data address, low dword first. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+	base->emit(base, f->get_reg_offset(COMPUTE_USER_DATA_0));
+	base->emit(base, udata_lo);
+	base->emit(base, udata_hi);
+
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+	base->emit(base, f->get_reg_offset(COMPUTE_RESOURCE_LIMITS));
+	base->emit(base, 0);
+
+	/* Thread dimensions X, Y, Z. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 3));
+	base->emit(base, f->get_reg_offset(COMPUTE_NUM_THREAD_X));
+	base->emit(base, thr_x);
+	base->emit(base, thr_y);
+	base->emit(base, thr_z);
+}
+
+/*
+ * Default gfx_dispatch_direct hook.
+ *
+ * Emits a DISPATCH_DIRECT packet whose payload is gx, gy, gz and @flags
+ * (passed through unchanged), then calls base->emit_aligned() with
+ * GFX_COMPUTE_NOP. @f is not used by this implementation.
+ */
+static void
+gfx_dispatch_direct_default(const struct amdgpu_ip_funcs *f,
+			    struct amdgpu_cmd_base *base,
+			    uint32_t gx, uint32_t gy, uint32_t gz,
+			    uint32_t flags)
+{
+	const uint32_t payload[] = { gx, gy, gz, flags };
+	unsigned int i;
+
+	base->emit(base, PACKET3(PACKET3_DISPATCH_DIRECT, 3));
+	for (i = 0; i < sizeof(payload) / sizeof(payload[0]); i++)
+		base->emit(base, payload[i]);
+
+	base->emit_aligned(base, 7, GFX_COMPUTE_NOP);
+}
+
+/*
+ * Default gfx_write_confirm hook.
+ *
+ * Emits a WRITE_DATA packet with WRITE_DATA_DST_SEL(5) | WR_CONFIRM,
+ * the low and high dwords of @dst and then @val, followed by a
+ * base->emit_aligned() call with GFX_COMPUTE_NOP. @f is not used by this
+ * implementation.
+ */
+static void
+gfx_write_confirm_default(const struct amdgpu_ip_funcs *f,
+			  struct amdgpu_cmd_base *base,
+			  uint64_t dst, uint32_t val)
+{
+	const uint32_t dst_lo = (uint32_t)dst;
+	const uint32_t dst_hi = (uint32_t)(dst >> 32);
+
+	base->emit(base, PACKET3(PACKET3_WRITE_DATA, 3));
+	base->emit(base, WRITE_DATA_DST_SEL(5) | WR_CONFIRM);
+	base->emit(base, dst_lo);
+	base->emit(base, dst_hi);
+	base->emit(base, val);
+
+	base->emit_aligned(base, 7, GFX_COMPUTE_NOP);
+}
+
+/*
+ * gfx9 variant of gfx_dispatch_direct: forwards the grid dimensions to
+ * gfx_dispatch_direct_default() with the flags dword forced to 0. The
+ * caller-supplied flags argument is ignored.
+ */
+static void
+gfx_dispatch_direct_gfx9(const struct amdgpu_ip_funcs *f,
+			 struct amdgpu_cmd_base *base,
+			 uint32_t gx, uint32_t gy, uint32_t gz,
+			 uint32_t flags_unused)
+{
+	const uint32_t dispatch_flags = 0x00000000;
+
+	gfx_dispatch_direct_default(f, base, gx, gy, gz, dispatch_flags);
+}
+
+/*
+ * gfx10 variant of gfx_dispatch_direct: forwards the grid dimensions to
+ * gfx_dispatch_direct_default() with the flags dword forced to 0x45. The
+ * caller-supplied flags argument is ignored.
+ */
+static void
+gfx_dispatch_direct_gfx10(const struct amdgpu_ip_funcs *f,
+			  struct amdgpu_cmd_base *base,
+			  uint32_t gx, uint32_t gy, uint32_t gz,
+			  uint32_t flags_unused)
+{
+	const uint32_t dispatch_flags = 0x00000045;
+
+	gfx_dispatch_direct_default(f, base, gx, gy, gz, dispatch_flags);
+}
+
+/*
+ * gfx11 variant of gfx_program_compute.
+ *
+ * Emits, in order: CONTEXT_CONTROL, CLEAR_STATE, then SET_SH_REG writes
+ * starting at COMPUTE_PGM_LO (code >> 8, code >> 40), COMPUTE_PGM_RSRC1
+ * (rsrc1 followed by rsrc2), COMPUTE_USER_DATA_0 (low and high halves of
+ * udata0), COMPUTE_RESOURCE_LIMITS (0) and COMPUTE_NUM_THREAD_X
+ * (tx, ty, tz). Unlike gfx_program_compute_default(), no
+ * COMPUTE_TMPRING_SIZE write is emitted.
+ */
+static void
+gfx_program_compute_gfx11(const struct amdgpu_ip_funcs *f,
+			  struct amdgpu_cmd_base *base,
+			  uint64_t code, uint64_t udata0,
+			  uint32_t rsrc1, uint32_t rsrc2,
+			  uint32_t tx, uint32_t ty, uint32_t tz)
+{
+	/* The program address is emitted shifted right by 8 bits. */
+	const uint32_t pgm_lo = (uint32_t)(code >> 8);
+	const uint32_t pgm_hi = (uint32_t)(code >> 40);
+	const uint32_t udata_lo = (uint32_t)udata0;
+	const uint32_t udata_hi = (uint32_t)(udata0 >> 32);
+
+	/* Context control followed by clear state. */
+	base->emit(base, PACKET3(PKT3_CONTEXT_CONTROL, 1));
+	base->emit(base, 0x80000000);
+	base->emit(base, 0x80000000);
+	base->emit(base, PACKET3(PKT3_CLEAR_STATE, 0));
+	base->emit(base, 0x80000000);
+
+	/* Shader program address. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+	base->emit(base, f->get_reg_offset(COMPUTE_PGM_LO));
+	base->emit(base, pgm_lo);
+	base->emit(base, pgm_hi);
+
+	/* Two dwords starting at COMPUTE_PGM_RSRC1. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+	base->emit(base, f->get_reg_offset(COMPUTE_PGM_RSRC1));
+	base->emit(base, rsrc1);
+	base->emit(base, rsrc2);
+
+	/* 64-bit user data address, low dword first. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 2));
+	base->emit(base, f->get_reg_offset(COMPUTE_USER_DATA_0));
+	base->emit(base, udata_lo);
+	base->emit(base, udata_hi);
+
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 1));
+	base->emit(base, f->get_reg_offset(COMPUTE_RESOURCE_LIMITS));
+	base->emit(base, 0);
+
+	/* Thread dimensions X, Y, Z. */
+	base->emit(base, PACKET3(PKT3_SET_SH_REG, 3));
+	base->emit(base, f->get_reg_offset(COMPUTE_NUM_THREAD_X));
+	base->emit(base, tx);
+	base->emit(base, ty);
+	base->emit(base, tz);
+}
+
+/*
+ * gfx11 variant of gfx_dispatch_direct: forwards the grid dimensions to
+ * gfx_dispatch_direct_default() with the flags dword forced to 0x45. The
+ * caller-supplied flags argument is ignored.
+ */
+static void
+gfx_dispatch_direct_gfx11(const struct amdgpu_ip_funcs *f,
+			  struct amdgpu_cmd_base *base,
+			  uint32_t gx, uint32_t gy, uint32_t gz,
+			  uint32_t flags_unused)
+{
+	const uint32_t dispatch_flags = 0x00000045;
+
+	gfx_dispatch_direct_default(f, base, gx, gy, gz, dispatch_flags);
+}
+
+/*
+ * Install the gfx_program_compute / gfx_dispatch_direct / gfx_write_confirm
+ * hooks on @funcs, choosing per-family variants from funcs->family_id and
+ * falling back to the *_default implementations.
+ *
+ * Only hooks that are still NULL are filled in. The call site invokes this
+ * function when any one of the three hooks is missing, so unconditionally
+ * assigning all of them would silently replace a hook that an IP block
+ * already provides. A NULL @funcs is ignored.
+ *
+ * NOTE(review): the selection reads funcs->family_id at call time; confirm
+ * the caller has already populated it for the probed device, otherwise the
+ * switch below always takes the default path.
+ * NOTE(review): AMDGPU_FAMILY_NV and AMDGPU_FAMILY_VGH are grouped with the
+ * gfx9 dispatch variant and AMDGPU_FAMILY_AI has no case; confirm this
+ * grouping is intended.
+ */
+void amd_ip_blocks_ex_init(struct amdgpu_ip_funcs *funcs)
+{
+	if (!funcs)
+		return;
+
+	/* Family-specific variants take precedence over the defaults. */
+	switch (funcs->family_id) {
+	case AMDGPU_FAMILY_RV:
+	case AMDGPU_FAMILY_NV:
+	case AMDGPU_FAMILY_VGH:
+		if (!funcs->gfx_dispatch_direct)
+			funcs->gfx_dispatch_direct = gfx_dispatch_direct_gfx9;
+		break;
+	case AMDGPU_FAMILY_YC:
+	case AMDGPU_FAMILY_GC_10_3_6:
+	case AMDGPU_FAMILY_GC_10_3_7:
+		if (!funcs->gfx_dispatch_direct)
+			funcs->gfx_dispatch_direct = gfx_dispatch_direct_gfx10;
+		break;
+	case AMDGPU_FAMILY_GC_11_0_0:
+	case AMDGPU_FAMILY_GC_11_0_1:
+	case AMDGPU_FAMILY_GC_11_5_0:
+		if (!funcs->gfx_program_compute)
+			funcs->gfx_program_compute = gfx_program_compute_gfx11;
+		if (!funcs->gfx_dispatch_direct)
+			funcs->gfx_dispatch_direct = gfx_dispatch_direct_gfx11;
+		break;
+	case AMDGPU_FAMILY_GC_12_0_0:
+		/* TODO: no gfx12-specific hooks yet; the defaults are used. */
+		break;
+	default:
+		break;
+	}
+
+	/* Anything still unset gets the default implementation. */
+	if (!funcs->gfx_program_compute)
+		funcs->gfx_program_compute = gfx_program_compute_default;
+	if (!funcs->gfx_dispatch_direct)
+		funcs->gfx_dispatch_direct = gfx_dispatch_direct_default;
+	if (!funcs->gfx_write_confirm)
+		funcs->gfx_write_confirm = gfx_write_confirm_default;
+}
+
diff --git a/lib/meson.build b/lib/meson.build
index 2eaca42a4..f078dad4e 100644
--- a/lib/meson.build
+++ b/lib/meson.build
@@ -160,6 +160,7 @@ if libdrm_amdgpu.found()
'amdgpu/amd_cs_radv.c',
'amdgpu/amd_gfx.c',
'amdgpu/amd_ip_blocks.c',
+ 'amdgpu/amd_ip_blocks_ex.c',
'amdgpu/amd_shaders.c',
'amdgpu/amd_gfx_v8_0.c',
'amdgpu/amd_gfx_v9_0.c',
--
2.34.1
More information about the igt-dev
mailing list