[PATCH 9/11] libhsakmt: hsaKmtGetNodeProperties add gpu_id
Philip Yang
Philip.Yang at amd.com
Tue Jun 28 14:50:18 UTC 2022
Add KFDGpuID to HsaNodeProperties to return the gpu_id to upper layers.
The gpu_id is a hash ID generated by KFD to distinguish the GPUs on the system.
ROCr and ROCProfiler will use the gpu_id to analyze SMI event messages.
Change-Id: I6eabe6849230e04120674f5bc55e6ea254a532d6
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
include/hsakmttypes.h | 4 +++-
src/fmm.c | 7 +++----
src/libhsakmt.h | 1 -
src/topology.c | 26 ++++++++++++--------------
4 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
index 9063f85..ab2591b 100644
--- a/include/hsakmttypes.h
+++ b/include/hsakmttypes.h
@@ -328,7 +328,9 @@ typedef struct _HsaNodeProperties
HSAuint32 VGPRSizePerCU; // VGPR size in bytes per CU
HSAuint32 SGPRSizePerCU; // SGPR size in bytes per CU
- HSAuint8 Reserved[12];
+
+ HSAuint32 KFDGpuID; // GPU Hash ID generated by KFD
+ HSAuint8 Reserved[8];
} HsaNodeProperties;
diff --git a/src/fmm.c b/src/fmm.c
index 35da3b8..92b76e1 100644
--- a/src/fmm.c
+++ b/src/fmm.c
@@ -2170,7 +2170,6 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
{
uint32_t i;
int32_t gpu_mem_id = 0;
- uint32_t gpu_id;
HsaNodeProperties props;
struct kfd_process_device_apertures *process_apertures;
uint32_t num_of_sysfs_nodes;
@@ -2235,14 +2234,14 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
for (i = 0; i < NumNodes; i++) {
memset(&props, 0, sizeof(props));
- ret = topology_sysfs_get_node_props(i, &props, &gpu_id, NULL, NULL);
+ ret = topology_sysfs_get_node_props(i, &props, NULL, NULL);
if (ret != HSAKMT_STATUS_SUCCESS)
goto sysfs_parse_failed;
topology_setup_is_dgpu_param(&props);
/* Skip non-GPU nodes */
- if (gpu_id != 0) {
+ if (props.KFDGpuID) {
int fd = open_drm_render_device(props.DrmRenderMinor);
if (fd <= 0) {
ret = HSAKMT_STATUS_ERROR;
@@ -2254,7 +2253,7 @@ HSAKMT_STATUS fmm_init_process_apertures(unsigned int NumNodes)
gpu_mem[gpu_mem_count].EngineId.ui32.Stepping = props.EngineId.ui32.Stepping;
gpu_mem[gpu_mem_count].drm_render_fd = fd;
- gpu_mem[gpu_mem_count].gpu_id = gpu_id;
+ gpu_mem[gpu_mem_count].gpu_id = props.KFDGpuID;
gpu_mem[gpu_mem_count].local_mem_size = props.LocalMemSize;
gpu_mem[gpu_mem_count].device_id = props.DeviceId;
gpu_mem[gpu_mem_count].node_id = i;
diff --git a/src/libhsakmt.h b/src/libhsakmt.h
index e4246e0..822744b 100644
--- a/src/libhsakmt.h
+++ b/src/libhsakmt.h
@@ -173,7 +173,6 @@ HSAKMT_STATUS validate_nodeid_array(uint32_t **gpu_id_array,
uint32_t NumberOfNodes, uint32_t *NodeArray);
HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id, HsaNodeProperties *props,
- uint32_t *gpu_id,
bool *p2p_links, uint32_t *num_p2pLinks);
HSAKMT_STATUS topology_sysfs_get_system_props(HsaSystemProperties *props);
void topology_setup_is_dgpu_param(HsaNodeProperties *props);
diff --git a/src/topology.c b/src/topology.c
index 81ff62f..99a6a03 100644
--- a/src/topology.c
+++ b/src/topology.c
@@ -56,7 +56,6 @@
#define KFD_SYSFS_PATH_NODES "/sys/devices/virtual/kfd/kfd/topology/nodes"
typedef struct {
- uint32_t gpu_id;
HsaNodeProperties node;
HsaMemoryProperties *mem; /* node->NumBanks elements */
HsaCacheProperties *cache;
@@ -1037,7 +1036,6 @@ static int topology_get_marketing_name(int minor, uint16_t *marketing_name)
HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
HsaNodeProperties *props,
- uint32_t *gpu_id,
bool *p2p_links,
uint32_t *num_p2pLinks)
{
@@ -1056,13 +1054,14 @@ HSAKMT_STATUS topology_sysfs_get_node_props(uint32_t node_id,
HSAKMT_STATUS ret = HSAKMT_STATUS_SUCCESS;
assert(props);
- assert(gpu_id);
ret = topology_sysfs_map_node_id(node_id, &sys_node_id);
if (ret != HSAKMT_STATUS_SUCCESS)
return ret;
/* Retrieve the GPU ID */
- ret = topology_sysfs_get_gpu_id(sys_node_id, gpu_id);
+ ret = topology_sysfs_get_gpu_id(sys_node_id, &props->KFDGpuID);
+ if (ret != HSAKMT_STATUS_SUCCESS)
+ return ret;
read_buf = malloc(PAGE_SIZE);
if (!read_buf)
@@ -1723,7 +1722,7 @@ static int32_t gpu_get_direct_link_cpu(uint32_t gpu_node, node_props_t *node_pro
HsaIoLinkProperties *props = node_props[gpu_node].link;
uint32_t i;
- if (!node_props[gpu_node].gpu_id || !props ||
+ if (!node_props[gpu_node].node.KFDGpuID || !props ||
node_props[gpu_node].node.NumIOLinks == 0)
return -1;
@@ -1776,7 +1775,7 @@ static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2,
return HSAKMT_STATUS_INVALID_PARAMETER;
/* CPU->CPU is not an indirect link */
- if (!node_props[node1].gpu_id && !node_props[node2].gpu_id)
+ if (!node_props[node1].node.KFDGpuID && !node_props[node2].node.KFDGpuID)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
if (node_props[node1].node.HiveID &&
@@ -1784,16 +1783,16 @@ static HSAKMT_STATUS get_indirect_iolink_info(uint32_t node1, uint32_t node2,
node_props[node1].node.HiveID == node_props[node2].node.HiveID)
return HSAKMT_STATUS_INVALID_PARAMETER;
- if (node_props[node1].gpu_id)
+ if (node_props[node1].node.KFDGpuID)
dir_cpu1 = gpu_get_direct_link_cpu(node1, node_props);
- if (node_props[node2].gpu_id)
+ if (node_props[node2].node.KFDGpuID)
dir_cpu2 = gpu_get_direct_link_cpu(node2, node_props);
if (dir_cpu1 < 0 && dir_cpu2 < 0)
return HSAKMT_STATUS_ERROR;
/* if the node2(dst) is GPU , it need to be large bar for host access*/
- if (node_props[node2].gpu_id) {
+ if (node_props[node2].node.KFDGpuID) {
for (i = 0; i < node_props[node2].node.NumMemoryBanks; ++i)
if (node_props[node2].mem[i].HeapType ==
HSA_HEAPTYPE_FRAME_BUFFER_PUBLIC)
@@ -1922,7 +1921,6 @@ retry:
for (i = 0; i < sys_props.NumNodes; i++) {
ret = topology_sysfs_get_node_props(i,
&temp_props[i].node,
- &temp_props[i].gpu_id,
&p2p_links, &num_p2pLinks);
if (ret != HSAKMT_STATUS_SUCCESS) {
free_properties(temp_props, i);
@@ -1963,7 +1961,7 @@ retry:
goto err;
}
}
- } else if (!temp_props[i].gpu_id) { /* a CPU node */
+ } else if (!temp_props[i].node.KFDGpuID) { /* a CPU node */
ret = topology_get_cpu_cache_props(
i, cpuinfo, &temp_props[i]);
if (ret != HSAKMT_STATUS_SUCCESS) {
@@ -2104,7 +2102,7 @@ HSAKMT_STATUS validate_nodeid(uint32_t nodeid, uint32_t *gpu_id)
if (!g_props || !g_system || g_system->NumNodes <= nodeid)
return HSAKMT_STATUS_INVALID_NODE_UNIT;
if (gpu_id)
- *gpu_id = g_props[nodeid].gpu_id;
+ *gpu_id = g_props[nodeid].node.KFDGpuID;
return HSAKMT_STATUS_SUCCESS;
}
@@ -2114,7 +2112,7 @@ HSAKMT_STATUS gpuid_to_nodeid(uint32_t gpu_id, uint32_t *node_id)
uint64_t node_idx;
for (node_idx = 0; node_idx < g_system->NumNodes; node_idx++) {
- if (g_props[node_idx].gpu_id == gpu_id) {
+ if (g_props[node_idx].node.KFDGpuID == gpu_id) {
*node_id = node_idx;
return HSAKMT_STATUS_SUCCESS;
}
@@ -2383,7 +2381,7 @@ uint16_t get_device_id_by_gpu_id(HSAuint32 gpu_id)
return 0;
for (i = 0; i < g_system->NumNodes; i++) {
- if (g_props[i].gpu_id == gpu_id)
+ if (g_props[i].node.KFDGpuID == gpu_id)
return g_props[i].node.DeviceId;
}
--
2.35.1
More information about the amd-gfx
mailing list