[PATCH 10/11] libhsakmt: add open SMI event handle

Philip Yang Philip.Yang at amd.com
Tue Jun 28 14:50:19 UTC 2022


System Management Interface (SMI) events are read from an anonymous file
handle. This helper wraps the ioctl interface to get the anonymous file
handle for a GPU node ID.

Define SMI event IDs and event triggers, copying the same values from
kfd_ioctl.h to avoid translation.

Change-Id: I5c8ba5301473bb3b80bb4e2aa33a9f675bedb001
Signed-off-by: Philip Yang <Philip.Yang at amd.com>
---
 include/hsakmt.h      | 16 ++++++++++++++
 include/hsakmttypes.h | 49 +++++++++++++++++++++++++++++++++++++++++++
 src/events.c          | 27 ++++++++++++++++++++++++
 src/libhsakmt.ver     |  1 +
 4 files changed, 93 insertions(+)

diff --git a/include/hsakmt.h b/include/hsakmt.h
index abc617f..ca586ba 100644
--- a/include/hsakmt.h
+++ b/include/hsakmt.h
@@ -877,6 +877,22 @@ hsaKmtGetXNACKMode(
     HSAint32 * enable  // OUT: returns XNACK value.
 );
 
+/**
+   Open an anonymous file handle used to enable and read SMI events for a GPU node.
+
+   To enable events, write a 64-bit event mask to fd; event N is selected by HSA_SMI_EVENT_MASK_FROM_INDEX(N).
+   For example, the mask (HSA_SMI_EVENT_MASK_FROM_INDEX(HSA_SMI_EVENT_INDEX_MAX) - 1) enables all events.
+
+   Reading from fd does not block; use poll() with a timeout to check whether an event is available.
+   Events are dropped if the kernel event FIFO is full.
+*/
+HSAKMT_STATUS
+HSAKMTAPI
+hsaKmtOpenSMI(
+    HSAuint32 NodeId,   // IN: GPU node_id to receive the SMI event from
+    int *fd             // OUT: anonymous file handle
+);
+
 #ifdef __cplusplus
 }   //extern "C"
 #endif
diff --git a/include/hsakmttypes.h b/include/hsakmttypes.h
index ab2591b..690e001 100644
--- a/include/hsakmttypes.h
+++ b/include/hsakmttypes.h
@@ -1354,6 +1354,55 @@ typedef struct _HSA_SVM_ATTRIBUTE {
 	HSAuint32 value; // attribute value
 } HSA_SVM_ATTRIBUTE;
 
+typedef enum _HSA_SMI_EVENT {
+	HSA_SMI_EVENT_NONE = 0, /* not used */
+	HSA_SMI_EVENT_VMFAULT = 1, /* events start counting at 1 */
+	HSA_SMI_EVENT_THERMAL_THROTTLE = 2,
+	HSA_SMI_EVENT_GPU_PRE_RESET = 3,
+	HSA_SMI_EVENT_GPU_POST_RESET = 4,
+	HSA_SMI_EVENT_MIGRATE_START = 5,
+	HSA_SMI_EVENT_MIGRATE_END = 6,
+	HSA_SMI_EVENT_PAGE_FAULT_START = 7,
+	HSA_SMI_EVENT_PAGE_FAULT_END = 8,
+	HSA_SMI_EVENT_QUEUE_EVICTION = 9,
+	HSA_SMI_EVENT_QUEUE_RESTORE = 10,
+	HSA_SMI_EVENT_UNMAP_FROM_GPU = 11,
+	HSA_SMI_EVENT_INDEX_MAX = 12, /* one past the last event ID defined above */
+
+	/*
+	 * Max event number; as a mask flag bit it requests events from all
+	 * processes. This requires superuser permission; without it, no
+	 * events are received from any process. When the flag is not set,
+	 * events are received from the same process.
+	 */
+	HSA_SMI_EVENT_ALL_PROCESS = 64
+} HSA_EVENT_TYPE; /* NOTE(review): typedef name does not match the _HSA_SMI_EVENT tag - confirm intended */
+
+typedef enum _HSA_MIGRATE_TRIGGERS { /* implicit values 0..3; per the commit message, copied from kfd_ioctl.h */
+	HSA_MIGRATE_TRIGGER_PREFETCH,
+	HSA_MIGRATE_TRIGGER_PAGEFAULT_GPU,
+	HSA_MIGRATE_TRIGGER_PAGEFAULT_CPU,
+	HSA_MIGRATE_TRIGGER_TTM_EVICTION
+} HSA_MIGRATE_TRIGGERS;
+
+typedef enum _HSA_QUEUE_EVICTION_TRIGGERS { /* implicit values 0..5; per the commit message, copied from kfd_ioctl.h */
+	HSA_QUEUE_EVICTION_TRIGGER_SVM,
+	HSA_QUEUE_EVICTION_TRIGGER_USERPTR,
+	HSA_QUEUE_EVICTION_TRIGGER_TTM,
+	HSA_QUEUE_EVICTION_TRIGGER_SUSPEND,
+	HSA_QUEUE_EVICTION_CRIU_CHECKPOINT, /* NOTE(review): no _TRIGGER_ in the name, unlike the entries above - confirm against kfd_ioctl.h */
+	HSA_QUEUE_EVICTION_CRIU_RESTORE /* NOTE(review): same naming difference as the previous entry */
+} HSA_QUEUE_EVICTION_TRIGGERS;
+
+typedef enum _HSA_SVM_UNMAP_TRIGGERS { /* implicit values 0..2; per the commit message, copied from kfd_ioctl.h */
+	HSA_SVM_UNMAP_TRIGGER_MMU_NOTIFY,
+	HSA_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,
+	HSA_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU
+} HSA_SVM_UNMAP_TRIGGERS;
+
+#define HSA_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) /* event index i -> mask bit (i - 1); only valid for 1 <= i <= 64 */
+#define HSA_SMI_EVENT_MSG_SIZE	96 /* NOTE(review): presumably the max size in bytes of one SMI event message - confirm */
+
 #pragma pack(pop, hsakmttypes_h)
 
 
diff --git a/src/events.c b/src/events.c
index d4c751c..06d3959 100644
--- a/src/events.c
+++ b/src/events.c
@@ -339,3 +339,30 @@ out:
 
 	return result;
 }
+
+HSAKMT_STATUS HSAKMTAPI hsaKmtOpenSMI(HSAuint32 NodeId, int *fd)
+{
+	struct kfd_ioctl_smi_events_args args; /* gpuid is set below; anon_fd is read back after the ioctl */
+	HSAKMT_STATUS result;
+	uint32_t gpuid;
+
+	CHECK_KFD_OPEN();
+
+	pr_debug("[%s] node %d\n", __func__, NodeId);
+	/* Translate NodeId into the gpuid passed to the ioctl; bail out if validation fails. */
+	result = validate_nodeid(NodeId, &gpuid);
+	if (result != HSAKMT_STATUS_SUCCESS) {
+		pr_err("[%s] invalid node ID: %d\n", __func__, NodeId);
+		return result;
+	}
+	/* Issue the SMI events ioctl on the KFD device fd for this GPU. */
+	args.gpuid = gpuid;
+	result = kmtIoctl(kfd_fd, AMDKFD_IOC_SMI_EVENTS, &args);
+	if (result) { /* any non-zero ioctl result is reported as the generic HSAKMT_STATUS_ERROR */
+		pr_debug("open SMI event fd failed %s\n", strerror(errno));
+		return HSAKMT_STATUS_ERROR;
+	}
+
+	*fd = args.anon_fd; /* NOTE(review): fd is dereferenced without a NULL check */
+	return HSAKMT_STATUS_SUCCESS;
+}
diff --git a/src/libhsakmt.ver b/src/libhsakmt.ver
index 50c309d..46370c6 100644
--- a/src/libhsakmt.ver
+++ b/src/libhsakmt.ver
@@ -69,6 +69,7 @@ hsaKmtSVMSetAttr;
 hsaKmtSVMGetAttr;
 hsaKmtSetXNACKMode;
 hsaKmtGetXNACKMode;
+hsaKmtOpenSMI;
 
 local: *;
 };
-- 
2.35.1



More information about the amd-gfx mailing list