[RFC PATCH 02/13] drm/amdgpu: add helper functions to track status for ras manager

Jiang Liu gerry at linux.alibaba.com
Wed Jan 8 13:59:54 UTC 2025


Add a 64-bit markers bitmask, and helper functions to update it, to track status for ras manager and ip blocks.

Signed-off-by: Jiang Liu <gerry at linux.alibaba.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 38 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 38 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++++++
 3 files changed, 86 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index e4b13e729770..32941f29507c 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -367,12 +367,29 @@ bool amdgpu_device_ip_is_valid(struct amdgpu_device *adev,
 
 #define AMDGPU_MAX_IP_NUM 16
 
+enum amdgpu_marker {
+	/* bit index in a 64-bit markers mask; shared by ip blocks and ras blocks */
+	AMDGPU_MARKER_IRQ0		= 0,
+	AMDGPU_MARKER_IRQ1		= 1,
+	AMDGPU_MARKER_IRQ2		= 2,
+	AMDGPU_MARKER_IRQ3		= 3,
+	AMDGPU_MARKER_IRQ4		= 4,
+	AMDGPU_MARKER_IRQ5		= 5,
+	AMDGPU_MARKER_IRQ6		= 6,
+	AMDGPU_MARKER_IRQ7		= 7,
+	AMDGPU_MARKER_IRQ_MAX		= 47,	/* NOTE(review): presumably the last IRQ bit - confirm */
+	AMDGPU_MARKER_DEBUGFS		= 63,	/* highest bit of the 64-bit mask */
+};
+
+#define AMDGPU_MARKER_INDEX_IRQ(idx)		(AMDGPU_MARKER_IRQ0 + (idx))	/* marker of IRQ number idx */
+
 struct amdgpu_ip_block_status {
 	bool valid;
 	bool sw;
 	bool hw;
 	bool late_initialized;
 	bool hang;
+	uint64_t markers;	/* bitmask, one bit per enum amdgpu_marker value */
 };
 
 struct amdgpu_ip_block_version {
@@ -400,6 +417,27 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 			       const struct amdgpu_ip_block_version *ip_block_version);
 
+static inline void amdgpu_ip_block_set_marker(struct amdgpu_ip_block *ip_block,
+					      enum amdgpu_marker marker)
+{
+	WARN_ON(ip_block->status.markers & (1ULL << (int)marker));	/* already set */
+	ip_block->status.markers |= (1ULL << (int)marker);
+}
+
+static inline bool amdgpu_ip_block_test_and_clear_marker(struct amdgpu_ip_block *ip_block,
+							 enum amdgpu_marker marker)
+{
+	const uint64_t mask = 0x1ull << (int)marker;
+
+	/*
+	 * Marker not set: report false; otherwise clear the bit, report true.
+	 */
+	if (!(ip_block->status.markers & mask))
+		return false;
+	ip_block->status.markers &= ~mask;
+	return true;
+}
+
 /*
  * BIOS.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 71e8eafbbfbc..6d52e22691f7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -5007,3 +5007,41 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
 
 	return con->is_rma;
 }
+
+bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
+			    struct ras_common_if *head, int marker)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+	/*
+	 * True when amdgpu_ras_find_obj() returned an object and bit @marker
+	 * is set in its markers mask; a NULL lookup result reads as false.
+	 */
+	return obj && (obj->markers & (0x1ull << marker));
+}
+
+void amdgpu_ras_set_marker(struct amdgpu_device *adev,
+			   struct ras_common_if *head, int marker)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+	if (!obj)	/* lookup failed: no-op; must be checked before obj is dereferenced */
+		return;
+	WARN_ON(obj->markers & (0x1ull << marker));	/* warn if bit @marker is already set */
+	obj->markers |= 0x1ull << marker;
+}
+
+bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
+				      struct ras_common_if *head, int marker)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+	const uint64_t mask = 0x1ull << marker;
+
+	/*
+	 * True only if the lookup succeeded and bit @marker was set (now cleared).
+	 */
+	if (!obj || !(obj->markers & mask))
+		return false;
+	obj->markers &= ~mask;
+	return true;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index b13debcf48ee..bc7377eaf819 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -632,6 +632,8 @@ struct ras_manager {
 	struct ras_common_if head;
 	/* reference count */
 	int use;
+	/* Flags for status tracking */
+	uint64_t markers;
 	/* ras block link */
 	struct list_head node;
 	/* the device */
@@ -975,4 +977,12 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
 				const char *fmt, ...);
 
 bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+/* Status-marker helpers: marker is a bit index into ras_manager.markers */
+bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
+			    struct ras_common_if *head, int marker);
+void amdgpu_ras_set_marker(struct amdgpu_device *adev,
+			   struct ras_common_if *head, int marker);
+bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
+				      struct ras_common_if *head, int marker);
 #endif
-- 
2.43.5



More information about the amd-gfx mailing list