[RFC v2 01/15] drm/amdgpu: add helper functions to track status for ras manager

Jiang Liu gerry at linux.alibaba.com
Mon Jan 13 01:42:06 UTC 2025


Add helper functions that set, test and clear per-object marker bits, so that the status of ras managers and ip blocks can be tracked.

Signed-off-by: Jiang Liu <gerry at linux.alibaba.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu.h     | 38 +++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 37 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 10 +++++++
 3 files changed, 85 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 5e55a44f9eef..f0f773659faf 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -377,12 +377,28 @@ int amdgpu_ip_block_resume(struct amdgpu_ip_block *ip_block);
 
 #define AMDGPU_MAX_IP_NUM 16
 
+enum amdgpu_marker {
+	/* Markers for IRQs, used for both ip blocks and ras blocks. */
+	AMDGPU_MARKER_IRQ0 = 32,	/* values are bit positions in a 64-bit markers mask */
+	AMDGPU_MARKER_IRQ1,
+	AMDGPU_MARKER_IRQ2,
+	AMDGPU_MARKER_IRQ3,
+	AMDGPU_MARKER_IRQ4,
+	AMDGPU_MARKER_IRQ5,
+	AMDGPU_MARKER_IRQ6,
+	AMDGPU_MARKER_IRQ7,
+	AMDGPU_MARKER_IRQ_MAX = 63,	/* highest bit available in the uint64_t mask */
+};
+
+#define AMDGPU_MARKER_IRQ(idx)		(AMDGPU_MARKER_IRQ0 + (idx)) /* idx is not range-checked */
+
 struct amdgpu_ip_block_status {
 	bool valid;
 	bool sw;
 	bool hw;
 	bool late_initialized;
 	bool hang;
+	uint64_t markers;	/* bitmask, one bit per enum amdgpu_marker value */
 };
 
 struct amdgpu_ip_block_version {
@@ -410,6 +426,28 @@ amdgpu_device_ip_get_ip_block(struct amdgpu_device *adev,
 int amdgpu_device_ip_block_add(struct amdgpu_device *adev,
 			       const struct amdgpu_ip_block_version *ip_block_version);
 
+static inline void amdgpu_ip_block_set_marker(struct amdgpu_ip_block *ip_block,
+					      enum amdgpu_marker marker)
+{
+	if (WARN_ON(marker > 63) || WARN_ON(ip_block->status.markers & (0x1ull << marker)))
+		return;	/* out of range (undefined shift) or already set: nothing to do */
+	ip_block->status.markers |= 0x1ull << marker;
+}
+
+/* Clear @marker in @ip_block's status; returns true if it had been set. */
+static inline bool amdgpu_ip_block_test_and_clear_marker(struct amdgpu_ip_block *ip_block,
+							 enum amdgpu_marker marker)
+{
+	const uint64_t mask = 0x1ull << (int)marker;
+
+	if (!(ip_block->status.markers & mask))
+		return false;
+
+	ip_block->status.markers &= ~mask;
+
+	return true;
+}
+
 /*
  * BIOS.
  */
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index f0924aa3f4e4..5e19d820ab34 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -5207,3 +5207,40 @@ bool amdgpu_ras_is_rma(struct amdgpu_device *adev)
 
 	return con->is_rma;
 }
+
+bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
+			    struct ras_common_if *head, int marker)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+	/* An out-of-range shift count is undefined; report such bits as clear. */
+	if (!obj || marker < 0 || marker > 63)
+		return false;
+	return (obj->markers & (0x1ull << marker)) != 0;
+}
+
+void amdgpu_ras_set_marker(struct amdgpu_device *adev,
+			   struct ras_common_if *head, int marker)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+
+	if (WARN_ON(marker < 0 || marker > 63) || !obj)
+		return;	/* bad bit index, or no ras object to mark */
+	WARN_ON(obj->markers & (0x1ull << marker));
+	obj->markers |= 0x1ull << marker;
+}
+
+bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
+				      struct ras_common_if *head, int marker)
+{
+	struct ras_manager *obj = amdgpu_ras_find_obj(adev, head);
+	uint64_t mask;
+
+	/* Range-check first: shifting by a negative or >= 64 count is undefined. */
+	if (!obj || marker < 0 || marker > 63)
+		return false;
+	mask = obj->markers & (0x1ull << marker);
+	obj->markers &= ~mask;
+
+	return mask != 0;
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 82db986c36a0..35881087b17b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -634,6 +634,8 @@ struct ras_manager {
 	struct ras_common_if head;
 	/* reference count */
 	int use;
+	/* Flags for status tracking */
+	uint64_t markers;
 	/* ras block link */
 	struct list_head node;
 	/* the device */
@@ -977,4 +979,12 @@ void amdgpu_ras_event_log_print(struct amdgpu_device *adev, u64 event_id,
 				const char *fmt, ...);
 
 bool amdgpu_ras_is_rma(struct amdgpu_device *adev);
+
+bool amdgpu_ras_test_marker(struct amdgpu_device *adev,
+			    struct ras_common_if *head, int marker);
+void amdgpu_ras_set_marker(struct amdgpu_device *adev,
+			   struct ras_common_if *head, int marker);
+bool amdgpu_ras_test_and_clear_marker(struct amdgpu_device *adev,
+				      struct ras_common_if *head,
+				      int marker);
 #endif
-- 
2.43.5



More information about the amd-gfx mailing list