[PATCH 17/26] drm/amdgpu: allow ras interrupt callback to return error data

Alex Deucher alexdeucher at gmail.com
Wed Jul 31 17:58:09 UTC 2019


From: Tao Zhou <tao.zhou1 at amd.com>

Add error data as a parameter to the RAS interrupt callback, and have the interrupt handler process the error data the callback returns.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
Reviewed-by: Dennis Li <dennis.li at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c |  6 ++--
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 37 +++++++++++++------------
 2 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1914f37bee59..0eeb85d8399d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1003,7 +1003,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
 	struct ras_ih_data *data = &obj->ih_data;
 	struct amdgpu_iv_entry entry;
 	int ret;
-	struct ras_err_data err_data = {0, 0};
+	struct ras_err_data err_data = {0, 0, 0, NULL};
 
 	while (data->rptr != data->wptr) {
 		rmb();
@@ -1018,14 +1018,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
 		 * from the callback to udpate the error type/count, etc
 		 */
 		if (data->cb) {
-			ret = data->cb(obj->adev, &entry);
+			ret = data->cb(obj->adev, &err_data, &entry);
 			/* ue will trigger an interrupt, and in that case
 			 * we need do a reset to recovery the whole system.
 			 * But leave IP do that recovery, here we just dispatch
 			 * the error.
 			 */
 			if (ret == AMDGPU_RAS_UE) {
-				obj->err_data.ue_count++;
+				obj->err_data.ue_count += err_data.ue_count;
 			}
 			/* Might need get ce count by register, but not all IP
 			 * saves ce count, some IP just use one bit or two bits
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 0920db7aff34..2c86a5135ec9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -76,9 +76,6 @@ struct ras_common_if {
 	char name[32];
 };
 
-typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
-		struct amdgpu_iv_entry *entry);
-
 struct amdgpu_ras {
 	/* ras infrastructure */
 	/* for ras itself. */
@@ -108,21 +105,6 @@ struct amdgpu_ras {
 	uint32_t flags;
 };
 
-struct ras_ih_data {
-	/* interrupt bottom half */
-	struct work_struct ih_work;
-	int inuse;
-	/* IP callback */
-	ras_ih_cb cb;
-	/* full of entries */
-	unsigned char *ring;
-	unsigned int ring_size;
-	unsigned int element_size;
-	unsigned int aligned_element_size;
-	unsigned int rptr;
-	unsigned int wptr;
-};
-
 struct ras_fs_data {
 	char sysfs_name[32];
 	char debugfs_name[32];
@@ -149,6 +131,25 @@ struct ras_err_handler_data {
 	int last_reserved;
 };
 
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+		struct ras_err_data *err_data,
+		struct amdgpu_iv_entry *entry);
+
+struct ras_ih_data {
+	/* interrupt bottom half */
+	struct work_struct ih_work;
+	int inuse;
+	/* IP callback */
+	ras_ih_cb cb;
+	/* full of entries */
+	unsigned char *ring;
+	unsigned int ring_size;
+	unsigned int element_size;
+	unsigned int aligned_element_size;
+	unsigned int rptr;
+	unsigned int wptr;
+};
+
 struct ras_manager {
 	struct ras_common_if head;
 	/* reference count */
-- 
2.20.1



More information about the amd-gfx mailing list