[PATCH 17/26] drm/amdgpu: allow ras interrupt callback to return error data
Alex Deucher
alexdeucher at gmail.com
Wed Jul 31 17:58:09 UTC 2019
From: Tao Zhou <tao.zhou1 at amd.com>
add error data as parameter for ras interrupt cb and process it
Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
Reviewed-by: Dennis Li <dennis.li at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 6 ++--
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 37 +++++++++++++------------
2 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 1914f37bee59..0eeb85d8399d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1003,7 +1003,7 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
struct ras_ih_data *data = &obj->ih_data;
struct amdgpu_iv_entry entry;
int ret;
- struct ras_err_data err_data = {0, 0};
+ struct ras_err_data err_data = {0, 0, 0, NULL};
while (data->rptr != data->wptr) {
rmb();
@@ -1018,14 +1018,14 @@ static void amdgpu_ras_interrupt_handler(struct ras_manager *obj)
* from the callback to udpate the error type/count, etc
*/
if (data->cb) {
- ret = data->cb(obj->adev, &entry);
+ ret = data->cb(obj->adev, &err_data, &entry);
/* ue will trigger an interrupt, and in that case
* we need do a reset to recovery the whole system.
* But leave IP do that recovery, here we just dispatch
* the error.
*/
if (ret == AMDGPU_RAS_UE) {
- obj->err_data.ue_count++;
+ obj->err_data.ue_count += err_data.ue_count;
}
/* Might need get ce count by register, but not all IP
* saves ce count, some IP just use one bit or two bits
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index 0920db7aff34..2c86a5135ec9 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -76,9 +76,6 @@ struct ras_common_if {
char name[32];
};
-typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
- struct amdgpu_iv_entry *entry);
-
struct amdgpu_ras {
/* ras infrastructure */
/* for ras itself. */
@@ -108,21 +105,6 @@ struct amdgpu_ras {
uint32_t flags;
};
-struct ras_ih_data {
- /* interrupt bottom half */
- struct work_struct ih_work;
- int inuse;
- /* IP callback */
- ras_ih_cb cb;
- /* full of entries */
- unsigned char *ring;
- unsigned int ring_size;
- unsigned int element_size;
- unsigned int aligned_element_size;
- unsigned int rptr;
- unsigned int wptr;
-};
-
struct ras_fs_data {
char sysfs_name[32];
char debugfs_name[32];
@@ -149,6 +131,25 @@ struct ras_err_handler_data {
int last_reserved;
};
+typedef int (*ras_ih_cb)(struct amdgpu_device *adev,
+ struct ras_err_data *err_data,
+ struct amdgpu_iv_entry *entry);
+
+struct ras_ih_data {
+ /* interrupt bottom half */
+ struct work_struct ih_work;
+ int inuse;
+ /* IP callback */
+ ras_ih_cb cb;
+ /* full of entries */
+ unsigned char *ring;
+ unsigned int ring_size;
+ unsigned int element_size;
+ unsigned int aligned_element_size;
+ unsigned int rptr;
+ unsigned int wptr;
+};
+
struct ras_manager {
struct ras_common_if head;
/* reference count */
--
2.20.1
More information about the amd-gfx
mailing list