[PATCH 1/2] drm/amdgpu: add reset sources in gpu reset context
Eric Huang
jinhuieric.huang at amd.com
Mon Jun 3 18:12:57 UTC 2024
reset source or reset cause is very useful info
for reset context, it will be used by events API.
Suggested-by: Lijo Lazar <Lijo.Lazar at amd.com>
Signed-off-by: Eric Huang <jinhuieric.huang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c | 34 +++++++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h | 13 +++++++++
2 files changed, 47 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
index bfdde772b7ee..f07f0fb9f827 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.c
@@ -160,3 +160,37 @@ void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain)
atomic_set(&reset_domain->in_gpu_reset, 0);
up_write(&reset_domain->sem);
}
+
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len)
+{
+ struct amdgpu_ring *ring;
+
+ if (!buf || !len)
+ return;
+
+ switch (rst_ctxt->src) {
+ case AMDGPU_RESET_SRC_JOB:
+ if (rst_ctxt->job) {
+ ring = amdgpu_job_ring(rst_ctxt->job);
+ snprintf(buf, len, "job hang on ring:%s", ring->name);
+ } else {
+ strscpy(buf, "job hang", len);
+ }
+ break;
+ case AMDGPU_RESET_SRC_RAS:
+ strscpy(buf, "RAS error", len);
+ break;
+ case AMDGPU_RESET_SRC_MES:
+ strscpy(buf, "MES hang", len);
+ break;
+ case AMDGPU_RESET_SRC_HWS:
+ strscpy(buf, "HWS hang", len);
+ break;
+ case AMDGPU_RESET_SRC_USER:
+ strscpy(buf, "user trigger", len);
+ break;
+ default:
+ strscpy(buf, "unknown", len);
+ }
+}
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
index 5a9cc043b858..9de8e4157a4f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_reset.h
@@ -36,6 +36,15 @@ enum AMDGPU_RESET_FLAGS {
AMDGPU_HOST_FLR = 3,
};
+enum AMDGPU_RESET_SRCS {
+ AMDGPU_RESET_SRC_UNKNOWN,
+ AMDGPU_RESET_SRC_JOB,
+ AMDGPU_RESET_SRC_RAS,
+ AMDGPU_RESET_SRC_MES,
+ AMDGPU_RESET_SRC_HWS,
+ AMDGPU_RESET_SRC_USER,
+};
+
struct amdgpu_reset_context {
enum amd_reset_method method;
struct amdgpu_device *reset_req_dev;
@@ -43,6 +52,7 @@ struct amdgpu_reset_context {
struct amdgpu_hive_info *hive;
struct list_head *reset_device_list;
unsigned long flags;
+ enum AMDGPU_RESET_SRCS src;
};
struct amdgpu_reset_handler {
@@ -130,6 +140,9 @@ void amdgpu_device_lock_reset_domain(struct amdgpu_reset_domain *reset_domain);
void amdgpu_device_unlock_reset_domain(struct amdgpu_reset_domain *reset_domain);
+void amdgpu_reset_get_desc(struct amdgpu_reset_context *rst_ctxt, char *buf,
+ size_t len);
+
#define for_each_handler(i, handler, reset_ctl) \
for (i = 0; (i < AMDGPU_RESET_MAX_HANDLERS) && \
(handler = (*reset_ctl->reset_handlers)[i]); \
--
2.34.1
More information about the amd-gfx
mailing list