[PATCH] drm/amdgpu: Report generic instead of unknown boot time errors

Zhang, Hawking Hawking.Zhang at amd.com
Wed Feb 26 06:33:23 UTC 2025


[AMD Official Use Only - AMD Internal Distribution Only]

Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>

Regards,
Hawking
-----Original Message-----
From: Liu, Xiang(Dean) <Xiang.Liu at amd.com>
Sent: Wednesday, February 26, 2025 14:30
To: amd-gfx at lists.freedesktop.org
Cc: Zhang, Hawking <Hawking.Zhang at amd.com>; Subedi, Rohit <Rohit.Subedi at amd.com>; Liu, Xiang(Dean) <Xiang.Liu at amd.com>
Subject: [PATCH] drm/amdgpu: Report generic instead of unknown boot time errors

Change the DMESG reporting of unknown errors to "Boot Controller Generic Error" to align with the RAS SPEC and provide more clarity to customers.

Signed-off-by: Xiang Liu <xiang.liu at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 4 ++--  drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 493dd004d6fa..285e3aa2bb2f 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -5138,9 +5138,9 @@ static void amdgpu_ras_boot_time_error_reporting(struct amdgpu_device *adev,
                         "socket: %d, aid: %d, fw_status: 0x%x, data abort exception\n",
                         socket_id, aid_id, fw_status);

-       if (AMDGPU_RAS_GPU_ERR_UNKNOWN(boot_error))
+       if (AMDGPU_RAS_GPU_ERR_GENERIC(boot_error))
                dev_info(adev->dev,
-                        "socket: %d, aid: %d, fw_status: 0x%x, unknown boot time errors\n",
+                        "socket: %d, aid: %d, fw_status: 0x%x, Boot Controller Generic
+Error\n",
                         socket_id, aid_id, fw_status);
 }

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
index cc4586581dba..764e9fa0a914 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.h
@@ -47,7 +47,7 @@ struct amdgpu_iv_entry;
 #define AMDGPU_RAS_GPU_ERR_AID_ID(x)                   AMDGPU_GET_REG_FIELD(x, 12, 11)
 #define AMDGPU_RAS_GPU_ERR_HBM_ID(x)                   AMDGPU_GET_REG_FIELD(x, 14, 13)
 #define AMDGPU_RAS_GPU_ERR_DATA_ABORT(x)               AMDGPU_GET_REG_FIELD(x, 29, 29)
-#define AMDGPU_RAS_GPU_ERR_UNKNOWN(x)                  AMDGPU_GET_REG_FIELD(x, 30, 30)
+#define AMDGPU_RAS_GPU_ERR_GENERIC(x)                  AMDGPU_GET_REG_FIELD(x, 30, 30)

 #define AMDGPU_RAS_BOOT_STATUS_POLLING_LIMIT   100
 #define AMDGPU_RAS_BOOT_STEADY_STATUS          0xBA
--
2.34.1



More information about the amd-gfx mailing list