[PATCH] drm/amdgpu: Decode deferred error type in aca bank parser
Xiang Liu
xiang.liu at amd.com
Wed Feb 26 07:49:13 UTC 2025
In the case of poison inband log, the error type need to be specified
by checking the deferred or poison bit of status register.
v2: check both deferred and poison bit
Signed-off-by: Xiang Liu <xiang.liu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h | 6 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 5 +++--
drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 4 ++--
drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c | 4 ++--
7 files changed, 19 insertions(+), 12 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
index b84a3489b116..6f62e5d80ed6 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.h
@@ -76,6 +76,12 @@ struct ras_query_context;
#define mmSMNAID_XCD1_MCA_SMU 0x38430400 /* SMN AID XCD1 */
#define mmSMNXCD_XCD0_MCA_SMU 0x40430400 /* SMN XCD XCD0 */
+#define ACA_BANK_ERR_CE_DE_DECODE(bank) \
+ ((ACA_REG__STATUS__POISON((bank)->regs[ACA_REG_IDX_STATUS]) || \
+ ACA_REG__STATUS__DEFERRED((bank)->regs[ACA_REG_IDX_STATUS])) ? \
+ ACA_ERROR_TYPE_DEFERRED : \
+ ACA_ERROR_TYPE_CE)
+
enum aca_reg_idx {
ACA_REG_IDX_CTL = 0,
ACA_REG_IDX_STATUS = 1,
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index 49da137d42c9..c313c2cf6969 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -1169,8 +1169,8 @@ static int xgmi_v6_4_0_aca_bank_parser(struct aca_handle *handle, struct aca_ban
break;
case ACA_SMU_TYPE_CE:
count = ext_error_code == 6 ? count : 0ULL;
- bank->aca_err_type = ACA_ERROR_TYPE_CE;
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE, count);
+ bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type, count);
break;
default:
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 50eb856249d5..87add6274b98 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -883,9 +883,10 @@ static int gfx_v9_4_3_aca_bank_parser(struct aca_handle *handle,
ACA_ERROR_TYPE_UE, 1ULL);
break;
case ACA_SMU_TYPE_CE:
- bank->aca_err_type = ACA_ERROR_TYPE_CE;
+ bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
ret = aca_error_cache_log_bank_error(handle, &info,
- ACA_ERROR_TYPE_CE, ACA_REG__MISC0__ERRCNT(misc0));
+ bank->aca_err_type,
+ ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
return -EINVAL;
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
index b8f06e9c9e62..1fcab0ef21c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v4_0_3.c
@@ -1332,8 +1332,8 @@ static int jpeg_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_ban
1ULL);
break;
case ACA_SMU_TYPE_CE:
- bank->aca_err_type = ACA_ERROR_TYPE_CE;
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
index 58d22f0d5a68..a54e7b929295 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_8.c
@@ -751,8 +751,8 @@ static int mmhub_v1_8_aca_bank_parser(struct aca_handle *handle, struct aca_bank
1ULL);
break;
case ACA_SMU_TYPE_CE:
- bank->aca_err_type = ACA_ERROR_TYPE_CE;
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index ba43c8f46f45..5636f473c3d0 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -2529,8 +2529,8 @@ static int sdma_v4_4_2_aca_bank_parser(struct aca_handle *handle, struct aca_ban
1ULL);
break;
case ACA_SMU_TYPE_CE:
- bank->aca_err_type = ACA_ERROR_TYPE_CE;
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
index 2681c40704a1..20f6bd9255db 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v4_0_3.c
@@ -1963,8 +1963,8 @@ static int vcn_v4_0_3_aca_bank_parser(struct aca_handle *handle, struct aca_bank
1ULL);
break;
case ACA_SMU_TYPE_CE:
- bank->aca_err_type = ACA_ERROR_TYPE_CE;
- ret = aca_error_cache_log_bank_error(handle, &info, ACA_ERROR_TYPE_CE,
+ bank->aca_err_type = ACA_BANK_ERR_CE_DE_DECODE(bank);
+ ret = aca_error_cache_log_bank_error(handle, &info, bank->aca_err_type,
ACA_REG__MISC0__ERRCNT(misc0));
break;
default:
--
2.34.1
More information about the amd-gfx
mailing list