[PATCH 2/2] drm/amdgpu: only harvest gcea/mmea error status in aldebaran
Hawking Zhang
Hawking.Zhang at amd.com
Fri Apr 16 09:44:24 UTC 2021
In aldebaran, driver only needs to harvest SDP
RdRspStatus, WrRspStatus and first parity error
on RdRsp data. Check error type before harvest
error information.
Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c | 21 ++++++++++++---------
drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c | 11 +++++++----
2 files changed, 19 insertions(+), 13 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
index 9ca76a3ac38c..91427543aabe 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_2.c
@@ -808,7 +808,7 @@ static struct gfx_v9_4_2_utc_block gfx_v9_4_2_utc_blocks[] = {
REG_SET_FIELD(0, ATC_L2_CACHE_4K_DSM_CNTL, WRITE_COUNTERS, 1) },
};
-static const struct soc15_reg_entry gfx_v9_4_2_rdrsp_status_regs =
+static const struct soc15_reg_entry gfx_v9_4_2_ea_err_status_regs =
{ SOC15_REG_ENTRY(GC, 0, regGCEA_ERR_STATUS), 0, 1, 16 };
static int gfx_v9_4_2_get_reg_error_count(struct amdgpu_device *adev,
@@ -1040,11 +1040,11 @@ static void gfx_v9_4_2_reset_ea_err_status(struct amdgpu_device *adev)
uint32_t i, j;
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
- for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
+ for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
- WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
}
}
gfx_v9_4_2_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
@@ -1089,17 +1089,20 @@ static void gfx_v9_4_2_query_ea_err_status(struct amdgpu_device *adev)
mutex_lock(&adev->grbm_idx_mutex);
- for (i = 0; i < gfx_v9_4_2_rdrsp_status_regs.se_num; i++) {
- for (j = 0; j < gfx_v9_4_2_rdrsp_status_regs.instance;
+ for (i = 0; i < gfx_v9_4_2_ea_err_status_regs.se_num; i++) {
+ for (j = 0; j < gfx_v9_4_2_ea_err_status_regs.instance;
j++) {
gfx_v9_4_2_select_se_sh(adev, i, 0, j);
reg_value = RREG32(SOC15_REG_ENTRY_OFFSET(
- gfx_v9_4_2_rdrsp_status_regs));
- if (reg_value)
+ gfx_v9_4_2_ea_err_status_regs));
+ if (REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, GCEA_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
dev_warn(adev->dev, "GCEA err detected at instance: %d, status: 0x%x!\n",
j, reg_value);
+ }
/* clear after read */
- WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_rdrsp_status_regs), 0x10);
+ WREG32(SOC15_REG_ENTRY_OFFSET(gfx_v9_4_2_ea_err_status_regs), 0x10);
}
}
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
index d0f41346ea0c..cc69c434d0de 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_7.c
@@ -1286,7 +1286,7 @@ static void mmhub_v1_7_reset_ras_error_count(struct amdgpu_device *adev)
}
}
-static const struct soc15_reg_entry mmhub_v1_7_err_status_regs[] = {
+static const struct soc15_reg_entry mmhub_v1_7_ea_err_status_regs[] = {
{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA0_ERR_STATUS), 0, 0, 0 },
{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA1_ERR_STATUS), 0, 0, 0 },
{ SOC15_REG_ENTRY(MMHUB, 0, regMMEA2_ERR_STATUS), 0, 0, 0 },
@@ -1303,12 +1303,15 @@ static void mmhub_v1_7_query_ras_error_status(struct amdgpu_device *adev)
if (!amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB))
return;
- for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_err_status_regs); i++) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_7_ea_err_status_regs); i++) {
reg_value =
- RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_err_status_regs[i]));
- if (reg_value)
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_7_ea_err_status_regs[i]));
+ if (REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_WRRSP_STATUS) ||
+ REG_GET_FIELD(reg_value, MMEA0_ERR_STATUS, SDP_RDRSP_DATAPARITY_ERROR)) {
dev_warn(adev->dev, "MMHUB EA err detected at instance: %d, status: 0x%x!\n",
i, reg_value);
+ }
}
}
--
2.17.1
More information about the amd-gfx
mailing list