[PATCH 03/10] drm/amdgpu: add RAS status reset for gfx_v9_4_3

Fri May 12 21:43:44 UTC 2023

From: Tao Zhou <tao.zhou1 at amd.com>

Reset GFX RAS status registers.

v2: fix typo in title.
    remove xcp operation.
v3: change instance from 0 to xcc_id for register access.

Signed-off-by: Tao Zhou <tao.zhou1 at amd.com>
Reviewed-by: Hawking Zhang <Hawking.Zhang at amd.com>
Signed-off-by: Alex Deucher <alexander.deucher at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c | 41 +++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index f178e3f565e9..e6069d081f71 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -3050,11 +3050,52 @@ static void gfx_v9_4_3_inst_query_ras_err_status(struct amdgpu_device *adev,
 	gfx_v9_4_3_inst_query_utc_err_status(adev, xcc_id);
 }
 
+static void gfx_v9_4_3_inst_reset_utc_err_status(struct amdgpu_device *adev,
+					int xcc_id)
+{
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regUTCL2_MEM_ECC_STATUS, 0x3);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_MEM_ECC_STATUS, 0x3);
+	WREG32_SOC15(GC, GET_INST(GC, xcc_id), regVML2_WALKER_MEM_ECC_STATUS, 0x3);
+}
+
+static void gfx_v9_4_3_inst_reset_ea_err_status(struct amdgpu_device *adev,
+					int xcc_id)
+{
+	uint32_t i, j;
+	uint32_t value;
+
+	mutex_lock(&adev->grbm_idx_mutex);
+	for (i = 0; i < gfx_v9_4_3_ea_err_status_regs.se_num; i++) {
+		for (j = 0; j < gfx_v9_4_3_ea_err_status_regs.instance; j++) {
+			gfx_v9_4_3_xcc_select_se_sh(adev, i, 0, j, xcc_id);
+			value = RREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS);
+			value = REG_SET_FIELD(value, GCEA_ERR_STATUS,
+						CLEAR_ERROR_STATUS, 0x1);
+			WREG32_SOC15(GC, GET_INST(GC, xcc_id), regGCEA_ERR_STATUS, value);
+		}
+	}
+	gfx_v9_4_3_xcc_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff,
+			xcc_id);
+	mutex_unlock(&adev->grbm_idx_mutex);
+}
+
+static void gfx_v9_4_3_inst_reset_ras_err_status(struct amdgpu_device *adev,
+					void *ras_error_status, int xcc_id)
+{
+	gfx_v9_4_3_inst_reset_utc_err_status(adev, xcc_id);
+	gfx_v9_4_3_inst_reset_ea_err_status(adev, xcc_id);
+}
+
 static void gfx_v9_4_3_query_ras_error_status(struct amdgpu_device *adev)
 {
 	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_query_ras_err_status);
 }
 
+static void gfx_v9_4_3_reset_ras_error_status(struct amdgpu_device *adev)
+{
+	amdgpu_gfx_ras_error_func(adev, NULL, gfx_v9_4_3_inst_reset_ras_err_status);
+}
+
 static const struct amd_ip_funcs gfx_v9_4_3_ip_funcs = {
 	.name = "gfx_v9_4_3",
 	.early_init = gfx_v9_4_3_early_init,
-- 
2.40.1