[PATCH 2/4] drm/amdgpu: add reset_ras_error_count function for MMHUB
Hawking Zhang
Hawking.Zhang at amd.com
Mon Mar 2 10:33:37 UTC 2020
MMHUB ras error counters are dirty ones after cold reboot
Read operation is needed to reset them to 0
Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h | 1 +
drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c | 3 +++
drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c | 12 ++++++++++++
drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c | 12 ++++++++++++
4 files changed, 28 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
index 1cd78940cf82..e89fb35fec71 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mmhub.h
@@ -26,6 +26,7 @@ struct amdgpu_mmhub_funcs {
int (*ras_late_init)(struct amdgpu_device *adev);
void (*query_ras_error_count)(struct amdgpu_device *adev,
void *ras_error_status);
+ void (*reset_ras_error_count)(struct amdgpu_device *adev);
};
struct amdgpu_mmhub {
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
index 90216abf14a4..d15a433af699 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v9_0.c
@@ -912,6 +912,9 @@ static int gmc_v9_0_late_init(void *handle)
}
}
+ if (adev->mmhub.funcs && adev->mmhub.funcs->reset_ras_error_count)
+ adev->mmhub.funcs->reset_ras_error_count(adev);
+
r = amdgpu_gmc_ras_late_init(adev);
if (r)
return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
index 49a3a56ec017..396c2a624de0 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v1_0.c
@@ -747,7 +747,19 @@ static void mmhub_v1_0_query_ras_error_count(struct amdgpu_device *adev,
err_data->ue_count += ded_count;
}
+static void mmhub_v1_0_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ /* read back edc counter registers to reset the counters to 0 */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v1_0_edc_cnt_regs); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v1_0_edc_cnt_regs[i]));
+ }
+}
+
const struct amdgpu_mmhub_funcs mmhub_v1_0_funcs = {
.ras_late_init = amdgpu_mmhub_ras_late_init,
.query_ras_error_count = mmhub_v1_0_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v1_0_reset_ras_error_count,
};
diff --git a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
index a5281df8d84f..0d413fabd015 100644
--- a/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
+++ b/drivers/gpu/drm/amd/amdgpu/mmhub_v9_4.c
@@ -1596,7 +1596,19 @@ static void mmhub_v9_4_query_ras_error_count(struct amdgpu_device *adev,
err_data->ue_count += ded_count;
}
+static void mmhub_v9_4_reset_ras_error_count(struct amdgpu_device *adev)
+{
+ uint32_t i;
+
+ /* read back edc counter registers to reset the counters to 0 */
+ if (amdgpu_ras_is_supported(adev, AMDGPU_RAS_BLOCK__MMHUB)) {
+ for (i = 0; i < ARRAY_SIZE(mmhub_v9_4_edc_cnt_regs); i++)
+ RREG32(SOC15_REG_ENTRY_OFFSET(mmhub_v9_4_edc_cnt_regs[i]));
+ }
+}
+
const struct amdgpu_mmhub_funcs mmhub_v9_4_funcs = {
.ras_late_init = amdgpu_mmhub_ras_late_init,
.query_ras_error_count = mmhub_v9_4_query_ras_error_count,
+ .reset_ras_error_count = mmhub_v9_4_reset_ras_error_count,
};
--
2.17.1
More information about the amd-gfx
mailing list