[PATCH v2 09/12] drm/amdgpu: add sdma v4.4.2 ACA support
Yang Wang
kevinyang.wang at amd.com
Thu Jan 4 11:48:55 UTC 2024
v1:
add sdma v4.4.2 ACA driver support
v2:
use macro to define smn address value.
Signed-off-by: Yang Wang <kevinyang.wang at amd.com>
---
drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 82 ++++++++++++++++++++++++
1 file changed, 82 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
index 2d688dca26be..8e2366c6717d 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
@@ -45,6 +45,8 @@
MODULE_FIRMWARE("amdgpu/sdma_4_4_2.bin");
+#define mmSMNAID_AID0_MCA_SMU 0x03b30400
+
#define WREG32_SDMA(instance, offset, value) \
WREG32(sdma_v4_4_2_get_reg_offset(adev, (instance), (offset)), value)
#define RREG32_SDMA(instance, offset) \
@@ -2204,9 +2206,89 @@ static const struct amdgpu_ras_block_hw_ops sdma_v4_4_2_ras_hw_ops = {
.reset_ras_error_count = sdma_v4_4_2_reset_ras_error_count,
};
+static int sdma_v4_4_2_aca_bank_generate_report(struct aca_handle *handle,
+ struct aca_bank *bank, enum aca_error_type type,
+ struct aca_bank_report *report, void *data)
+{
+ u64 status, misc0;
+ int ret;
+
+ status = bank->regs[ACA_REG_IDX_STATUS];
+ if ((type == ACA_ERROR_TYPE_UE &&
+ ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_FAULT) ||
+ (type == ACA_ERROR_TYPE_CE &&
+ ACA_REG__STATUS__ERRORCODEEXT(status) == ACA_EXTERROR_CODE_CE)) {
+
+ ret = aca_bank_info_decode(bank, &report->info);
+ if (ret)
+ return ret;
+
+ misc0 = bank->regs[ACA_REG_IDX_MISC0];
+ report->count = ACA_REG__MISC0__ERRCNT(misc0);
+ report->type = type;
+ }
+
+ return 0;
+}
+
+/* CODE_SDMA0 - CODE_SDMA4, reference to smu driver if header file */
+static int sdma_v4_4_2_err_codes[] = { 33, 34, 35, 36 };
+
+static bool sdma_v4_4_2_aca_bank_is_valid(struct aca_handle *handle, struct aca_bank *bank,
+ enum aca_error_type type, void *data)
+{
+ u32 instlo;
+
+ instlo = ACA_REG__IPID__INSTANCEIDLO(bank->regs[ACA_REG_IDX_IPID]);
+ instlo &= GENMASK(31, 1);
+
+ if (instlo != mmSMNAID_AID0_MCA_SMU)
+ return false;
+
+ if (aca_bank_check_error_codes(handle->adev, bank,
+ sdma_v4_4_2_err_codes,
+ ARRAY_SIZE(sdma_v4_4_2_err_codes)))
+ return false;
+
+ return true;
+}
+
+static const struct aca_bank_ops sdma_v4_4_2_aca_bank_ops = {
+ .aca_bank_generate_report = sdma_v4_4_2_aca_bank_generate_report,
+ .aca_bank_is_valid = sdma_v4_4_2_aca_bank_is_valid,
+};
+
+static const struct aca_info sdma_v4_4_2_aca_info = {
+ .hwip = ACA_HWIP_TYPE_SMU,
+ .mask = ACA_ERROR_UE_MASK,
+ .bank_ops = &sdma_v4_4_2_aca_bank_ops,
+};
+
+static int sdma_v4_4_2_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
+{
+ int r;
+
+ r = amdgpu_ras_block_late_init(adev, ras_block);
+ if (r)
+ return r;
+
+ r = amdgpu_ras_bind_aca(adev, AMDGPU_RAS_BLOCK__SDMA,
+ &sdma_v4_4_2_aca_info, NULL);
+ if (r)
+ goto late_fini;
+
+ return 0;
+
+late_fini:
+ amdgpu_ras_block_late_fini(adev, ras_block);
+
+ return r;
+}
+
static struct amdgpu_sdma_ras sdma_v4_4_2_ras = {
.ras_block = {
.hw_ops = &sdma_v4_4_2_ras_hw_ops,
+ .ras_late_init = sdma_v4_4_2_ras_late_init,
},
};
--
2.34.1
More information about the amd-gfx
mailing list