[PATCH] drm/amdgpu: Add debug mask to disable CE logs
Xiang Liu
xiang.liu at amd.com
Fri Jun 6 04:10:41 UTC 2025
Add debug mask to disable kernel logs of RAS correctable errors,
including both ACA and CE error counter kernel messages.
Signed-off-by: Xiang Liu <xiang.liu at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c | 5 +++++
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c | 6 ++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c | 3 +++
4 files changed, 15 insertions(+)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
index 9f7284ab5ab6..d0982dbce31a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@@ -1309,6 +1309,7 @@ struct amdgpu_device {
bool debug_exp_resets;
bool debug_disable_gpu_ring_reset;
bool debug_vm_userptr;
+ bool debug_disable_ce_logs;
/* Protection for the following isolation structure */
struct mutex enforce_isolation_mutex;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
index 3835f2592914..cbc40cad581b 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_aca.c
@@ -115,6 +115,11 @@ static void aca_smu_bank_dump(struct amdgpu_device *adev, int idx, int total, st
u64 event_id = qctx ? qctx->evid.event_id : RAS_EVENT_INVALID_ID;
int i;
+ if (adev->debug_disable_ce_logs &&
+ bank->smu_err_type == ACA_SMU_TYPE_CE &&
+ !ACA_BANK_ERR_IS_DEFFERED(bank))
+ return;
+
RAS_EVENT_LOG(adev, event_id, HW_ERR "Accelerator Check Architecture events logged\n");
/* plus 1 for output format, e.g: ACA[08/08]: xxxx */
for (i = 0; i < ARRAY_SIZE(aca_regs); i++)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
index 7249f815feaa..fbd0e3a030de 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@@ -144,6 +144,7 @@ enum AMDGPU_DEBUG_MASK {
AMDGPU_DEBUG_DISABLE_GPU_RING_RESET = BIT(6),
AMDGPU_DEBUG_SMU_POOL = BIT(7),
AMDGPU_DEBUG_VM_USERPTR = BIT(8),
+ AMDGPU_DEBUG_DISABLE_RAS_CE_LOG = BIT(9)
};
unsigned int amdgpu_vram_limit = UINT_MAX;
@@ -2302,6 +2303,11 @@ static void amdgpu_init_debug_options(struct amdgpu_device *adev)
pr_info("debug: VM mode debug for userptr is enabled\n");
adev->debug_vm_userptr = true;
}
+
+ if (amdgpu_debug_mask & AMDGPU_DEBUG_DISABLE_RAS_CE_LOG) {
+ pr_info("debug: disable kernel logs of correctalbe errors\n");
+ adev->debug_disable_ce_logs = true;
+ }
}
static unsigned long amdgpu_fix_asic_type(struct pci_dev *pdev, unsigned long flags)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 6565dc7ff9cd..505850f415ef 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -1107,6 +1107,9 @@ static void amdgpu_ras_error_print_error_data(struct amdgpu_device *adev,
err_info->de_count, blk_name);
}
} else {
+ if (adev->debug_disable_ce_logs)
+ return;
+
for_each_ras_error(err_node, err_data) {
err_info = &err_node->err_info;
mcm_info = &err_info->mcm_info;
--
2.34.1
More information about the amd-gfx
mailing list