[PATCH 1/5] drm/amdgpu: Add log info for umc_v12_0 and smu_v13_0_6
Zhang, Hawking
Hawking.Zhang at amd.com
Wed Jan 17 11:11:40 UTC 2024
[AMD Official Use Only - General]
+ dev_info(adev->dev,
+ "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n",
+ mc_umc_status,
Please also print out the socket id for the UMC MCA status.
+ dev_info(smu->adev->dev, "MSG %s(%d) query %s MCA count result:%u\n",
+ (msg == SMU_MSG_QueryValidMcaCeCount) ?
+ "SMU_MSG_QueryValidMcaCeCount" : "SMU_MSG_QueryValidMcaCount",
+ msg,
+ (msg == SMU_MSG_QueryValidMcaCeCount) ? "CE" : "UE",
+ *count);
+
This seems redundant, or it was added for debugging purposes. We can drop this print since there is already a log to cover failures.
Regards,
Hawking
-----Original Message-----
From: Chai, Thomas <YiPeng.Chai at amd.com>
Sent: Tuesday, January 16, 2024 16:21
To: amd-gfx at lists.freedesktop.org
Cc: Chai, Thomas <YiPeng.Chai at amd.com>; Zhang, Hawking <Hawking.Zhang at amd.com>; Zhou1, Tao <Tao.Zhou1 at amd.com>; Li, Candice <Candice.Li at amd.com>; Wang, Yang(Kevin) <KevinYang.Wang at amd.com>; Yang, Stanley <Stanley.Yang at amd.com>; Chai, Thomas <YiPeng.Chai at amd.com>
Subject: [PATCH 1/5] drm/amdgpu: Add log info for umc_v12_0 and smu_v13_0_6
Add log info for umc_v12_0 and smu_v13_0_6.
Signed-off-by: YiPeng Chai <YiPeng.Chai at amd.com>
---
drivers/gpu/drm/amd/amdgpu/umc_v12_0.c | 11 +++++++++++
drivers/gpu/drm/amd/amdkfd/kfd_events.c | 6 +++++-
.../gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 13 +++++++++++++
3 files changed, 29 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
index 6423dca5b777..fa2168f1d3bf 100644
--- a/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/umc_v12_0.c
@@ -91,6 +91,17 @@ static void umc_v12_0_reset_error_count(struct amdgpu_device *adev)
bool umc_v12_0_is_deferred_error(struct amdgpu_device *adev, uint64_t mc_umc_status) {
+ dev_info(adev->dev,
+ "MCA_UMC_STATUS(0x%llx): Val:%llu, Poison:%llu, Deferred:%llu, PCC:%llu, UC:%llu, TCC:%llu\n",
+ mc_umc_status,
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Poison),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, PCC),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, UC),
+ REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, TCC)
+ );
+
return (amdgpu_ras_is_poison_mode_supported(adev) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Val) == 1) &&
(REG_GET_FIELD(mc_umc_status, MCA_UMC_UMC0_MCUMC_STATUST0, Deferred) == 1));
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
index 11923964ce9a..51bb98db5d7a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c
@@ -1297,8 +1297,10 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
uint32_t id = KFD_FIRST_NONSIGNAL_EVENT_ID;
int user_gpu_id;
- if (!p)
+ if (!p) {
+ dev_warn(dev->adev->dev, "Not find process with pasid:%d\n", pasid);
return; /* Presumably process exited. */
+ }
user_gpu_id = kfd_process_get_user_gpu_id(p, dev->id);
if (unlikely(user_gpu_id == -EINVAL)) {
@@ -1334,6 +1336,8 @@ void kfd_signal_poison_consumed_event(struct kfd_node *dev, u32 pasid)
}
}
+ dev_warn(dev->adev->dev, "Send SIGBUS to process %s(pasid:%d)\n",
+ p->lead_thread->comm, pasid);
rcu_read_unlock();
/* user application will handle SIGBUS signal */
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 952a983da49a..cee8ee5afcb6 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -2406,10 +2406,23 @@ static int smu_v13_0_6_get_valid_mca_count(struct smu_context *smu, enum amdgpu_
ret = smu_cmn_send_smc_msg(smu, msg, count);
if (ret) {
+ dev_err(smu->adev->dev, "%s(%d) failed to query %s MCA count, ret:%d\n",
+ (msg == SMU_MSG_QueryValidMcaCeCount) ?
+ "SMU_MSG_QueryValidMcaCeCount" : "SMU_MSG_QueryValidMcaCount",
+ msg,
+ (msg == SMU_MSG_QueryValidMcaCeCount) ? "CE" : "UE",
+ ret);
*count = 0;
return ret;
}
+ dev_info(smu->adev->dev, "MSG %s(%d) query %s MCA count result:%u\n",
+ (msg == SMU_MSG_QueryValidMcaCeCount) ?
+ "SMU_MSG_QueryValidMcaCeCount" : "SMU_MSG_QueryValidMcaCount",
+ msg,
+ (msg == SMU_MSG_QueryValidMcaCeCount) ? "CE" : "UE",
+ *count);
+
return 0;
}
--
2.34.1
More information about the amd-gfx
mailing list