[PATCH 2/3] drm/amdgpu: Add psp v13 function to query boot status

Hawking Zhang Hawking.Zhang at amd.com
Wed Nov 1 11:22:24 UTC 2023


Add psp v13 function to query boot status.

v2: limit the use case to dGPU only (Lijo)

Signed-off-by: Hawking Zhang <Hawking.Zhang at amd.com>
Reviewed-by: Tao Zhou <tao.zhou1 at amd.com>
Reviewed-by: Yang Wang <kevinyang.wang at amd.com>
Reviewed-by: Le Ma <le.ma at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c | 15 +++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h |  3 +
 drivers/gpu/drm/amd/amdgpu/psp_v13_0.c  | 78 +++++++++++++++++++++++++
 3 files changed, 96 insertions(+)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
index e6dc3cfbac0e..66d9c189af29 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.c
@@ -2120,6 +2120,21 @@ int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev)
 	return ret;
 }
 
+int amdgpu_psp_query_boot_status(struct amdgpu_device *adev)
+{
+	struct psp_context *psp = &adev->psp;
+	int ret;
+
+	if (amdgpu_sriov_vf(adev) || (adev->flags & AMD_IS_APU))
+		return 0;
+
+	if (psp->funcs &&
+	    psp->funcs->query_boot_status)
+		ret = psp->funcs->query_boot_status(psp);
+
+	return ret;
+}
+
 static int psp_hw_start(struct psp_context *psp)
 {
 	struct amdgpu_device *adev = psp->adev;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
index 7111dd32e66f..5d36ad3f48c7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_psp.h
@@ -134,6 +134,7 @@ struct psp_funcs {
 	int (*update_spirom)(struct psp_context *psp, uint64_t fw_pri_mc_addr);
 	int (*vbflash_stat)(struct psp_context *psp);
 	int (*fatal_error_recovery_quirk)(struct psp_context *psp);
+	int (*query_boot_status)(struct psp_context *psp);
 };
 
 struct ta_funcs {
@@ -537,4 +538,6 @@ int is_psp_fw_valid(struct psp_bin_desc bin);
 
 int amdgpu_psp_wait_for_bootloader(struct amdgpu_device *adev);
 
+int amdgpu_psp_query_boot_status(struct amdgpu_device *adev);
+
 #endif
diff --git a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
index 4142e2fcd866..3cf4684d0d3f 100644
--- a/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/psp_v13_0.c
@@ -759,6 +759,83 @@ static int psp_v13_0_fatal_error_recovery_quirk(struct psp_context *psp)
 	return 0;
 }
 
+
+static void psp_v13_0_boot_error_reporting(struct amdgpu_device *adev,
+					   uint32_t inst,
+					   uint32_t boot_error)
+{
+	uint32_t socket_id;
+	uint32_t aid_id;
+	uint32_t hbm_id;
+	uint32_t reg_data;
+
+	socket_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, SOCKET_ID);
+	aid_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, AID_ID);
+	hbm_id = REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, HBM_ID);
+
+	reg_data = RREG32_SOC15(MP0, inst, regMP0_SMN_C2PMSG_109);
+	dev_info(adev->dev, "socket: %d, aid: %d, firmware boot failed, fw status is 0x%x\n",
+		 socket_id, aid_id, reg_data);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_MEM_TRAINING))
+		dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, memory training failed\n",
+			 socket_id, aid_id, hbm_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_FW_LOAD))
+		dev_info(adev->dev, "socket: %d, aid: %d, firmware load failed at boot time\n",
+			 socket_id, aid_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_WAFL_LINK_TRAINING))
+		dev_info(adev->dev, "socket: %d, aid: %d, wafl link training failed\n",
+			 socket_id, aid_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_XGMI_LINK_TRAINING))
+		dev_info(adev->dev, "socket: %d, aid: %d, xgmi link training failed\n",
+			 socket_id, aid_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_CP_LINK_TRAINING))
+		dev_info(adev->dev, "socket: %d, aid: %d, usr cp link training failed\n",
+			 socket_id, aid_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_USR_DP_LINK_TRAINING))
+		dev_info(adev->dev, "socket: %d, aid: %d, usr dp link training failed\n",
+			 socket_id, aid_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_MEM_TEST))
+		dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm memory test failed\n",
+			 socket_id, aid_id, hbm_id);
+
+	if (REG_GET_FIELD(boot_error, MP0_SMN_C2PMSG_126, GPU_ERR_HBM_BIST_TEST))
+		dev_info(adev->dev, "socket: %d, aid: %d, hbm: %d, hbm bist test failed\n",
+			 socket_id, aid_id, hbm_id);
+}
+
+static int psp_v13_0_query_boot_status(struct psp_context *psp)
+{
+	struct amdgpu_device *adev = psp->adev;
+	int inst_mask = adev->aid_mask;
+	uint32_t reg_data;
+	uint32_t i;
+	int ret = 0;
+
+	if (amdgpu_ip_version(adev, MP0_HWIP, 0) != IP_VERSION(13, 0, 6))
+		return 0;
+
+	if (RREG32_SOC15(MP0, 0, regMP0_SMN_C2PMSG_59) < 0x00a10007)
+		return 0;
+
+	for_each_inst(i, inst_mask) {
+		reg_data = RREG32_SOC15(MP0, i, regMP0_SMN_C2PMSG_126);
+		if (!REG_GET_FIELD(reg_data, MP0_SMN_C2PMSG_126, BOOT_STATUS)) {
+			psp_v13_0_boot_error_reporting(adev, i, reg_data);
+			ret = -EINVAL;
+			break;
+		}
+	}
+
+	return ret;
+}
+
 static const struct psp_funcs psp_v13_0_funcs = {
 	.init_microcode = psp_v13_0_init_microcode,
 	.wait_for_bootloader = psp_v13_0_wait_for_bootloader_steady_state,
@@ -781,6 +858,7 @@ static const struct psp_funcs psp_v13_0_funcs = {
 	.update_spirom = psp_v13_0_update_spirom,
 	.vbflash_stat = psp_v13_0_vbflash_status,
 	.fatal_error_recovery_quirk = psp_v13_0_fatal_error_recovery_quirk,
+	.query_boot_status = psp_v13_0_query_boot_status,
 };
 
 void psp_v13_0_set_psp_funcs(struct psp_context *psp)
-- 
2.17.1



More information about the amd-gfx mailing list