[PATCH v2 2/2] drm/amd/pm: Get xgmi link status for XGMI_v_6_4_0
Asad Kamal
asad.kamal at amd.com
Wed Nov 13 08:24:33 UTC 2024
Get the XGMI_v_6_4_0 link status and populate it in metrics v1_7 for
SMU_v_13_0_6.
v2: Get the link status register value for each SoC from a separate
function (Lijo)
Signed-off-by: Asad Kamal <asad.kamal at amd.com>
Reviewed-by: Lijo Lazar <lijo.lazar at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c | 41 +++++++++++++++++++
drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h | 2 +
.../drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c | 4 +-
3 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
index b47422b0b5b1..74b4349e345a 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
@@ -40,6 +40,11 @@
#define smnPCS_GOPX1_PCS_ERROR_STATUS 0x12200210
#define smnPCS_GOPX1_PCS_ERROR_NONCORRECTABLE_MASK 0x12200218
+#define XGMI_STATE_DISABLE 0xD1
+#define XGMI_STATE_LS0 0x81
+#define XGMI_LINK_ACTIVE 1
+#define XGMI_LINK_INACTIVE 0
+
static DEFINE_MUTEX(xgmi_mutex);
#define AMDGPU_MAX_XGMI_DEVICE_PER_HIVE 4
@@ -289,6 +294,42 @@ static const struct amdgpu_pcs_ras_field xgmi3x16_pcs_ras_fields[] = {
SOC15_REG_FIELD(PCS_XGMI3X16_PCS_ERROR_STATUS, RxCMDPktErr)},
};
+/* Read the raw PCS state history (hist1) register for one XGMI v6.4 link. */
+static u32 xgmi_v6_4_get_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+	const u32 pcs_state_hist1 = 0x11a00070;
+	const int num_inst = 2;
+	const u32 inst_sel = global_link_num % num_inst;
+	u64 reg;
+
+	/* Instance select goes in at bit 20; the quotient is handed to the ext SMN encoder. */
+	reg = adev->asic_funcs->encode_ext_smn_addressing(global_link_num / num_inst) +
+	      (pcs_state_hist1 | (inst_sel << 20));
+
+	return RREG32_PCIE_EXT(reg);
+}
+
+/* Return XGMI_LINK_ACTIVE/XGMI_LINK_INACTIVE for a link, or a negative errno. */
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev, int global_link_num)
+{
+	u32 state;
+
+	switch (amdgpu_ip_version(adev, XGMI_HWIP, 0)) {
+	case IP_VERSION(6, 4, 0):
+		/* Only the low byte is compared against the state codes. */
+		state = xgmi_v6_4_get_link_status(adev, global_link_num) & 0xFF;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* A disabled link is reported as an error rather than as inactive. */
+	if (state == XGMI_STATE_DISABLE)
+		return -ENOLINK;
+
+	return state == XGMI_STATE_LS0 ? XGMI_LINK_ACTIVE : XGMI_LINK_INACTIVE;
+}
+
/**
* DOC: AMDGPU XGMI Support
*
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
index 8cc7ab38db7c..d1282b4c6348 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.h
@@ -84,5 +84,7 @@ int amdgpu_xgmi_reset_on_init(struct amdgpu_device *adev);
int amdgpu_xgmi_request_nps_change(struct amdgpu_device *adev,
struct amdgpu_hive_info *hive,
int req_nps_mode);
+int amdgpu_get_xgmi_link_status(struct amdgpu_device *adev,
+ int global_link_num);
#endif
diff --git a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
index 11ecaa62f419..ab3c93ddce46 100644
--- a/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
+++ b/drivers/gpu/drm/amd/pm/swsmu/smu13/smu_v13_0_6_ppt.c
@@ -96,7 +96,6 @@ MODULE_FIRMWARE("amdgpu/smu_13_0_14.bin");
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xE0
#define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0x5
#define LINK_SPEED_MAX 4
-
#define SMU_13_0_6_DSCLK_THRESHOLD 140
#define MCA_BANK_IPID(_ip, _hwid, _type) \
@@ -2448,6 +2447,9 @@ static ssize_t smu_v13_0_6_get_gpu_metrics(struct smu_context *smu, void **table
SMUQ10_ROUND(GET_METRIC_FIELD(XgmiReadDataSizeAcc, flag)[i]);
gpu_metrics->xgmi_write_data_acc[i] =
SMUQ10_ROUND(GET_METRIC_FIELD(XgmiWriteDataSizeAcc, flag)[i]);
+ ret = amdgpu_get_xgmi_link_status(adev, i);
+ if (ret >= 0)
+ gpu_metrics->xgmi_link_status[i] = ret;
}
gpu_metrics->num_partition = adev->xcp_mgr->num_xcps;
--
2.46.0
More information about the amd-gfx
mailing list