[PATCH] drm/amdgpu: disable c-states on xgmi perfmons

Zeng, Oak Oak.Zeng at amd.com
Thu Oct 17 23:57:48 UTC 2019


Is it the design that we have to disable cstate before r/w df, 

or this is only a workaround? - in this case we need to work with df to figure out the root cause of the hang.

Regards,
Oak

-----Original Message-----
From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of Kim, Jonathan
Sent: Wednesday, October 16, 2019 8:50 PM
To: amd-gfx at lists.freedesktop.org
Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Quan, Evan <Evan.Quan at amd.com>
Subject: RE: [PATCH] drm/amdgpu: disable c-states on xgmi perfmons

+ Felix

-----Original Message-----
From: Kim, Jonathan <Jonathan.Kim at amd.com> 
Sent: Wednesday, October 16, 2019 8:49 PM
To: amd-gfx at lists.freedesktop.org
Cc: Felix.Keuhling at amd.com; Quan, Evan <Evan.Quan at amd.com>; Kim, Jonathan <Jonathan.Kim at amd.com>; Kim, Jonathan <Jonathan.Kim at amd.com>
Subject: [PATCH] drm/amdgpu: disable c-states on xgmi perfmons

read or writes to df registers when gpu df is in c-states will result in hang.  df c-states should be disabled prior to read or writes then re-enabled after read or writes.

Change-Id: I6d5a83e4fe13e29c73dfb03a94fe7c611e867fec
Signed-off-by: Jonathan Kim <Jonathan.Kim at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/df_v3_6.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
index 16fbd2bc8ad1..9a58416662e0 100644
--- a/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
+++ b/drivers/gpu/drm/amd/amdgpu/df_v3_6.c
@@ -102,6 +102,9 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
 	address = adev->nbio.funcs->get_pcie_index_offset(adev);
 	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
+	if (smu_set_df_cstate(&adev->smu, 0))
+		return 0xFFFFFFFFFFFFFFFF;
+
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
 	WREG32(data, ficaa_val);
@@ -114,6 +117,8 @@ static uint64_t df_v3_6_get_fica(struct amdgpu_device *adev,
 
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
 
+	smu_set_df_cstate(&adev->smu, 1);
+
 	return (((ficadh_val & 0xFFFFFFFFFFFFFFFF) << 32) | ficadl_val);  }
 
@@ -125,6 +130,9 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
 	address = adev->nbio.funcs->get_pcie_index_offset(adev);
 	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
+	if (smu_set_df_cstate(&adev->smu, 0))
+		return;
+
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessAddress3);
 	WREG32(data, ficaa_val);
@@ -134,8 +142,9 @@ static void df_v3_6_set_fica(struct amdgpu_device *adev, uint32_t ficaa_val,
 
 	WREG32(address, smnDF_PIE_AON_FabricIndirectConfigAccessDataHi3);
 	WREG32(data, ficadh_val);
-
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+	smu_set_df_cstate(&adev->smu, 1);
 }
 
 /*
@@ -153,12 +162,17 @@ static void df_v3_6_perfmon_rreg(struct amdgpu_device *adev,
 	address = adev->nbio.funcs->get_pcie_index_offset(adev);
 	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
+	if (smu_set_df_cstate(&adev->smu, 0))
+		return;
+
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, lo_addr);
 	*lo_val = RREG32(data);
 	WREG32(address, hi_addr);
 	*hi_val = RREG32(data);
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+	smu_set_df_cstate(&adev->smu, 1);
 }
 
 /*
@@ -175,12 +189,17 @@ static void df_v3_6_perfmon_wreg(struct amdgpu_device *adev, uint32_t lo_addr,
 	address = adev->nbio.funcs->get_pcie_index_offset(adev);
 	data = adev->nbio.funcs->get_pcie_data_offset(adev);
 
+	if (smu_set_df_cstate(&adev->smu, 0))
+		return;
+
 	spin_lock_irqsave(&adev->pcie_idx_lock, flags);
 	WREG32(address, lo_addr);
 	WREG32(data, lo_val);
 	WREG32(address, hi_addr);
 	WREG32(data, hi_val);
 	spin_unlock_irqrestore(&adev->pcie_idx_lock, flags);
+
+	smu_set_df_cstate(&adev->smu, 1);
 }
 
 /* get the number of df counters available */
--
2.17.1

_______________________________________________
amd-gfx mailing list
amd-gfx at lists.freedesktop.org
https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list