[PATCH] drm/amdkfd: Work around the multi-VF doorbell corruption issue

Samuel Zhang guoqing.zhang at amd.com
Fri Oct 18 07:54:50 UTC 2024


On the MI300 series, the doorbell gets corrupted in the multi-VF scenario.
This is a HW bug; see DEGGIGX90-5071 and SWDEV-480706 for details.

The workaround is to set CP_HQD_PQ_DOORBELL_CONTROL.DOORBELL_MODE to 1 in
multi-VF mode.

Signed-off-by: Samuel Zhang <guoqing.zhang at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c         | 2 +-
 drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
index 016290f00592..7dd24b16d9bc 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v9_4_3.c
@@ -1834,7 +1834,7 @@ static int gfx_v9_4_3_xcc_mqd_init(struct amdgpu_ring *ring, int xcc_id)
 				    DOORBELL_SOURCE, 0);
 		tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 				    DOORBELL_HIT, 0);
-		if (amdgpu_sriov_vf(adev))
+		if (amdgpu_virt_get_sriov_vf_mode(adev) == SRIOV_VF_MODE_MULTI_VF)
 			tmp = REG_SET_FIELD(tmp, CP_HQD_PQ_DOORBELL_CONTROL,
 					    DOORBELL_MODE, 1);
 	} else {
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
index 84e8ea3a8a0c..bc2dcae9e823 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_v9.c
@@ -549,7 +549,7 @@ static void init_mqd_hiq_v9_4_3(struct mqd_manager *mm, void **mqd,
 		m->cp_hqd_pq_control |= CP_HQD_PQ_CONTROL__NO_UPDATE_RPTR_MASK |
 					1 << CP_HQD_PQ_CONTROL__PRIV_STATE__SHIFT |
 					1 << CP_HQD_PQ_CONTROL__KMD_QUEUE__SHIFT;
-		if (amdgpu_sriov_vf(mm->dev->adev))
+		if (amdgpu_virt_get_sriov_vf_mode(mm->dev->adev) == SRIOV_VF_MODE_MULTI_VF)
 			m->cp_hqd_pq_doorbell_control |= 1 <<
 				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
 		m->cp_mqd_stride_size = kfd_hiq_mqd_stride(mm->dev);
@@ -703,6 +703,10 @@ static void init_mqd_v9_4_3(struct mqd_manager *mm, void **mqd,
 			m->pm4_target_xcc_in_xcp = q->pm4_target_xcc;
 		}
 
+		if (amdgpu_virt_get_sriov_vf_mode(mm->dev->adev) == SRIOV_VF_MODE_MULTI_VF)
+			m->cp_hqd_pq_doorbell_control |= 1 <<
+				CP_HQD_PQ_DOORBELL_CONTROL__DOORBELL_MODE__SHIFT;
+
 		if (xcc == 0) {
 			/* Set the MQD pointer and gart address to XCC0 MQD */
 			*mqd = m;
-- 
2.25.1



More information about the amd-gfx mailing list