[PATCH 3/3] drm/amdgpu/mes11: make fence waits synchronous

Horace Chen horace.chen at amd.com
Wed Apr 17 11:30:25 UTC 2024


The MES firmware expects synchronous operation with the
driver.  For this to work asynchronously, each caller
would need to provide its own fence location and sequence
number.

For now, add a mutex lock to serialize the MES submission.
For the SR-IOV long-wait case, split the long wait into several
shorter waits so that the wait does not impact the reset sequence.

Signed-off-by: Horace Chen <horace.chen at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c |  3 +++
 drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h |  1 +
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c  | 18 ++++++++++++++----
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 78e4f88f5134..8896be95b2c8 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -137,6 +137,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 	spin_lock_init(&adev->mes.queue_id_lock);
 	spin_lock_init(&adev->mes.ring_lock);
 	mutex_init(&adev->mes.mutex_hidden);
+	mutex_init(&adev->mes.submission_lock);
 
 	adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
 	adev->mes.vmid_mask_mmhub = 0xffffff00;
@@ -221,6 +222,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
 	idr_destroy(&adev->mes.queue_id_idr);
 	ida_destroy(&adev->mes.doorbell_ida);
 	mutex_destroy(&adev->mes.mutex_hidden);
+	mutex_destroy(&adev->mes.submission_lock);
 	return r;
 }
 
@@ -240,6 +242,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
 	idr_destroy(&adev->mes.queue_id_idr);
 	ida_destroy(&adev->mes.doorbell_ida);
 	mutex_destroy(&adev->mes.mutex_hidden);
+	mutex_destroy(&adev->mes.submission_lock);
 }
 
 static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
index 6b3e1844eac5..90af935cc889 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
@@ -85,6 +85,7 @@ struct amdgpu_mes {
 
 	struct amdgpu_ring              ring;
 	spinlock_t                      ring_lock;
+	struct mutex                    submission_lock;
 
 	const struct firmware           *fw[AMDGPU_MAX_MES_PIPES];
 
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index e40d00afd4f5..0a609a5b8835 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -162,6 +162,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	struct amdgpu_ring *ring = &mes->ring;
 	unsigned long flags;
 	signed long timeout = adev->usec_timeout;
+	signed long retry_count = 1;
 	const char *op_str, *misc_op_str;
 
 	if (x_pkt->header.opcode >= MES_SCH_API_MAX)
@@ -169,15 +170,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 
 	if (amdgpu_emu_mode) {
 		timeout *= 100;
-	} else if (amdgpu_sriov_vf(adev)) {
+	}
+
+	if (amdgpu_sriov_vf(adev) && timeout > 0) {
 		/* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
-		timeout = 15 * 600 * 1000;
+		retry_count = (15 * 600 * 1000) / timeout;
 	}
 	BUG_ON(size % 4 != 0);
 
+	mutex_lock(&mes->submission_lock);
 	spin_lock_irqsave(&mes->ring_lock, flags);
 	if (amdgpu_ring_alloc(ring, ndw)) {
 		spin_unlock_irqrestore(&mes->ring_lock, flags);
+		mutex_unlock(&mes->submission_lock);
 		return -ENOMEM;
 	}
 
@@ -199,8 +204,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
 	else
 		dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
 
-	r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
-		      timeout);
+	do {
+		r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
+				timeout);
+		retry_count--;
+	} while (retry_count > 0 && !amdgpu_in_reset(adev));
+
+	mutex_unlock(&mes->submission_lock);
 	if (r < 1) {
 
 		if (misc_op_str)
-- 
2.34.1



More information about the amd-gfx mailing list