[PATCH 3/3] drm/amdgpu/mes11: make fence waits synchronous
Christian König
christian.koenig at amd.com
Wed Apr 17 12:49:27 UTC 2024
Am 17.04.24 um 13:30 schrieb Horace Chen:
> The MES firmware expects synchronous operation with the
> driver. For this to work asynchronously, each caller
> would need to provide its own fence location and sequence
> number.
Well that's certainly not correct. The seqno takes care that we can wait
async for the submission to complete.
So clear NAK for that patch here.
Regards,
Christian.
>
> For now, add a mutex lock to serialize the MES submission.
> For SR-IOV long-wait case, break the long-wait to separated
> part to prevent this wait from impacting reset sequence.
>
> Signed-off-by: Horace Chen <horace.chen at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 3 +++
> drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h | 1 +
> drivers/gpu/drm/amd/amdgpu/mes_v11_0.c | 18 ++++++++++++++----
> 3 files changed, 18 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> index 78e4f88f5134..8896be95b2c8 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> @@ -137,6 +137,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
> spin_lock_init(&adev->mes.queue_id_lock);
> spin_lock_init(&adev->mes.ring_lock);
> mutex_init(&adev->mes.mutex_hidden);
> + mutex_init(&adev->mes.submission_lock);
>
> adev->mes.total_max_queue = AMDGPU_FENCE_MES_QUEUE_ID_MASK;
> adev->mes.vmid_mask_mmhub = 0xffffff00;
> @@ -221,6 +222,7 @@ int amdgpu_mes_init(struct amdgpu_device *adev)
> idr_destroy(&adev->mes.queue_id_idr);
> ida_destroy(&adev->mes.doorbell_ida);
> mutex_destroy(&adev->mes.mutex_hidden);
> + mutex_destroy(&adev->mes.submission_lock);
> return r;
> }
>
> @@ -240,6 +242,7 @@ void amdgpu_mes_fini(struct amdgpu_device *adev)
> idr_destroy(&adev->mes.queue_id_idr);
> ida_destroy(&adev->mes.doorbell_ida);
> mutex_destroy(&adev->mes.mutex_hidden);
> + mutex_destroy(&adev->mes.submission_lock);
> }
>
> static void amdgpu_mes_queue_free_mqd(struct amdgpu_mes_queue *q)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> index 6b3e1844eac5..90af935cc889 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.h
> @@ -85,6 +85,7 @@ struct amdgpu_mes {
>
> struct amdgpu_ring ring;
> spinlock_t ring_lock;
> + struct mutex submission_lock;
>
> const struct firmware *fw[AMDGPU_MAX_MES_PIPES];
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> index e40d00afd4f5..0a609a5b8835 100644
> --- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
> @@ -162,6 +162,7 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> struct amdgpu_ring *ring = &mes->ring;
> unsigned long flags;
> signed long timeout = adev->usec_timeout;
> + signed long retry_count = 1;
> const char *op_str, *misc_op_str;
>
> if (x_pkt->header.opcode >= MES_SCH_API_MAX)
> @@ -169,15 +170,19 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
>
> if (amdgpu_emu_mode) {
> timeout *= 100;
> - } else if (amdgpu_sriov_vf(adev)) {
> + }
> +
> + if (amdgpu_sriov_vf(adev) && timeout > 0) {
> /* Worst case in sriov where all other 15 VF timeout, each VF needs about 600ms */
> - timeout = 15 * 600 * 1000;
> + retry_count = (15 * 600 * 1000) / timeout;
> }
> BUG_ON(size % 4 != 0);
>
> + mutex_lock(&mes->submission_lock);
> spin_lock_irqsave(&mes->ring_lock, flags);
> if (amdgpu_ring_alloc(ring, ndw)) {
> spin_unlock_irqrestore(&mes->ring_lock, flags);
> + mutex_unlock(&mes->submission_lock);
> return -ENOMEM;
> }
>
> @@ -199,8 +204,13 @@ static int mes_v11_0_submit_pkt_and_poll_completion(struct amdgpu_mes *mes,
> else
> dev_dbg(adev->dev, "MES msg=%d was emitted\n", x_pkt->header.opcode);
>
> - r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
> - timeout);
> + do {
> + r = amdgpu_fence_wait_polling(ring, ring->fence_drv.sync_seq,
> + timeout);
> + retry_count--;
> + } while (retry_count > 0 && !amdgpu_in_reset(adev));
> +
> + mutex_unlock(&mes->submission_lock);
> if (r < 1) {
>
> if (misc_op_str)
More information about the amd-gfx
mailing list