[PATCH v2 08/10] drm/amdgpu: fix locking scope when flushing tlb
Christian König
christian.koenig at amd.com
Wed May 29 06:49:41 UTC 2024
Am 28.05.24 um 19:23 schrieb Yunxiang Li:
> Which method is used to flush the TLB does not depend on whether a reset
> is in progress or not. We should skip the flush altogether if the GPU will
> get reset. So put both paths under the reset_domain read lock.
>
> Signed-off-by: Yunxiang Li <Yunxiang.Li at amd.com>
Reviewed-by: Christian König <christian.koenig at amd.com>
Maybe add CC: stable?
Regards,
Christian.
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c | 66 +++++++++++++------------
> 1 file changed, 34 insertions(+), 32 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> index 603c0738fd03..4edd10b10a92 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
> @@ -684,12 +684,17 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
> struct amdgpu_ring *ring = &adev->gfx.kiq[inst].ring;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq[inst];
> unsigned int ndw;
> - signed long r;
> + int r;
> uint32_t seq;
>
> - if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready ||
> - !down_read_trylock(&adev->reset_domain->sem)) {
> + /*
> + * A GPU reset should flush all TLBs anyway, so no need to do
> + * this while one is ongoing.
> + */
> + if (!down_read_trylock(&adev->reset_domain->sem))
> + return 0;
>
> + if (!adev->gmc.flush_pasid_uses_kiq || !ring->sched.ready) {
> if (adev->gmc.flush_tlb_needs_extra_type_2)
> adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> 2, all_hub,
> @@ -703,43 +708,40 @@ int amdgpu_gmc_flush_gpu_tlb_pasid(struct amdgpu_device *adev, uint16_t pasid,
> adev->gmc.gmc_funcs->flush_gpu_tlb_pasid(adev, pasid,
> flush_type, all_hub,
> inst);
> - return 0;
> - }
> + r = 0;
> + } else {
> + /* 2 dwords flush + 8 dwords fence */
> + ndw = kiq->pmf->invalidate_tlbs_size + 8;
>
> - /* 2 dwords flush + 8 dwords fence */
> - ndw = kiq->pmf->invalidate_tlbs_size + 8;
> + if (adev->gmc.flush_tlb_needs_extra_type_2)
> + ndw += kiq->pmf->invalidate_tlbs_size;
>
> - if (adev->gmc.flush_tlb_needs_extra_type_2)
> - ndw += kiq->pmf->invalidate_tlbs_size;
> + if (adev->gmc.flush_tlb_needs_extra_type_0)
> + ndw += kiq->pmf->invalidate_tlbs_size;
>
> - if (adev->gmc.flush_tlb_needs_extra_type_0)
> - ndw += kiq->pmf->invalidate_tlbs_size;
> + spin_lock(&adev->gfx.kiq[inst].ring_lock);
> + amdgpu_ring_alloc(ring, ndw);
> + if (adev->gmc.flush_tlb_needs_extra_type_2)
> + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
>
> - spin_lock(&adev->gfx.kiq[inst].ring_lock);
> - amdgpu_ring_alloc(ring, ndw);
> - if (adev->gmc.flush_tlb_needs_extra_type_2)
> - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 2, all_hub);
> + if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
> + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
>
> - if (flush_type == 2 && adev->gmc.flush_tlb_needs_extra_type_0)
> - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, 0, all_hub);
> + kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
> + r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> + if (r) {
> + amdgpu_ring_undo(ring);
> + spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> + goto error_unlock_reset;
> + }
>
> - kiq->pmf->kiq_invalidate_tlbs(ring, pasid, flush_type, all_hub);
> - r = amdgpu_fence_emit_polling(ring, &seq, MAX_KIQ_REG_WAIT);
> - if (r) {
> - amdgpu_ring_undo(ring);
> + amdgpu_ring_commit(ring);
> spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> - goto error_unlock_reset;
> - }
> -
> - amdgpu_ring_commit(ring);
> - spin_unlock(&adev->gfx.kiq[inst].ring_lock);
> - r = amdgpu_fence_wait_polling(ring, seq, usec_timeout);
> - if (r < 1) {
> - dev_err(adev->dev, "wait for kiq fence error: %ld.\n", r);
> - r = -ETIME;
> - goto error_unlock_reset;
> + if (amdgpu_fence_wait_polling(ring, seq, usec_timeout) < 1) {
> + dev_err(adev->dev, "timeout waiting for kiq fence\n");
> + r = -ETIME;
> + }
> }
> - r = 0;
>
> error_unlock_reset:
> up_read(&adev->reset_domain->sem);
More information about the amd-gfx
mailing list