[RFC] drm/amdgpu: flush gpu tlb using sdma ring

Mon Jun 12 06:52:44 UTC 2023

[Public]

> -----Original Message-----
> From: amd-gfx <amd-gfx-bounces at lists.freedesktop.org> On Behalf Of
> Shashank Sharma
> Sent: Friday, June 9, 2023 6:03 PM
> To: amd-gfx at lists.freedesktop.org
> Cc: Kuehling, Felix <Felix.Kuehling at amd.com>; Somalapuram, Amaranath
> <Amaranath.Somalapuram at amd.com>; Koenig, Christian
> <Christian.Koenig at amd.com>; Sharma, Shashank
> <Shashank.Sharma at amd.com>
> Subject: [RFC] drm/amdgpu: flush gpu tlb using sdma ring
>
> This RFC patch moves the code to flush TLB using SDMA ring to a different
> place like a separate function. The idea is that KFD/KGD code should be able
> to flush GPU TLB using SDMA if they want to.
>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 41
> ++++++++++++++++++++++++
> drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>  drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 36 +++++----------------
>  3 files changed, 50 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index 231ca06bc9c7..20a5dad32bfc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -30,6 +30,47 @@
>  /* SDMA CSA reside in the 3rd page of CSA */  #define
> AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
>
> +/**
> + * amdgpu_sdma_flush_tlb - flush gpu TLB using SDMA ring
> + *
> + * @adev: amdgpu device pointer
> + *
> + * return: returns dma fence which must be put by caller
> + *
> + * The SDMA on Navi has a bug which can theoretically result in memory
> + * corruption if an invalidation happens at the same time as an VA
> + * translation. Avoid this by doing the invalidation from the SDMA
> + * itself.
> + */
> +struct dma_fence *amdgpu_sdma_flush_tlb(struct amdgpu_device *adev) {
> +     int r;
> +     struct dma_fence *fence;
> +     struct amdgpu_job *job;
> +     struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> +
> +     mutex_lock(&adev->mman.gtt_window_lock);
> +     r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,

I guess this patch needs to be reworked on top of recent entity rename to high_pr code.

Regards,
Guchun

> +                                  AMDGPU_FENCE_OWNER_UNDEFINED,
> +                                  16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> +                                  &job);
> +     if (r)
> +             goto error_alloc;
> +
> +     job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> +     job->vm_needs_flush = true;
> +     job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> +     amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> +     fence = amdgpu_job_submit(job);
> +
> +     mutex_unlock(&adev->mman.gtt_window_lock);
> +     return fence;
> +
> +error_alloc:
> +     mutex_unlock(&adev->mman.gtt_window_lock);
> +     return NULL;
> +}
> +
>  /*
>   * GPU SDMA IP block helpers function.
>   */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index fc8528812598..c895948f6e82 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -130,5 +130,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct
> amdgpu_device *adev,
>          bool duplicate);
>  void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device
> *adev);  int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
> +struct dma_fence *amdgpu_sdma_flush_tlb(struct amdgpu_device *adev);
>
>  #endif
> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> index ab2556ca984e..0bfaee00a838 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
> @@ -52,6 +52,7 @@
>  #include "athub_v2_1.h"
>
>  #include "amdgpu_reset.h"
> +#include "amdgpu_sdma.h"
>
>  #if 0
>  static const struct soc15_reg_golden golden_settings_navi10_hdp[] = @@ -
> 333,9 +334,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct
> amdgpu_device *adev, uint32_t vmid,  {
>       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>       struct dma_fence *fence;
> -     struct amdgpu_job *job;
> -
> -     int r;
>
>       /* flush hdp cache */
>       adev->hdp.funcs->flush_hdp(adev, NULL); @@ -378,34 +376,16 @@
> static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t
> vmid,
>               return;
>       }
>
> -     /* The SDMA on Navi has a bug which can theoretically result in
> memory
> -      * corruption if an invalidation happens at the same time as an VA
> -      * translation. Avoid this by doing the invalidation from the SDMA
> -      * itself.
> -      */
> -     r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
> -                                  AMDGPU_FENCE_OWNER_UNDEFINED,
> -                                  16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
> -                                  &job);
> -     if (r)
> -             goto error_alloc;
> -
> -     job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
> -     job->vm_needs_flush = true;
> -     job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
> -     amdgpu_ring_pad_ib(ring, &job->ibs[0]);
> -     fence = amdgpu_job_submit(job);
> -
>       mutex_unlock(&adev->mman.gtt_window_lock);
>
> -     dma_fence_wait(fence, false);
> -     dma_fence_put(fence);
> -
> -     return;
> +     fence = amdgpu_sdma_flush_tlb(adev);
> +     if (fence) {
> +             dma_fence_wait(fence, false);
> +             dma_fence_put(fence);
> +             return;
> +     }
>
> -error_alloc:
> -     mutex_unlock(&adev->mman.gtt_window_lock);
> -     DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
> +     DRM_ERROR("Error flushing GPU TLB using the SDMA !\n");
>  }
>
>  /**
> --
> 2.40.1