[PATCH v2] drm/amdgpu: flush GPU TLB using SDMA ring

Shashank Sharma shashank.sharma at amd.com
Mon Jun 12 12:24:40 UTC 2023


Hey Alex,

On 09/06/2023 19:08, Alex Deucher wrote:
> On Fri, Jun 9, 2023 at 11:05 AM Shashank Sharma <shashank.sharma at amd.com> wrote:
>> This patch moves the code that flushes the GPU TLB using the SDMA
>> ring and splits it into two parts:
>> - a general purpose function to flush the GPU TLB using any valid GPU ring.
>> - a wrapper which consumes this function using the SDMA ring.
>>
>> The idea is that KFD/KGD code should be able to flush the GPU TLB
>> using the SDMA ring if it wants to.
>>
>> v2: Addressed review comments on RFC
>>      - Make the TLB flush function generic, and add a SDMA wrapper
>>        to it (Christian).
>>
>> Cc: Christian Koenig <christian.koenig at amd.com>
>> Cc: Philip Yang <philip.yang at amd.com>
>> Signed-off-by: Shashank Sharma <shashank.sharma at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 22 +++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c  | 36 ++++++++++++++++++++++++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h  |  1 +
>>   drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c   | 34 +++++-----------------
>>   5 files changed, 67 insertions(+), 27 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
>> index 231ca06bc9c7..05ffeb704dc4 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
>> @@ -30,6 +30,28 @@
>>   /* SDMA CSA reside in the 3rd page of CSA */
>>   #define AMDGPU_CSA_SDMA_OFFSET (4096 * 2)
>>
>> +/**
>> + * amdgpu_sdma_flush_gpu_tlb - flush gpu TLB using SDMA ring
>> + *
>> + * @adev: amdgpu device pointer
>> + *
>> + * Return: 0 on success, negative value on failure.
>> + */
>> +int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev)
>> +{
>> +       struct dma_fence *fence;
>> +       struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
> I think it would be better to put this function in amdgpu_ttm.c or
> maybe amdgpu_vm.c.  It doesn't really have anything to do with SDMA
> per se.  buffer_funcs_ring could point to any engine that could
> handle buffer ops.  It just happens to be SDMA on most chips.

Makes sense. I will probably remove the SDMA wrapper itself and just
keep the generic function; the gmc_v10_0 call site would then look
roughly like the sketch below.
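
Untested sketch, assuming the generic helper keeps the
amdgpu_ttm_flush_tlb() name and NULL-on-error contract from this
patch; gmc_v10_0 would then consume it directly:

	struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
	struct dma_fence *fence;

	/* submit the flush on the buffer funcs ring (SDMA on Navi) */
	fence = amdgpu_ttm_flush_tlb(ring);
	if (!fence) {
		DRM_ERROR("Error flushing GPU TLB using the SDMA!\n");
		return;
	}

	dma_fence_wait(fence, false);
	dma_fence_put(fence);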

- Shashank

>> +
>> +       fence = amdgpu_ttm_flush_tlb(ring);
>> +       if (fence) {
>> +               dma_fence_wait(fence, false);
>> +               dma_fence_put(fence);
>> +               return 0;
>> +       }
>> +
>> +       return -1;
> Please use an appropriate error code here rather than -1.

Noted, I will switch to a proper error code, perhaps along the lines
of the sketch below.
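
Untested sketch; -ENOMEM for the NULL-fence case is just a placeholder
until amdgpu_ttm_flush_tlb() propagates the real error code from the
job allocation:

	struct dma_fence *fence;
	long r;

	fence = amdgpu_ttm_flush_tlb(ring);
	if (!fence)
		return -ENOMEM;

	/* dma_fence_wait() returns 0 or a negative error code */
	r = dma_fence_wait(fence, false);
	dma_fence_put(fence);

	return r;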

- Shashank

> Alex
>
>> +}
>> +
>>   /*
>>    * GPU SDMA IP block helpers function.
>>    */
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
>> index fc8528812598..739077862a7d 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
>> @@ -130,5 +130,6 @@ void amdgpu_sdma_destroy_inst_ctx(struct amdgpu_device *adev,
>>           bool duplicate);
>>   void amdgpu_sdma_unset_buffer_funcs_helper(struct amdgpu_device *adev);
>>   int amdgpu_sdma_ras_sw_init(struct amdgpu_device *adev);
>> +int amdgpu_sdma_flush_gpu_tlb(struct amdgpu_device *adev);
>>
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> index c5ef7f7bdc15..1248d1dd5abc 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
>> @@ -81,6 +81,42 @@ static int amdgpu_ttm_init_on_chip(struct amdgpu_device *adev,
>>                                    false, size_in_page);
>>   }
>>
>> +/**
>> + * amdgpu_ttm_flush_tlb - flush gpu TLB using a GPU ring
>> + *
>> + * @ring: ring to submit the flushing job to
>> + *
>> + * Return: the dma fence, which must be put by the caller, or NULL on failure.
>> + */
>> +struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring)
>> +{
>> +       struct amdgpu_job *job;
>> +       struct dma_fence *fence;
>> +       struct amdgpu_device *adev = ring->adev;
>> +       int r;
>> +
>> +       mutex_lock(&adev->mman.gtt_window_lock);
>> +       r = amdgpu_job_alloc_with_ib(adev, &adev->mman.entity,
>> +                                    AMDGPU_FENCE_OWNER_UNDEFINED,
>> +                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
>> +                                    &job);
>> +       if (r)
>> +               goto error_alloc;
>> +
>> +       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
>> +       job->vm_needs_flush = true;
>> +       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>> +       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>> +       fence = amdgpu_job_submit(job);
>> +
>> +       mutex_unlock(&adev->mman.gtt_window_lock);
>> +       return fence;
>> +
>> +error_alloc:
>> +       mutex_unlock(&adev->mman.gtt_window_lock);
>> +       return NULL;
>> +}
>> +
>>   /**
>>    * amdgpu_evict_flags - Compute placement flags
>>    *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> index e2cd5894afc9..86ba70d2eb53 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.h
>> @@ -200,5 +200,6 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
>>   int amdgpu_ttm_evict_resources(struct amdgpu_device *adev, int mem_type);
>>
>>   void amdgpu_ttm_debugfs_init(struct amdgpu_device *adev);
>> +struct dma_fence *amdgpu_ttm_flush_tlb(struct amdgpu_ring *ring);
>>
>>   #endif
>> diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> index ab2556ca984e..9892b155d1ef 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/gmc_v10_0.c
>> @@ -52,6 +52,7 @@
>>   #include "athub_v2_1.h"
>>
>>   #include "amdgpu_reset.h"
>> +#include "amdgpu_sdma.h"
>>
>>   #if 0
>>   static const struct soc15_reg_golden golden_settings_navi10_hdp[] =
>> @@ -332,10 +333,6 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>                                          uint32_t vmhub, uint32_t flush_type)
>>   {
>>          struct amdgpu_ring *ring = adev->mman.buffer_funcs_ring;
>> -       struct dma_fence *fence;
>> -       struct amdgpu_job *job;
>> -
>> -       int r;
>>
>>          /* flush hdp cache */
>>          adev->hdp.funcs->flush_hdp(adev, NULL);
>> @@ -378,34 +375,17 @@ static void gmc_v10_0_flush_gpu_tlb(struct amdgpu_device *adev, uint32_t vmid,
>>                  return;
>>          }
>>
>> -       /* The SDMA on Navi has a bug which can theoretically result in memory
>> +       mutex_unlock(&adev->mman.gtt_window_lock);
>> +
>> +       /*
>> +        * The SDMA on Navi has a bug which can theoretically result in memory
>>           * corruption if an invalidation happens at the same time as an VA
>>           * translation. Avoid this by doing the invalidation from the SDMA
>>           * itself.
>>           */
>> -       r = amdgpu_job_alloc_with_ib(ring->adev, &adev->mman.entity,
>> -                                    AMDGPU_FENCE_OWNER_UNDEFINED,
>> -                                    16 * 4, AMDGPU_IB_POOL_IMMEDIATE,
>> -                                    &job);
>> -       if (r)
>> -               goto error_alloc;
>>
>> -       job->vm_pd_addr = amdgpu_gmc_pd_addr(adev->gart.bo);
>> -       job->vm_needs_flush = true;
>> -       job->ibs->ptr[job->ibs->length_dw++] = ring->funcs->nop;
>> -       amdgpu_ring_pad_ib(ring, &job->ibs[0]);
>> -       fence = amdgpu_job_submit(job);
>> -
>> -       mutex_unlock(&adev->mman.gtt_window_lock);
>> -
>> -       dma_fence_wait(fence, false);
>> -       dma_fence_put(fence);
>> -
>> -       return;
>> -
>> -error_alloc:
>> -       mutex_unlock(&adev->mman.gtt_window_lock);
>> -       DRM_ERROR("Error flushing GPU TLB using the SDMA (%d)!\n", r);
>> +       if (amdgpu_sdma_flush_gpu_tlb(adev))
>> +               DRM_ERROR("Error flushing GPU TLB using the SDMA!\n");
>>   }
>>
>>   /**
>> --
>> 2.40.1
>>
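
As a side note, once this lands, a KFD/KGD call site should be able to
consume the wrapper along these lines (hypothetical caller, only to
illustrate the intended use):

	/* e.g. after the GPUVM page tables have been updated */
	if (amdgpu_sdma_flush_gpu_tlb(adev))
		dev_err(adev->dev, "TLB flush on the SDMA ring failed\n");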

