[PATCH 1/3] drm/amdgpu: re-work VM syncing

Felix Kuehling felix.kuehling at amd.com
Wed Aug 28 22:29:42 UTC 2024


On 2024-08-22 03:28, Friedrich Vock wrote:
> On 21.08.24 22:46, Felix Kuehling wrote:
>>
>> On 2024-08-21 08:03, Christian König wrote:
>>> Rework how VM operations synchronize to submissions. Provide an
>>> amdgpu_sync container to the backends instead of a reservation
>>> object and fill in the amdgpu_sync object in the higher layers
>>> of the code.
>>>
>>> No intended functional change, just prepares for upcoming changes.
>>>
>>> Signed-off-by: Christian König <christian.koenig at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c      | 84 +++++++++++++--------
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h      | 11 +--
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c  |  7 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c   |  2 +-
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c | 16 +---
>>
>> There are two calls to amdgpu_vm_update_range in amdkfd/kfd_svm.c 
>> that would need to be updated as well.
>
> I don't think any change should be needed there? Both calls pass NULL 
> for the resv.

Right, sorry, the change to the function signature looked bigger than it 
was due to formatting changes. The patch is

Acked-by: Felix Kuehling <felix.kuehling at amd.com>
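
(For reference, the prototype change that makes the diff look large is just
the dma_resv parameter becoming an amdgpu_sync pointer; condensed from the
amdgpu_vm.h hunk quoted below:

    int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
                               bool immediate, bool unlocked, bool flush_tlb,
                               bool allow_override, struct amdgpu_sync *sync,
                               uint64_t start, uint64_t last, uint64_t flags,
                               uint64_t offset, uint64_t vram_base,
                               struct ttm_resource *res, dma_addr_t *pages_addr,
                               struct dma_fence **fence);

The callers now fill the sync container themselves with amdgpu_sync_resv()
before calling in, instead of handing the reservation object down to the
backends.)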


> All this patch changes is that we're now passing NULL for the 
> amdgpu_sync - but the behavior with a NULL amdgpu_sync with this patch 
> is the same as with a NULL dma_resv without this patch, so nothing 
> needs to change.
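
For completeness: both the CPU and SDMA backends short-circuit on a NULL
argument before and after this patch, so the kfd_svm.c call sites that pass
NULL keep identical behavior. Roughly, from the amdgpu_vm_cpu.c hunk quoted
below (the SDMA path has the same check):

    /* before: nothing to wait for without a reservation object */
    if (!resv)
            return 0;

    /* after: nothing to wait for without a sync container */
    if (!sync)
            return 0;
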
>
> Regards,
> Friedrich
>
>>
>> Regards,
>>    Felix
>>
>>
>>>   5 files changed, 65 insertions(+), 55 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> index bcb729094521..ba99d428610a 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>> @@ -838,7 +838,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
>>>       params.vm = vm;
>>>       params.immediate = immediate;
>>> -    r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>>> +    r = vm->update_funcs->prepare(&params, NULL);
>>>       if (r)
>>>           goto error;
>>> @@ -933,7 +933,7 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
>>>    * @unlocked: unlocked invalidation during MM callback
>>>    * @flush_tlb: trigger tlb invalidation after update completed
>>>    * @allow_override: change MTYPE for local NUMA nodes
>>> - * @resv: fences we need to sync to
>>> + * @sync: fences we need to sync to
>>>    * @start: start of mapped range
>>>    * @last: last mapped entry
>>>    * @flags: flags for the entries
>>> @@ -949,16 +949,16 @@ amdgpu_vm_tlb_flush(struct amdgpu_vm_update_params *params,
>>>    * 0 for success, negative erro code for failure.
>>>    */
>>>   int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> -               bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
>>> -               struct dma_resv *resv, uint64_t start, uint64_t last,
>>> -               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>> +               bool allow_override, struct amdgpu_sync *sync,
>>> +               uint64_t start, uint64_t last, uint64_t flags,
>>> +               uint64_t offset, uint64_t vram_base,
>>>                  struct ttm_resource *res, dma_addr_t *pages_addr,
>>>                  struct dma_fence **fence)
>>>   {
>>>       struct amdgpu_vm_tlb_seq_struct *tlb_cb;
>>>       struct amdgpu_vm_update_params params;
>>>       struct amdgpu_res_cursor cursor;
>>> -    enum amdgpu_sync_mode sync_mode;
>>>       int r, idx;
>>>       if (!drm_dev_enter(adev_to_drm(adev), &idx))
>>> @@ -991,14 +991,6 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>       params.allow_override = allow_override;
>>>       INIT_LIST_HEAD(&params.tlb_flush_waitlist);
>>> -    /* Implicitly sync to command submissions in the same VM before
>>> -     * unmapping. Sync to moving fences before mapping.
>>> -     */
>>> -    if (!(flags & AMDGPU_PTE_VALID))
>>> -        sync_mode = AMDGPU_SYNC_EQ_OWNER;
>>> -    else
>>> -        sync_mode = AMDGPU_SYNC_EXPLICIT;
>>> -
>>>       amdgpu_vm_eviction_lock(vm);
>>>       if (vm->evicting) {
>>>           r = -EBUSY;
>>> @@ -1013,7 +1005,7 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>           dma_fence_put(tmp);
>>>       }
>>> -    r = vm->update_funcs->prepare(&params, resv, sync_mode);
>>> +    r = vm->update_funcs->prepare(&params, sync);
>>>       if (r)
>>>           goto error_free;
>>> @@ -1155,23 +1147,30 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>       struct amdgpu_bo *bo = bo_va->base.bo;
>>>       struct amdgpu_vm *vm = bo_va->base.vm;
>>>       struct amdgpu_bo_va_mapping *mapping;
>>> +    struct dma_fence **last_update;
>>>       dma_addr_t *pages_addr = NULL;
>>>       struct ttm_resource *mem;
>>> -    struct dma_fence **last_update;
>>> +    struct amdgpu_sync sync;
>>>       bool flush_tlb = clear;
>>> -    bool uncached;
>>> -    struct dma_resv *resv;
>>>       uint64_t vram_base;
>>>       uint64_t flags;
>>> +    bool uncached;
>>>       int r;
>>> +    amdgpu_sync_create(&sync);
>>>       if (clear || !bo) {
>>>           mem = NULL;
>>> -        resv = vm->root.bo->tbo.base.resv;
>>> +
>>> +        /* Implicitly sync to command submissions in the same VM before
>>> +         * unmapping.
>>> +         */
>>> +        r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
>>> +                     AMDGPU_SYNC_EQ_OWNER, vm);
>>> +        if (r)
>>> +            goto error_free;
>>>       } else {
>>>           struct drm_gem_object *obj = &bo->tbo.base;
>>> -        resv = bo->tbo.base.resv;
>>>           if (obj->import_attach && bo_va->is_xgmi) {
>>>               struct dma_buf *dma_buf = obj->import_attach->dmabuf;
>>>               struct drm_gem_object *gobj = dma_buf->priv;
>>> @@ -1185,6 +1184,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>           if (mem && (mem->mem_type == TTM_PL_TT ||
>>>                   mem->mem_type == AMDGPU_PL_PREEMPT))
>>>               pages_addr = bo->tbo.ttm->dma_address;
>>> +
>>> +        /* Implicitly sync to moving fences before mapping anything */
>>> +        r = amdgpu_sync_resv(adev, &sync, bo->tbo.base.resv,
>>> +                     AMDGPU_SYNC_EXPLICIT, vm);
>>> +        if (r)
>>> +            goto error_free;
>>>       }
>>>       if (bo) {
>>> @@ -1234,12 +1239,12 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>           trace_amdgpu_vm_bo_update(mapping);
>>>           r = amdgpu_vm_update_range(adev, vm, false, false, flush_tlb,
>>> -                       !uncached, resv, mapping->start, mapping->last,
>>> -                       update_flags, mapping->offset,
>>> -                       vram_base, mem, pages_addr,
>>> -                       last_update);
>>> +                       !uncached, &sync, mapping->start,
>>> +                       mapping->last, update_flags,
>>> +                       mapping->offset, vram_base, mem,
>>> +                       pages_addr, last_update);
>>>           if (r)
>>> -            return r;
>>> +            goto error_free;
>>>       }
>>>       /* If the BO is not in its preferred location add it back to
>>> @@ -1267,7 +1272,9 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev, struct amdgpu_bo_va *bo_va,
>>>               trace_amdgpu_vm_bo_mapping(mapping);
>>>       }
>>> -    return 0;
>>> +error_free:
>>> +    amdgpu_sync_free(&sync);
>>> +    return r;
>>>   }
>>>   /**
>>> @@ -1414,25 +1421,34 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>                 struct amdgpu_vm *vm,
>>>                 struct dma_fence **fence)
>>>   {
>>> -    struct dma_resv *resv = vm->root.bo->tbo.base.resv;
>>>       struct amdgpu_bo_va_mapping *mapping;
>>> -    uint64_t init_pte_value = 0;
>>>       struct dma_fence *f = NULL;
>>> +    struct amdgpu_sync sync;
>>>       int r;
>>> +
>>> +    /*
>>> +     * Implicitly sync to command submissions in the same VM before
>>> +     * unmapping.
>>> +     */
>>> +    amdgpu_sync_create(&sync);
>>> +    r = amdgpu_sync_resv(adev, &sync, vm->root.bo->tbo.base.resv,
>>> +                 AMDGPU_SYNC_EQ_OWNER, vm);
>>> +    if (r)
>>> +        goto error_free;
>>> +
>>>       while (!list_empty(&vm->freed)) {
>>>           mapping = list_first_entry(&vm->freed,
>>>               struct amdgpu_bo_va_mapping, list);
>>>           list_del(&mapping->list);
>>>           r = amdgpu_vm_update_range(adev, vm, false, false, true, false,
>>> -                       resv, mapping->start, mapping->last,
>>> -                       init_pte_value, 0, 0, NULL, NULL,
>>> -                       &f);
>>> +                       &sync, mapping->start, mapping->last,
>>> +                       0, 0, 0, NULL, NULL, &f);
>>>           amdgpu_vm_free_mapping(adev, vm, mapping, f);
>>>           if (r) {
>>>               dma_fence_put(f);
>>> -            return r;
>>> +            goto error_free;
>>>           }
>>>       }
>>> @@ -1443,7 +1459,9 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
>>>           dma_fence_put(f);
>>>       }
>>> -    return 0;
>>> +error_free:
>>> +    amdgpu_sync_free(&sync);
>>> +    return r;
>>>   }
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> index 046949c4b695..1a759012ce93 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>> @@ -304,8 +304,8 @@ struct amdgpu_vm_update_params {
>>>   struct amdgpu_vm_update_funcs {
>>>       int (*map_table)(struct amdgpu_bo_vm *bo);
>>> -    int (*prepare)(struct amdgpu_vm_update_params *p, struct dma_resv *resv,
>>> -               enum amdgpu_sync_mode sync_mode);
>>> +    int (*prepare)(struct amdgpu_vm_update_params *p,
>>> +               struct amdgpu_sync *sync);
>>>       int (*update)(struct amdgpu_vm_update_params *p,
>>>                 struct amdgpu_bo_vm *bo, uint64_t pe, uint64_t addr,
>>>                 unsigned count, uint32_t incr, uint64_t flags);
>>> @@ -505,9 +505,10 @@ int amdgpu_vm_flush_compute_tlb(struct amdgpu_device *adev,
>>>   void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
>>>                   struct amdgpu_vm *vm, struct amdgpu_bo *bo);
>>>   int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>> -               bool immediate, bool unlocked, bool flush_tlb, bool allow_override,
>>> -               struct dma_resv *resv, uint64_t start, uint64_t last,
>>> -               uint64_t flags, uint64_t offset, uint64_t vram_base,
>>> +               bool immediate, bool unlocked, bool flush_tlb,
>>> +               bool allow_override, struct amdgpu_sync *sync,
>>> +               uint64_t start, uint64_t last, uint64_t flags,
>>> +               uint64_t offset, uint64_t vram_base,
>>>                  struct ttm_resource *res, dma_addr_t *pages_addr,
>>>                  struct dma_fence **fence);
>>>   int amdgpu_vm_bo_update(struct amdgpu_device *adev,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>> index 3895bd7d176a..9ff59a4e6f15 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_cpu.c
>>> @@ -46,13 +46,12 @@ static int amdgpu_vm_cpu_map_table(struct amdgpu_bo_vm *table)
>>>    * Negativ errno, 0 for success.
>>>    */
>>>   static int amdgpu_vm_cpu_prepare(struct amdgpu_vm_update_params *p,
>>> -                 struct dma_resv *resv,
>>> -                 enum amdgpu_sync_mode sync_mode)
>>> +                 struct amdgpu_sync *sync)
>>>   {
>>> -    if (!resv)
>>> +    if (!sync)
>>>           return 0;
>>> -    return amdgpu_bo_sync_wait_resv(p->adev, resv, sync_mode, p->vm, true);
>>> +    return amdgpu_sync_wait(sync, true);
>>>   }
>>>   /**
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> index e39d6e7643bf..a076f43097e4 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
>>> @@ -403,7 +403,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
>>>       params.vm = vm;
>>>       params.immediate = immediate;
>>> -    r = vm->update_funcs->prepare(&params, NULL, AMDGPU_SYNC_EXPLICIT);
>>> +    r = vm->update_funcs->prepare(&params, NULL);
>>>       if (r)
>>>           goto exit;
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> index 9b748d7058b5..4772fba33285 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_sdma.c
>>> @@ -77,32 +77,24 @@ static int amdgpu_vm_sdma_alloc_job(struct amdgpu_vm_update_params *p,
>>>    * amdgpu_vm_sdma_prepare - prepare SDMA command submission
>>>    *
>>>    * @p: see amdgpu_vm_update_params definition
>>> - * @resv: reservation object with embedded fence
>>> - * @sync_mode: synchronization mode
>>> + * @sync: amdgpu_sync object with fences to wait for
>>>    *
>>>    * Returns:
>>>    * Negativ errno, 0 for success.
>>>    */
>>>   static int amdgpu_vm_sdma_prepare(struct amdgpu_vm_update_params *p,
>>> -                  struct dma_resv *resv,
>>> -                  enum amdgpu_sync_mode sync_mode)
>>> +                  struct amdgpu_sync *sync)
>>>   {
>>> -    struct amdgpu_sync sync;
>>>       int r;
>>>       r = amdgpu_vm_sdma_alloc_job(p, 0);
>>>       if (r)
>>>           return r;
>>> -    if (!resv)
>>> +    if (!sync)
>>>           return 0;
>>> -    amdgpu_sync_create(&sync);
>>> -    r = amdgpu_sync_resv(p->adev, &sync, resv, sync_mode, p->vm);
>>> -    if (!r)
>>> -        r = amdgpu_sync_push_to_job(&sync, p->job);
>>> -    amdgpu_sync_free(&sync);
>>> -
>>> +    r = amdgpu_sync_push_to_job(sync, p->job);
>>>       if (r) {
>>>           p->num_dw_left = 0;
>>>           amdgpu_job_free(p->job);

