[PATCH v3 1/2] drm/amdgpu: Acquire ttm locks for dmaunmap

Thu Oct 26 06:06:49 UTC 2023

Am 25.10.23 um 20:45 schrieb Felix Kuehling:
> On 2023-10-25 02:12, Christian König wrote:
>> Am 24.10.23 um 21:20 schrieb David Francis:
>>> dmaunmap can call ttm_bo_validate, which expects the
>>> ttm dma_resv to be held.
>>
>> Well first of all the dma_resv object isn't related to TTM.
>>
>>>
>>> Acquire the locks in amdgpu_amdkfd_gpuvm_dmaunmap_mem.
>>>
>>> Because the dmaunmap step can now fail, two new numbers
>>> need to be tracked. n_dmaunmap_success tracks the number
>>> of devices that have completed dmaunmap. If a device fails
>>> to dmaunmap due to a signal interrupt, n_dmaunmap_bos tracks
>>> the number of bos on that device that were successfully
>>> dmaunmapped.
>>>
>>> Track those values in struct kgd_mem.
>>>
>>> This failure can also cause the sync_memory step of the ioctl
>>> to be repeated; it is idempotent, so this should not cause any issues.
>>>
>>> Signed-off-by: David Francis <David.Francis at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h    |  6 ++++-
>>>   .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c  | 23 
>>> +++++++++++++++----
>>>   drivers/gpu/drm/amd/amdkfd/kfd_chardev.c      | 19 ++++++++++-----
>>>   3 files changed, 37 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> index 3ad8dc523b42..c60564ec4312 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.h
>>> @@ -86,6 +86,10 @@ struct kgd_mem {
>>>         bool aql_queue;
>>>       bool is_imported;
>>> +
>>> +    /* Used to track successful dmaunmap across retries in unmap 
>>> ioctl */
>>> +    uint32_t n_dmaunmap_success;
>>> +    uint32_t n_dmaunmap_bos;
>>>   };
>>>     /* KFD Memory Eviction */
>>> @@ -302,7 +306,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(struct 
>>> amdgpu_device *adev,
>>>                         struct kgd_mem *mem, void *drm_priv);
>>>   int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>>           struct amdgpu_device *adev, struct kgd_mem *mem, void 
>>> *drm_priv);
>>> -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
>>> *drm_priv);
>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
>>> *drm_priv);
>>>   int amdgpu_amdkfd_gpuvm_sync_memory(
>>>           struct amdgpu_device *adev, struct kgd_mem *mem, bool intr);
>>>   int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_mem *mem,
>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c 
>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> index 54f31a420229..c431132d7cc1 100644
>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
>>> @@ -2102,21 +2102,36 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
>>>       return ret;
>>>   }
>>>   -void amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
>>> *drm_priv)
>>> +int amdgpu_amdkfd_gpuvm_dmaunmap_mem(struct kgd_mem *mem, void 
>>> *drm_priv)
>>>   {
>>>       struct kfd_mem_attachment *entry;
>>>       struct amdgpu_vm *vm;
>>> +    int ret;
>>> +    int i = 0;
>>>         vm = drm_priv_to_vm(drm_priv);
>>>         mutex_lock(&mem->lock);
>>>         list_for_each_entry(entry, &mem->attachments, list) {
>>> -        if (entry->bo_va->base.vm == vm)
>>> -            kfd_mem_dmaunmap_attachment(mem, entry);
>>> -    }
>>> +        if (i >= mem->n_dmaunmap_bos) {
>>> +            ret = amdgpu_bo_reserve(entry->bo_va->base.bo, false);
>>> +            if (ret) {
>>> +                mem->n_dmaunmap_bos = i;
>>> +                goto out;
>>> +            }
>>> +
>>> +            if (entry->bo_va->base.vm == vm)
>>> +                kfd_mem_dmaunmap_attachment(mem, entry);
>>>   + amdgpu_bo_unreserve(entry->bo_va->base.bo);
>>> +        }
>>> +        i++;
>>> +    }
>>
>> WOW, big NAK to that!
>>
>> You are essentially re-inventing the locking multiple BOs at the same 
>> time, please don't do that. Instead use dma_exec or ttm_exec_buf for 
>> that.
>
> I don't think that's the intention here. We're not locking multiple 
> BOs at the same time. The purpose of all this counting is to safely 
> handle ioctl restart for signal handling without dmaunmapping the same 
> thing twice.

Well, it's not mandatory to lock multiple BOs at the same time but it 
solves the problem that this shouldn't be interrupted.

>
> That said, I'm not a fan of this counting approach either and 
> suggested a simpler way of doing this by adding a flag in the 
> kfd_mem_attachment structure.

When we look strictly at the rules for IOCTLs and restarting them then 
that counting approach is not really allowed either.

Kernel operations should either completely fail, fully complete or 
explicitly return how much they completed (e.g. how many bytes 
transferred for example). That we only partially complete and track that 
state inside the kernel is usually a no-go.

That said is it possible that userspace tries to do this operation 
twice? If yes what happens?

For this local problem the alternative to using drm_exec or ttm_exec_* 
would be to use non-interruptible waits#.

Regards,
Christian.

>
> Regards,
>   Felix
>
>
>>
>> This also avoids all the fail handling.
>>
>> Regards,
>> Christian.
>>
>>> +    mem->n_dmaunmap_bos = 0;
>>> +out:
>>>       mutex_unlock(&mem->lock);
>>> +    return ret;
>>>   }
>>>     int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>> diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c 
>>> b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> index 06988cf1db51..66dee67ad859 100644
>>> --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
>>> @@ -1366,7 +1366,7 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>   {
>>>       struct kfd_ioctl_unmap_memory_from_gpu_args *args = data;
>>>       struct kfd_process_device *pdd, *peer_pdd;
>>> -    void *mem;
>>> +    struct kgd_mem *mem;
>>>       long err = 0;
>>>       uint32_t *devices_arr = NULL, i;
>>>       bool flush_tlb;
>>> @@ -1400,7 +1400,7 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>           goto bind_process_to_device_failed;
>>>       }
>>>   -    mem = kfd_process_device_translate_handle(pdd,
>>> +    mem = (struct kgd_mem *)kfd_process_device_translate_handle(pdd,
>>>                           GET_IDR_HANDLE(args->handle));
>>>       if (!mem) {
>>>           err = -ENOMEM;
>>> @@ -1414,7 +1414,7 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>               goto get_mem_obj_from_handle_failed;
>>>           }
>>>           err = amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
>>> -            peer_pdd->dev->adev, (struct kgd_mem *)mem, 
>>> peer_pdd->drm_priv);
>>> +            peer_pdd->dev->adev, mem, peer_pdd->drm_priv);
>>>           if (err) {
>>>               pr_err("Failed to unmap from gpu %d/%d\n",
>>>                      i, args->n_devices);
>>> @@ -1426,7 +1426,7 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>       flush_tlb = kfd_flush_tlb_after_unmap(pdd->dev->kfd);
>>>       if (flush_tlb) {
>>>           err = amdgpu_amdkfd_gpuvm_sync_memory(pdd->dev->adev,
>>> -                (struct kgd_mem *) mem, true);
>>> +                mem, true);
>>>           if (err) {
>>>               pr_debug("Sync memory failed, wait interrupted by user 
>>> signal\n");
>>>               goto sync_memory_failed;
>>> @@ -1434,7 +1434,7 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>       }
>>>         /* Flush TLBs after waiting for the page table updates to 
>>> complete */
>>> -    for (i = 0; i < args->n_devices; i++) {
>>> +    for (i = mem->n_dmaunmap_success; i < args->n_devices; i++) {
>>>           peer_pdd = kfd_process_device_data_by_id(p, devices_arr[i]);
>>>           if (WARN_ON_ONCE(!peer_pdd))
>>>               continue;
>>> @@ -1442,8 +1442,14 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>               kfd_flush_tlb(peer_pdd, TLB_FLUSH_HEAVYWEIGHT);
>>>             /* Remove dma mapping after tlb flush to avoid 
>>> IO_PAGE_FAULT */
>>> -        amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, peer_pdd->drm_priv);
>>> +        err = amdgpu_amdkfd_gpuvm_dmaunmap_mem(mem, 
>>> peer_pdd->drm_priv);
>>> +        if (err) {
>>> +            pr_debug("DMA unmapping failed, acquire interrupted by 
>>> user signal\n");
>>> +            goto dmaunmap_failed;
>>> +        }
>>> +        mem->n_dmaunmap_success = i+1;
>>>       }
>>> +    mem->n_dmaunmap_success = 0;
>>>         mutex_unlock(&p->mutex);
>>>   @@ -1455,6 +1461,7 @@ static int 
>>> kfd_ioctl_unmap_memory_from_gpu(struct file *filep,
>>>   get_mem_obj_from_handle_failed:
>>>   unmap_memory_from_gpu_failed:
>>>   sync_memory_failed:
>>> +dmaunmap_failed:
>>>       mutex_unlock(&p->mutex);
>>>   copy_from_user_failed:
>>>       kfree(devices_arr);
>>