[PATCH 1/1] drm/amdgpu: add helper function for vm pasid

Das, Nirmoy nirmoy.das at amd.com
Mon Jun 21 11:27:47 UTC 2021


On 6/21/2021 1:18 PM, Christian König wrote:
> Am 21.06.21 um 13:10 schrieb Das, Nirmoy:
>>
>> On 6/21/2021 12:59 PM, Christian König wrote:
>>> Am 21.06.21 um 12:56 schrieb Das, Nirmoy:
>>>>
>>>> On 6/21/2021 12:26 PM, Christian König wrote:
>>>>> Well you completely break the handling in amdgpu_vm_handle_fault() 
>>>>> with this.
>>>>
>>>>
>>>> I see one issue with:  if (!vm || (vm && vm->root.bo != root)) . I 
>>>> will split it and resend.
>>>>
>>>> Am I missing something else ?
>>>
>>> The problem is you drop and re-take the lock now at the wrong time.
>>
>>
>> I see the problem.
>>
>>>
>>> I don't think what you try to do here is possible at all.
>>
>>
>> Does it makes sense to resend without amdgpu_vm_handle_fault() changes ?
>
> Some other changes don't make sense to me as well.
>
> For example:
>
> pasid = amdgpu_pasid_alloc(16);
>
> Why do you want to allocate a hard coded pasid number here?


This is from  original amdgpu_driver_open_kms(). We are allocating a 
pasid number and passing that to

amdgpu_vm_init(). I wanted to move that to vmcode with this patch.


Regards,

Nirmoy


>
> Christian.
>
>
>>
>>>
>>> Christian.
>>>
>>>>
>>>>
>>>> Regards,
>>>>
>>>> Nirmoy
>>>>
>>>>
>>>>
>>>>>
>>>>> Christian.
>>>>>
>>>>> Am 21.06.21 um 11:47 schrieb Das, Nirmoy:
>>>>>> ping.
>>>>>>
>>>>>> On 6/17/2021 3:03 PM, Nirmoy Das wrote:
>>>>>>> Cleanup code related to vm pasid by adding helper function.
>>>>>>> This reduces lots code duplication.
>>>>>>>
>>>>>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>>>>>> ---
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c |  17 +--
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c  | 176 
>>>>>>> ++++++++++++------------
>>>>>>>   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |   2 +-
>>>>>>>   3 files changed, 96 insertions(+), 99 deletions(-)
>>>>>>>
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>>> index cbb932f97355..27851fb0e25b 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>>> @@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct 
>>>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>>>   {
>>>>>>>       struct amdgpu_device *adev = drm_to_adev(dev);
>>>>>>>       struct amdgpu_fpriv *fpriv;
>>>>>>> -    int r, pasid;
>>>>>>> +    int r;
>>>>>>>         /* Ensure IB tests are run on ring */
>>>>>>> flush_delayed_work(&adev->delayed_init_work);
>>>>>>> @@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct 
>>>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>>>           goto out_suspend;
>>>>>>>       }
>>>>>>>   -    pasid = amdgpu_pasid_alloc(16);
>>>>>>> -    if (pasid < 0) {
>>>>>>> -        dev_warn(adev->dev, "No more PASIDs available!");
>>>>>>> -        pasid = 0;
>>>>>>> -    }
>>>>>>> -
>>>>>>> -    r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
>>>>>>> +    r = amdgpu_vm_init(adev, &fpriv->vm);
>>>>>>>       if (r)
>>>>>>> -        goto error_pasid;
>>>>>>> +        goto free_fpriv;
>>>>>>>         fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
>>>>>>>       if (!fpriv->prt_va) {
>>>>>>> @@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct 
>>>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>>>   error_vm:
>>>>>>>       amdgpu_vm_fini(adev, &fpriv->vm);
>>>>>>>   -error_pasid:
>>>>>>> -    if (pasid)
>>>>>>> -        amdgpu_pasid_free(pasid);
>>>>>>> -
>>>>>>> +free_fpriv:
>>>>>>>       kfree(fpriv);
>>>>>>>     out_suspend:
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> index 63975bda8e76..562c2c48a3a3 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>>> @@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
>>>>>>>       struct dma_fence_cb cb;
>>>>>>>   };
>>>>>>>   +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
>>>>>>> +                 struct amdgpu_vm *vm, unsigned int pasid)
>>>>>>> +{
>>>>>>> +    unsigned long flags;
>>>>>>> +    int r;
>>>>>>> +
>>>>>>> +    if (!pasid)
>>>>>>> +        return 0;
>>>>>>> +
>>>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +    r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid 
>>>>>>> + 1,
>>>>>>> +              GFP_ATOMIC);
>>>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +    if (r < 0)
>>>>>>> +        return r;
>>>>>>> +
>>>>>>> +    vm->pasid = pasid;
>>>>>>> +    return 0;
>>>>>>> +}
>>>>>>> +static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
>>>>>>> +                      unsigned int pasid)
>>>>>>> +{
>>>>>>> +    unsigned long flags;
>>>>>>> +
>>>>>>> +    if (!pasid)
>>>>>>> +        return;
>>>>>>> +
>>>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +    idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
>>>>>>> +                   struct amdgpu_vm *vm)
>>>>>>> +{
>>>>>>> +    amdgpu_vm_pasid_remove_id(adev, vm->pasid);
>>>>>>> +    vm->pasid = 0;
>>>>>>> +}
>>>>>>> +
>>>>>>> +static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
>>>>>>> +                 struct amdgpu_vm *vm)
>>>>>>> +{
>>>>>>> +    if (!vm->pasid)
>>>>>>> +        return;
>>>>>>> +
>>>>>>> +    amdgpu_pasid_free(vm->pasid);
>>>>>>> +    amdgpu_vm_pasid_remove(adev, vm);
>>>>>>> +}
>>>>>>> +
>>>>>>> +static struct amdgpu_vm *amdgpu_vm_pasid_find(struct 
>>>>>>> amdgpu_device *adev,
>>>>>>> +                          unsigned int pasid)
>>>>>>> +{
>>>>>>> +    struct amdgpu_vm *vm;
>>>>>>> +    unsigned long flags;
>>>>>>> +
>>>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +
>>>>>>> +    return vm;
>>>>>>> +}
>>>>>>> +
>>>>>>>   /*
>>>>>>>    * vm eviction_lock can be taken in MMU notifiers. Make sure 
>>>>>>> no reclaim-FS
>>>>>>>    * happens while holding this lock anywhere to prevent 
>>>>>>> deadlocks when
>>>>>>> @@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct 
>>>>>>> amdgpu_vm *vm, long timeout)
>>>>>>>    *
>>>>>>>    * @adev: amdgpu_device pointer
>>>>>>>    * @vm: requested vm
>>>>>>> - * @pasid: Process address space identifier
>>>>>>>    *
>>>>>>>    * Init @vm fields.
>>>>>>>    *
>>>>>>>    * Returns:
>>>>>>>    * 0 for success, error for failure.
>>>>>>>    */
>>>>>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>>>> *vm, u32 pasid)
>>>>>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>>>> *vm)
>>>>>>>   {
>>>>>>>       struct amdgpu_bo *root_bo;
>>>>>>>       struct amdgpu_bo_vm *root;
>>>>>>> +    unsigned int pasid;
>>>>>>>       int r, i;
>>>>>>>         vm->va = RB_ROOT_CACHED;
>>>>>>> @@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>>>         amdgpu_bo_unreserve(vm->root.bo);
>>>>>>>   -    if (pasid) {
>>>>>>> -        unsigned long flags;
>>>>>>> -
>>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>>>> pasid + 1,
>>>>>>> -                  GFP_ATOMIC);
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -        if (r < 0)
>>>>>>> -            goto error_free_root;
>>>>>>> -
>>>>>>> -        vm->pasid = pasid;
>>>>>>> +    pasid = amdgpu_pasid_alloc(16);
>>>>>>> +    if (pasid < 0) {
>>>>>>> +        dev_warn(adev->dev, "No more PASIDs available!");
>>>>>>> +        pasid = 0;
>>>>>>>       }
>>>>>>>   +    if (amdgpu_vm_pasid_alloc(adev, vm, pasid))
>>>>>>> +        goto error_free_pasid;
>>>>>>> +
>>>>>>>       INIT_KFIFO(vm->faults);
>>>>>>>         return 0;
>>>>>>> @@ -2960,6 +3019,10 @@ int amdgpu_vm_init(struct amdgpu_device 
>>>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>>>   error_unreserve:
>>>>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>>>>   +error_free_pasid:
>>>>>>> +    if (pasid)
>>>>>>> +        amdgpu_pasid_free(pasid);
>>>>>>> +
>>>>>>>   error_free_root:
>>>>>>>       amdgpu_bo_unref(&root->shadow);
>>>>>>>       amdgpu_bo_unref(&root_bo);
>>>>>>> @@ -3039,18 +3102,11 @@ int amdgpu_vm_make_compute(struct 
>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>       if (r)
>>>>>>>           goto unreserve_bo;
>>>>>>>   -    if (pasid) {
>>>>>>> -        unsigned long flags;
>>>>>>> -
>>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -        r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, 
>>>>>>> pasid + 1,
>>>>>>> -                  GFP_ATOMIC);
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> +    r = amdgpu_vm_pasid_alloc(adev, vm, pasid);
>>>>>>> +    if (r ==  -ENOSPC)
>>>>>>> +        goto unreserve_bo;
>>>>>>>   -        if (r == -ENOSPC)
>>>>>>> -            goto unreserve_bo;
>>>>>>> -        r = 0;
>>>>>>> -    }
>>>>>>> +    r = 0;
>>>>>>>         /* Check if PD needs to be reinitialized and do it before
>>>>>>>        * changing any other state, in case it fails.
>>>>>>> @@ -3088,19 +3144,7 @@ int amdgpu_vm_make_compute(struct 
>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>       vm->last_update = NULL;
>>>>>>>       vm->is_compute_context = true;
>>>>>>>   -    if (vm->pasid) {
>>>>>>> -        unsigned long flags;
>>>>>>> -
>>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -
>>>>>>> -        /* Free the original amdgpu allocated pasid
>>>>>>> -         * Will be replaced with kfd allocated pasid
>>>>>>> -         */
>>>>>>> -        amdgpu_pasid_free(vm->pasid);
>>>>>>> -        vm->pasid = 0;
>>>>>>> -    }
>>>>>>> +    amdgpu_vm_pasid_free(adev, vm);
>>>>>>>         /* Free the shadow bo for compute VM */
>>>>>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>>>>> @@ -3111,13 +3155,8 @@ int amdgpu_vm_make_compute(struct 
>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>       goto unreserve_bo;
>>>>>>>     free_idr:
>>>>>>> -    if (pasid) {
>>>>>>> -        unsigned long flags;
>>>>>>> +    amdgpu_vm_pasid_remove_id(adev, pasid);
>>>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -    }
>>>>>>>   unreserve_bo:
>>>>>>>       amdgpu_bo_unreserve(vm->root.bo);
>>>>>>>       return r;
>>>>>>> @@ -3133,14 +3172,7 @@ int amdgpu_vm_make_compute(struct 
>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>>>    */
>>>>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, 
>>>>>>> struct amdgpu_vm *vm)
>>>>>>>   {
>>>>>>> -    if (vm->pasid) {
>>>>>>> -        unsigned long flags;
>>>>>>> -
>>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -    }
>>>>>>> -    vm->pasid = 0;
>>>>>>> +    amdgpu_vm_pasid_remove(adev, vm);
>>>>>>>       vm->is_compute_context = false;
>>>>>>>   }
>>>>>>>   @@ -3164,15 +3196,7 @@ void amdgpu_vm_fini(struct 
>>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm)
>>>>>>>         root = amdgpu_bo_ref(vm->root.bo);
>>>>>>>       amdgpu_bo_reserve(root, true);
>>>>>>> -    if (vm->pasid) {
>>>>>>> -        unsigned long flags;
>>>>>>> -
>>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -        vm->pasid = 0;
>>>>>>> -    }
>>>>>>> -
>>>>>>> +    amdgpu_vm_pasid_remove(adev, vm);
>>>>>>>       dma_fence_wait(vm->last_unlocked, false);
>>>>>>>       dma_fence_put(vm->last_unlocked);
>>>>>>>   @@ -3334,16 +3358,10 @@ int amdgpu_vm_ioctl(struct drm_device 
>>>>>>> *dev, void *data, struct drm_file *filp)
>>>>>>>   void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32 
>>>>>>> pasid,
>>>>>>>                struct amdgpu_task_info *task_info)
>>>>>>>   {
>>>>>>> -    struct amdgpu_vm *vm;
>>>>>>> -    unsigned long flags;
>>>>>>> +    struct amdgpu_vm *vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>>> -
>>>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>>>       if (vm)
>>>>>>>           *task_info = vm->task_info;
>>>>>>> -
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>>>   }
>>>>>>>     /**
>>>>>>> @@ -3380,24 +3398,16 @@ bool amdgpu_vm_handle_fault(struct 
>>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>>>   {
>>>>>>>       bool is_compute_context = false;
>>>>>>>       struct amdgpu_bo *root;
>>>>>>> -    unsigned long irqflags;
>>>>>>>       uint64_t value, flags;
>>>>>>>       struct amdgpu_vm *vm;
>>>>>>>       int r;
>>>>>>>   - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>>> -    if (vm) {
>>>>>>> -        root = amdgpu_bo_ref(vm->root.bo);
>>>>>>> -        is_compute_context = vm->is_compute_context;
>>>>>>> -    } else {
>>>>>>> -        root = NULL;
>>>>>>> -    }
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>>>> -
>>>>>>> -    if (!root)
>>>>>>> +    vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>>> +    if (!vm)
>>>>>>>           return false;
>>>>>>>   +    root = amdgpu_bo_ref(vm->root.bo);
>>>>>>> +        is_compute_context = vm->is_compute_context;
>>>>>>>       addr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>>>         if (is_compute_context &&
>>>>>>> @@ -3411,12 +3421,8 @@ bool amdgpu_vm_handle_fault(struct 
>>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>>>           goto error_unref;
>>>>>>>         /* Double check that the VM still exists */
>>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>>>> -    vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>>> -    if (vm && vm->root.bo != root)
>>>>>>> -        vm = NULL;
>>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>>>> -    if (!vm)
>>>>>>> +    vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>>> +    if (!vm || (vm && vm->root.bo != root))
>>>>>>>           goto error_unlock;
>>>>>>>         flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
>>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h 
>>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> index ddb85a85cbba..ee994e21dffa 100644
>>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>>> @@ -376,7 +376,7 @@ void amdgpu_vm_manager_init(struct 
>>>>>>> amdgpu_device *adev);
>>>>>>>   void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
>>>>>>>     long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
>>>>>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>>>> *vm, u32 pasid);
>>>>>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm 
>>>>>>> *vm);
>>>>>>>   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct 
>>>>>>> amdgpu_vm *vm, u32 pasid);
>>>>>>>   void amdgpu_vm_release_compute(struct amdgpu_device *adev, 
>>>>>>> struct amdgpu_vm *vm);
>>>>>>>   void amdgpu_vm_fini(struct amdgpu_device *adev, struct 
>>>>>>> amdgpu_vm *vm);
>>>>>
>>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&data=04%7C01%7Cnirmoy.das%40amd.com%7Cf5d31eacbd3b473370eb08d934a65305%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637598711246978184%7CUnknown%7CTWFpbGZsb3d8eyJWIjoiMC4wLjAwMDAiLCJQIjoiV2luMzIiLCJBTiI6Ik1haWwiLCJXVCI6Mn0%3D%7C1000&sdata=CyiiHS9%2BuIXasVBsdWRUUXGmp%2BxDufoW26sxeZrikP4%3D&reserved=0 
>>
>


More information about the amd-gfx mailing list