[PATCH 1/1] drm/amdgpu: add helper function for vm pasid
Christian König
ckoenig.leichtzumerken at gmail.com
Mon Jun 21 11:18:38 UTC 2021
Am 21.06.21 um 13:10 schrieb Das, Nirmoy:
>
> On 6/21/2021 12:59 PM, Christian König wrote:
>> Am 21.06.21 um 12:56 schrieb Das, Nirmoy:
>>>
>>> On 6/21/2021 12:26 PM, Christian König wrote:
>>>> Well you completely break the handling in amdgpu_vm_handle_fault()
>>>> with this.
>>>
>>>
>>> I see one issue with: if (!vm || (vm && vm->root.bo != root)) . I
>>> will split it and resend.
>>>
>>> Am I missing something else ?
>>
>> The problem is you drop and re-take the lock now at the wrong time.
>
>
> I see the problem.
>
>>
>> I don't think what you try to do here is possible at all.
>
>
> Does it makes sense to resend without amdgpu_vm_handle_fault() changes ?
Some other changes don't make sense to me as well.
For example:
pasid = amdgpu_pasid_alloc(16);
Why do you want to allocate a hard coded pasid number here?
Christian.
>
>>
>> Christian.
>>
>>>
>>>
>>> Regards,
>>>
>>> Nirmoy
>>>
>>>
>>>
>>>>
>>>> Christian.
>>>>
>>>> Am 21.06.21 um 11:47 schrieb Das, Nirmoy:
>>>>> ping.
>>>>>
>>>>> On 6/17/2021 3:03 PM, Nirmoy Das wrote:
>>>>>> Cleanup code related to vm pasid by adding helper function.
>>>>>> This reduces lots code duplication.
>>>>>>
>>>>>> Signed-off-by: Nirmoy Das <nirmoy.das at amd.com>
>>>>>> ---
>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 17 +--
>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 176
>>>>>> ++++++++++++------------
>>>>>> drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 2 +-
>>>>>> 3 files changed, 96 insertions(+), 99 deletions(-)
>>>>>>
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>> index cbb932f97355..27851fb0e25b 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
>>>>>> @@ -1149,7 +1149,7 @@ int amdgpu_driver_open_kms(struct
>>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>> {
>>>>>> struct amdgpu_device *adev = drm_to_adev(dev);
>>>>>> struct amdgpu_fpriv *fpriv;
>>>>>> - int r, pasid;
>>>>>> + int r;
>>>>>> /* Ensure IB tests are run on ring */
>>>>>> flush_delayed_work(&adev->delayed_init_work);
>>>>>> @@ -1172,15 +1172,9 @@ int amdgpu_driver_open_kms(struct
>>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>> goto out_suspend;
>>>>>> }
>>>>>> - pasid = amdgpu_pasid_alloc(16);
>>>>>> - if (pasid < 0) {
>>>>>> - dev_warn(adev->dev, "No more PASIDs available!");
>>>>>> - pasid = 0;
>>>>>> - }
>>>>>> -
>>>>>> - r = amdgpu_vm_init(adev, &fpriv->vm, pasid);
>>>>>> + r = amdgpu_vm_init(adev, &fpriv->vm);
>>>>>> if (r)
>>>>>> - goto error_pasid;
>>>>>> + goto free_fpriv;
>>>>>> fpriv->prt_va = amdgpu_vm_bo_add(adev, &fpriv->vm, NULL);
>>>>>> if (!fpriv->prt_va) {
>>>>>> @@ -1208,10 +1202,7 @@ int amdgpu_driver_open_kms(struct
>>>>>> drm_device *dev, struct drm_file *file_priv)
>>>>>> error_vm:
>>>>>> amdgpu_vm_fini(adev, &fpriv->vm);
>>>>>> -error_pasid:
>>>>>> - if (pasid)
>>>>>> - amdgpu_pasid_free(pasid);
>>>>>> -
>>>>>> +free_fpriv:
>>>>>> kfree(fpriv);
>>>>>> out_suspend:
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> index 63975bda8e76..562c2c48a3a3 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
>>>>>> @@ -87,6 +87,69 @@ struct amdgpu_prt_cb {
>>>>>> struct dma_fence_cb cb;
>>>>>> };
>>>>>> +static int amdgpu_vm_pasid_alloc(struct amdgpu_device *adev,
>>>>>> + struct amdgpu_vm *vm, unsigned int pasid)
>>>>>> +{
>>>>>> + unsigned long flags;
>>>>>> + int r;
>>>>>> +
>>>>>> + if (!pasid)
>>>>>> + return 0;
>>>>>> +
>>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> + r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid, pasid
>>>>>> + 1,
>>>>>> + GFP_ATOMIC);
>>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> + if (r < 0)
>>>>>> + return r;
>>>>>> +
>>>>>> + vm->pasid = pasid;
>>>>>> + return 0;
>>>>>> +}
>>>>>> +static void amdgpu_vm_pasid_remove_id(struct amdgpu_device *adev,
>>>>>> + unsigned int pasid)
>>>>>> +{
>>>>>> + unsigned long flags;
>>>>>> +
>>>>>> + if (!pasid)
>>>>>> + return;
>>>>>> +
>>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> + idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> +
>>>>>> +}
>>>>>> +
>>>>>> +static void amdgpu_vm_pasid_remove(struct amdgpu_device *adev,
>>>>>> + struct amdgpu_vm *vm)
>>>>>> +{
>>>>>> + amdgpu_vm_pasid_remove_id(adev, vm->pasid);
>>>>>> + vm->pasid = 0;
>>>>>> +}
>>>>>> +
>>>>>> +static void amdgpu_vm_pasid_free(struct amdgpu_device *adev,
>>>>>> + struct amdgpu_vm *vm)
>>>>>> +{
>>>>>> + if (!vm->pasid)
>>>>>> + return;
>>>>>> +
>>>>>> + amdgpu_pasid_free(vm->pasid);
>>>>>> + amdgpu_vm_pasid_remove(adev, vm);
>>>>>> +}
>>>>>> +
>>>>>> +static struct amdgpu_vm *amdgpu_vm_pasid_find(struct
>>>>>> amdgpu_device *adev,
>>>>>> + unsigned int pasid)
>>>>>> +{
>>>>>> + struct amdgpu_vm *vm;
>>>>>> + unsigned long flags;
>>>>>> +
>>>>>> + spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> + vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> + spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> +
>>>>>> + return vm;
>>>>>> +}
>>>>>> +
>>>>>> /*
>>>>>> * vm eviction_lock can be taken in MMU notifiers. Make sure no
>>>>>> reclaim-FS
>>>>>> * happens while holding this lock anywhere to prevent
>>>>>> deadlocks when
>>>>>> @@ -2859,17 +2922,17 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm
>>>>>> *vm, long timeout)
>>>>>> *
>>>>>> * @adev: amdgpu_device pointer
>>>>>> * @vm: requested vm
>>>>>> - * @pasid: Process address space identifier
>>>>>> *
>>>>>> * Init @vm fields.
>>>>>> *
>>>>>> * Returns:
>>>>>> * 0 for success, error for failure.
>>>>>> */
>>>>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm
>>>>>> *vm, u32 pasid)
>>>>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm
>>>>>> *vm)
>>>>>> {
>>>>>> struct amdgpu_bo *root_bo;
>>>>>> struct amdgpu_bo_vm *root;
>>>>>> + unsigned int pasid;
>>>>>> int r, i;
>>>>>> vm->va = RB_ROOT_CACHED;
>>>>>> @@ -2940,19 +3003,15 @@ int amdgpu_vm_init(struct amdgpu_device
>>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>> amdgpu_bo_unreserve(vm->root.bo);
>>>>>> - if (pasid) {
>>>>>> - unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> - r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid,
>>>>>> pasid + 1,
>>>>>> - GFP_ATOMIC);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> - if (r < 0)
>>>>>> - goto error_free_root;
>>>>>> -
>>>>>> - vm->pasid = pasid;
>>>>>> + pasid = amdgpu_pasid_alloc(16);
>>>>>> + if (pasid < 0) {
>>>>>> + dev_warn(adev->dev, "No more PASIDs available!");
>>>>>> + pasid = 0;
>>>>>> }
>>>>>> + if (amdgpu_vm_pasid_alloc(adev, vm, pasid))
>>>>>> + goto error_free_pasid;
>>>>>> +
>>>>>> INIT_KFIFO(vm->faults);
>>>>>> return 0;
>>>>>> @@ -2960,6 +3019,10 @@ int amdgpu_vm_init(struct amdgpu_device
>>>>>> *adev, struct amdgpu_vm *vm, u32 pasid)
>>>>>> error_unreserve:
>>>>>> amdgpu_bo_unreserve(vm->root.bo);
>>>>>> +error_free_pasid:
>>>>>> + if (pasid)
>>>>>> + amdgpu_pasid_free(pasid);
>>>>>> +
>>>>>> error_free_root:
>>>>>> amdgpu_bo_unref(&root->shadow);
>>>>>> amdgpu_bo_unref(&root_bo);
>>>>>> @@ -3039,18 +3102,11 @@ int amdgpu_vm_make_compute(struct
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>> if (r)
>>>>>> goto unreserve_bo;
>>>>>> - if (pasid) {
>>>>>> - unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> - r = idr_alloc(&adev->vm_manager.pasid_idr, vm, pasid,
>>>>>> pasid + 1,
>>>>>> - GFP_ATOMIC);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> + r = amdgpu_vm_pasid_alloc(adev, vm, pasid);
>>>>>> + if (r == -ENOSPC)
>>>>>> + goto unreserve_bo;
>>>>>> - if (r == -ENOSPC)
>>>>>> - goto unreserve_bo;
>>>>>> - r = 0;
>>>>>> - }
>>>>>> + r = 0;
>>>>>> /* Check if PD needs to be reinitialized and do it before
>>>>>> * changing any other state, in case it fails.
>>>>>> @@ -3088,19 +3144,7 @@ int amdgpu_vm_make_compute(struct
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>> vm->last_update = NULL;
>>>>>> vm->is_compute_context = true;
>>>>>> - if (vm->pasid) {
>>>>>> - unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> -
>>>>>> - /* Free the original amdgpu allocated pasid
>>>>>> - * Will be replaced with kfd allocated pasid
>>>>>> - */
>>>>>> - amdgpu_pasid_free(vm->pasid);
>>>>>> - vm->pasid = 0;
>>>>>> - }
>>>>>> + amdgpu_vm_pasid_free(adev, vm);
>>>>>> /* Free the shadow bo for compute VM */
>>>>>> amdgpu_bo_unref(&to_amdgpu_bo_vm(vm->root.bo)->shadow);
>>>>>> @@ -3111,13 +3155,8 @@ int amdgpu_vm_make_compute(struct
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>> goto unreserve_bo;
>>>>>> free_idr:
>>>>>> - if (pasid) {
>>>>>> - unsigned long flags;
>>>>>> + amdgpu_vm_pasid_remove_id(adev, pasid);
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> - }
>>>>>> unreserve_bo:
>>>>>> amdgpu_bo_unreserve(vm->root.bo);
>>>>>> return r;
>>>>>> @@ -3133,14 +3172,7 @@ int amdgpu_vm_make_compute(struct
>>>>>> amdgpu_device *adev, struct amdgpu_vm *vm,
>>>>>> */
>>>>>> void amdgpu_vm_release_compute(struct amdgpu_device *adev,
>>>>>> struct amdgpu_vm *vm)
>>>>>> {
>>>>>> - if (vm->pasid) {
>>>>>> - unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> - }
>>>>>> - vm->pasid = 0;
>>>>>> + amdgpu_vm_pasid_remove(adev, vm);
>>>>>> vm->is_compute_context = false;
>>>>>> }
>>>>>> @@ -3164,15 +3196,7 @@ void amdgpu_vm_fini(struct amdgpu_device
>>>>>> *adev, struct amdgpu_vm *vm)
>>>>>> root = amdgpu_bo_ref(vm->root.bo);
>>>>>> amdgpu_bo_reserve(root, true);
>>>>>> - if (vm->pasid) {
>>>>>> - unsigned long flags;
>>>>>> -
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> - idr_remove(&adev->vm_manager.pasid_idr, vm->pasid);
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> - vm->pasid = 0;
>>>>>> - }
>>>>>> -
>>>>>> + amdgpu_vm_pasid_remove(adev, vm);
>>>>>> dma_fence_wait(vm->last_unlocked, false);
>>>>>> dma_fence_put(vm->last_unlocked);
>>>>>> @@ -3334,16 +3358,10 @@ int amdgpu_vm_ioctl(struct drm_device
>>>>>> *dev, void *data, struct drm_file *filp)
>>>>>> void amdgpu_vm_get_task_info(struct amdgpu_device *adev, u32
>>>>>> pasid,
>>>>>> struct amdgpu_task_info *task_info)
>>>>>> {
>>>>>> - struct amdgpu_vm *vm;
>>>>>> - unsigned long flags;
>>>>>> + struct amdgpu_vm *vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, flags);
>>>>>> -
>>>>>> - vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> if (vm)
>>>>>> *task_info = vm->task_info;
>>>>>> -
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
>>>>>> }
>>>>>> /**
>>>>>> @@ -3380,24 +3398,16 @@ bool amdgpu_vm_handle_fault(struct
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>> {
>>>>>> bool is_compute_context = false;
>>>>>> struct amdgpu_bo *root;
>>>>>> - unsigned long irqflags;
>>>>>> uint64_t value, flags;
>>>>>> struct amdgpu_vm *vm;
>>>>>> int r;
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> - vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> - if (vm) {
>>>>>> - root = amdgpu_bo_ref(vm->root.bo);
>>>>>> - is_compute_context = vm->is_compute_context;
>>>>>> - } else {
>>>>>> - root = NULL;
>>>>>> - }
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> -
>>>>>> - if (!root)
>>>>>> + vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>> + if (!vm)
>>>>>> return false;
>>>>>> + root = amdgpu_bo_ref(vm->root.bo);
>>>>>> + is_compute_context = vm->is_compute_context;
>>>>>> addr /= AMDGPU_GPU_PAGE_SIZE;
>>>>>> if (is_compute_context &&
>>>>>> @@ -3411,12 +3421,8 @@ bool amdgpu_vm_handle_fault(struct
>>>>>> amdgpu_device *adev, u32 pasid,
>>>>>> goto error_unref;
>>>>>> /* Double check that the VM still exists */
>>>>>> - spin_lock_irqsave(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> - vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
>>>>>> - if (vm && vm->root.bo != root)
>>>>>> - vm = NULL;
>>>>>> - spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, irqflags);
>>>>>> - if (!vm)
>>>>>> + vm = amdgpu_vm_pasid_find(adev, pasid);
>>>>>> + if (!vm || (vm && vm->root.bo != root))
>>>>>> goto error_unlock;
>>>>>> flags = AMDGPU_PTE_VALID | AMDGPU_PTE_SNOOPED |
>>>>>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> index ddb85a85cbba..ee994e21dffa 100644
>>>>>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
>>>>>> @@ -376,7 +376,7 @@ void amdgpu_vm_manager_init(struct
>>>>>> amdgpu_device *adev);
>>>>>> void amdgpu_vm_manager_fini(struct amdgpu_device *adev);
>>>>>> long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
>>>>>> -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm
>>>>>> *vm, u32 pasid);
>>>>>> +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm
>>>>>> *vm);
>>>>>> int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
>>>>>> amdgpu_vm *vm, u32 pasid);
>>>>>> void amdgpu_vm_release_compute(struct amdgpu_device *adev,
>>>>>> struct amdgpu_vm *vm);
>>>>>> void amdgpu_vm_fini(struct amdgpu_device *adev, struct
>>>>>> amdgpu_vm *vm);
>>>>
>>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
More information about the amd-gfx
mailing list