[PATCH 4/4] drm/amdgpu: rework reserved VMID handling

Thu Dec 8 17:36:49 UTC 2022

Series is:
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

On Mon, Dec 5, 2022 at 5:43 AM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> Instead of reserving a VMID for a single process, allow many
> processes to use the reserved ID. This allows for proper isolation
> between the processes.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c | 45 ++++++++++++-------------
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h |  3 +-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h  |  6 +---
>  3 files changed, 24 insertions(+), 30 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> index 7e5aad241295..6481b43ffe25 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.c
> @@ -279,12 +279,13 @@ static int amdgpu_vmid_grab_reserved(struct amdgpu_vm *vm,
>  {
>         struct amdgpu_device *adev = ring->adev;
>         unsigned vmhub = ring->funcs->vmhub;
> +       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>         uint64_t fence_context = adev->fence_context + ring->idx;
>         bool needs_flush = vm->use_cpu_for_update;
>         uint64_t updates = amdgpu_vm_tlb_seq(vm);
>         int r;
>
> -       *id = vm->reserved_vmid[vmhub];
> +       *id = id_mgr->reserved;
>         if ((*id)->owner != vm->immediate.fence_context ||
>             !amdgpu_vmid_compatible(*id, job) ||
>             (*id)->flushed_updates < updates ||
> @@ -464,31 +465,27 @@ int amdgpu_vmid_alloc_reserved(struct amdgpu_device *adev,
>                                struct amdgpu_vm *vm,
>                                unsigned vmhub)
>  {
> -       struct amdgpu_vmid_mgr *id_mgr;
> -       struct amdgpu_vmid *idle;
> -       int r = 0;
> +       struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>
> -       id_mgr = &adev->vm_manager.id_mgr[vmhub];
>         mutex_lock(&id_mgr->lock);
>         if (vm->reserved_vmid[vmhub])
>                 goto unlock;
> -       if (atomic_inc_return(&id_mgr->reserved_vmid_num) >
> -           AMDGPU_VM_MAX_RESERVED_VMID) {
> -               DRM_ERROR("Over limitation of reserved vmid\n");
> -               atomic_dec(&id_mgr->reserved_vmid_num);
> -               r = -EINVAL;
> -               goto unlock;
> +
> +       ++id_mgr->reserved_use_count;
> +       if (!id_mgr->reserved) {
> +               struct amdgpu_vmid *id;
> +
> +               id = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid,
> +                                     list);
> +               /* Remove from normal round robin handling */
> +               list_del_init(&id->list);
> +               id_mgr->reserved = id;
>         }
> -       /* Select the first entry VMID */
> -       idle = list_first_entry(&id_mgr->ids_lru, struct amdgpu_vmid, list);
> -       list_del_init(&idle->list);
> -       vm->reserved_vmid[vmhub] = idle;
> -       mutex_unlock(&id_mgr->lock);
> +       vm->reserved_vmid[vmhub] = true;
>
> -       return 0;
>  unlock:
>         mutex_unlock(&id_mgr->lock);
> -       return r;
> +       return 0;
>  }
>
>  void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
> @@ -498,12 +495,12 @@ void amdgpu_vmid_free_reserved(struct amdgpu_device *adev,
>         struct amdgpu_vmid_mgr *id_mgr = &adev->vm_manager.id_mgr[vmhub];
>
>         mutex_lock(&id_mgr->lock);
> -       if (vm->reserved_vmid[vmhub]) {
> -               list_add(&vm->reserved_vmid[vmhub]->list,
> -                       &id_mgr->ids_lru);
> -               vm->reserved_vmid[vmhub] = NULL;
> -               atomic_dec(&id_mgr->reserved_vmid_num);
> +       if (vm->reserved_vmid[vmhub] &&
> +           !--id_mgr->reserved_use_count) {
> +               /* give the reserved ID back to normal round robin */
> +               list_add(&id_mgr->reserved->list, &id_mgr->ids_lru);
>         }
> +       vm->reserved_vmid[vmhub] = false;
>         mutex_unlock(&id_mgr->lock);
>  }
>
> @@ -570,7 +567,7 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
>
>                 mutex_init(&id_mgr->lock);
>                 INIT_LIST_HEAD(&id_mgr->ids_lru);
> -               atomic_set(&id_mgr->reserved_vmid_num, 0);
> +               id_mgr->reserved_use_count = 0;
>
>                 /* manage only VMIDs not used by KFD */
>                 id_mgr->num_ids = adev->vm_manager.first_kfd_vmid;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> index 06c8a0034fa5..c573a8ffff47 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ids.h
> @@ -67,7 +67,8 @@ struct amdgpu_vmid_mgr {
>         unsigned                num_ids;
>         struct list_head        ids_lru;
>         struct amdgpu_vmid      ids[AMDGPU_NUM_VMID];
> -       atomic_t                reserved_vmid_num;
> +       struct amdgpu_vmid      *reserved;
> +       unsigned int            reserved_use_count;
>  };
>
>  int amdgpu_pasid_alloc(unsigned int bits);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> index 6546e786bf00..094bb4807303 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> @@ -119,9 +119,6 @@ struct amdgpu_bo_vm;
>  /* Reserve 2MB at top/bottom of address space for kernel use */
>  #define AMDGPU_VA_RESERVED_SIZE                        (2ULL << 20)
>
> -/* max vmids dedicated for process */
> -#define AMDGPU_VM_MAX_RESERVED_VMID    1
> -
>  /* See vm_update_mode */
>  #define AMDGPU_VM_USE_CPU_FOR_GFX (1 << 0)
>  #define AMDGPU_VM_USE_CPU_FOR_COMPUTE (1 << 1)
> @@ -298,8 +295,7 @@ struct amdgpu_vm {
>         struct dma_fence        *last_unlocked;
>
>         unsigned int            pasid;
> -       /* dedicated to vm */
> -       struct amdgpu_vmid      *reserved_vmid[AMDGPU_MAX_VMHUBS];
> +       bool                    reserved_vmid[AMDGPU_MAX_VMHUBS];
>
>         /* Flag to indicate if VM tables are updated by CPU or GPU (SDMA) */
>         bool                                    use_cpu_for_update;
> --
> 2.34.1
>