[PATCH v5] drm/xe/dgfx: Release mmap mappings on rpm suspend

Hellstrom, Thomas thomas.hellstrom at intel.com
Fri Dec 22 18:18:43 UTC 2023


On Fri, 2023-12-22 at 23:20 +0530, Badal Nilawar wrote:
> Release all mmap mappings for all vram objects which are associated
> with userfault such that, while the PCIe function is in D3hot, any
> access to those memory mappings will raise a userfault.
> 
> Upon userfault, in order to access the memory mappings, if the
> graphics function is in D3 then a runtime resume of the dGPU will be
> triggered to transition it to D0.
> 
> v2:
>   - Avoid iomem check before bo migration check as bo can migrate
>     to system memory (Matthew Auld)
> v3:
>   - Delete bo userfault link during bo destroy
>   - Upon bo move (vram-smem), do bo userfault link deletion in
>     xe_bo_move_notify instead of xe_bo_move (Thomas Hellström)
>   - Grab lock in rpm hook while deleting bo userfault link (Matthew Auld)
> v4:
>   - Add kernel doc and wrap vram_userfault related
>     stuff in the structure (Matthew Auld)
>   - Get rpm wakeref before taking dma reserve lock (Matthew Auld)
>   - In suspend path apply lock for entire list op
>     including list iteration (Matthew Auld)
> v5:
>   - Use mutex lock instead of spin lock
> 
> Cc: Rodrigo Vivi <rodrigo.vivi at intel.com>
> Cc: Matthew Auld <matthew.auld at intel.com>
> Cc: Anshuman Gupta <anshuman.gupta at intel.com>
> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>

Acked-by: Thomas Hellström <thomas.hellstrom at linux.intel.com> #For the
xe_bo_move_notify() changes.
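
For anyone skimming the thread, the runtime-PM / fault interaction the
patch introduces is roughly the following (an illustrative, untested
sketch only, not the patch code; handle_ttm_fault() and
bo_backed_by_iomem() are hypothetical stand-ins for the existing
fault-path logic, and the xe driver internal headers are assumed):

        /* Fault side: hold a wakeref (resuming from D3 if needed), then
         * remember which VRAM bo got a CPU mapping established. */
        static vm_fault_t fault_flow_sketch(struct xe_device *xe, struct xe_bo *bo)
        {
                vm_fault_t ret;

                xe_device_mem_access_get(xe);
                ret = handle_ttm_fault(bo);     /* hypothetical: normal TTM fault path */
                if (ret == VM_FAULT_NOPAGE && bo_backed_by_iomem(bo) &&
                    list_empty(&bo->vram_userfault.link)) {
                        mutex_lock(&xe->mem_access.vram_userfault.lock);
                        list_add(&bo->vram_userfault.link,
                                 &xe->mem_access.vram_userfault.list);
                        mutex_unlock(&xe->mem_access.vram_userfault.lock);
                }
                xe_device_mem_access_put(xe);
                return ret;
        }

        /* Suspend side: zap the CPU mappings of every tracked bo so the next
         * userspace access faults again and wakes the device. */
        static void suspend_flow_sketch(struct xe_device *xe)
        {
                struct xe_bo *bo, *on;

                mutex_lock(&xe->mem_access.vram_userfault.lock);
                list_for_each_entry_safe(bo, on, &xe->mem_access.vram_userfault.list,
                                         vram_userfault.link) {
                        drm_vma_node_unmap(&bo->ttm.base.vma_node,
                                           bo->ttm.bdev->dev_mapping);
                        list_del_init(&bo->vram_userfault.link);
                }
                mutex_unlock(&xe->mem_access.vram_userfault.lock);
        }

i.e. any CPU access to a VRAM mmap after runtime suspend refaults, and the
fault handler keeps a wakeref for the duration of the access.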

> ---
>  drivers/gpu/drm/xe/xe_bo.c           | 57 ++++++++++++++++++++++++++--
>  drivers/gpu/drm/xe/xe_bo.h           |  2 +
>  drivers/gpu/drm/xe/xe_bo_types.h     | 10 +++++
>  drivers/gpu/drm/xe/xe_device_types.h | 12 ++++++
>  drivers/gpu/drm/xe/xe_pci.c          |  2 +
>  drivers/gpu/drm/xe/xe_pm.c           | 17 +++++++++
>  drivers/gpu/drm/xe/xe_pm.h           |  1 +
>  7 files changed, 97 insertions(+), 4 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 8e4a3b1f6b93..4fd3a229755c 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -586,6 +586,8 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>  {
>         struct ttm_buffer_object *ttm_bo = &bo->ttm;
>         struct xe_device *xe = ttm_to_xe_device(ttm_bo->bdev);
> +       struct ttm_resource *old_mem = ttm_bo->resource;
> +       u32 old_mem_type = old_mem ? old_mem->mem_type : XE_PL_SYSTEM;
>         int ret;
>  
>         /*
> @@ -605,6 +607,18 @@ static int xe_bo_move_notify(struct xe_bo *bo,
>         if (ttm_bo->base.dma_buf && !ttm_bo->base.import_attach)
>                 dma_buf_move_notify(ttm_bo->base.dma_buf);
>  
> +       /*
> +        * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
> +        * so if we moved from VRAM make sure to unlink this from the
> +        * userfault tracking.
> +        */
> +       if (mem_type_is_vram(old_mem_type)) {
> +               mutex_lock(&xe->mem_access.vram_userfault.lock);
> +               if (!list_empty(&bo->vram_userfault.link))
> +                       list_del_init(&bo->vram_userfault.link);
> +               mutex_unlock(&xe->mem_access.vram_userfault.lock);
> +       }
> +
>         return 0;
>  }
>  
> @@ -1063,6 +1077,11 @@ static void xe_ttm_bo_destroy(struct ttm_buffer_object *ttm_bo)
>         if (bo->vm && xe_bo_is_user(bo))
>                 xe_vm_put(bo->vm);
>  
> +       mutex_lock(&xe->mem_access.vram_userfault.lock);
> +       if (!list_empty(&bo->vram_userfault.link))
> +               list_del(&bo->vram_userfault.link);
> +       mutex_unlock(&xe->mem_access.vram_userfault.lock);
> +
>         kfree(bo);
>  }
>  
> @@ -1110,16 +1129,20 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>  {
>         struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
>         struct drm_device *ddev = tbo->base.dev;
> +       struct xe_device *xe = to_xe_device(ddev);
> +       struct xe_bo *bo = ttm_to_xe_bo(tbo);
> +       bool needs_rpm = bo->flags & XE_BO_CREATE_VRAM_MASK;
>         vm_fault_t ret;
>         int idx, r = 0;
>  
> +       if (needs_rpm)
> +               xe_device_mem_access_get(xe);
> +
>         ret = ttm_bo_vm_reserve(tbo, vmf);
>         if (ret)
> -               return ret;
> +               goto out;
>  
>         if (drm_dev_enter(ddev, &idx)) {
> -               struct xe_bo *bo = ttm_to_xe_bo(tbo);
> -
>                 trace_xe_bo_cpu_fault(bo);
>  
>                 if (should_migrate_to_system(bo)) {
> @@ -1137,10 +1160,25 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>         } else {
>                 ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
>         }
> +
>         if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
> -               return ret;
> +               goto out;
> +       /*
> +        * ttm_bo_vm_reserve() already has dma_resv_lock.
> +        */
> +       if (ret == VM_FAULT_NOPAGE && list_empty(&bo->vram_userfault.link)) {
> +               if (tbo->resource->bus.is_iomem) {
> +                       mutex_lock(&xe->mem_access.vram_userfault.lock);
> +                       list_add(&bo->vram_userfault.link,
> +                                &xe->mem_access.vram_userfault.list);
> +                       mutex_unlock(&xe->mem_access.vram_userfault.lock);
> +               }
> +       }
>  
>         dma_resv_unlock(tbo->base.resv);
> +out:
> +       if (needs_rpm)
> +               xe_device_mem_access_put(xe);
> +
>         return ret;
>  }
>  
> @@ -1254,6 +1292,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
>  #ifdef CONFIG_PROC_FS
>         INIT_LIST_HEAD(&bo->client_link);
>  #endif
> +       INIT_LIST_HEAD(&bo->vram_userfault.link);
>  
>         drm_gem_private_object_init(&xe->drm, &bo->ttm.base, size);
>  
> @@ -2264,6 +2303,16 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
>         return err;
>  }
>  
> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
> +{
> +       struct ttm_buffer_object *tbo = &bo->ttm;
> +       struct ttm_device *bdev = tbo->bdev;
> +
> +       drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
> +
> +       list_del(&bo->vram_userfault.link);
> +}
> +
>  #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>  #include "tests/xe_bo.c"
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 97b32528c600..350cc73cadf8 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -249,6 +249,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
>                         struct drm_file *file);
>  int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
>                              struct drm_file *file);
> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
> +
>  int xe_bo_dumb_create(struct drm_file *file_priv,
>                       struct drm_device *dev,
>                       struct drm_mode_create_dumb *args);
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 64c2249a4e40..b4e44fab9cd2 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -88,6 +88,16 @@ struct xe_bo {
>          * objects.
>          */
>         u16 cpu_caching;
> +
> +       /**
> +        * @vram_userfault: Encapsulate vram_userfault related stuff
> +        */
> +       struct {
> +               /** @link: Link into @mem_access.vram_userfault.list */
> +               struct list_head link;
> +               /** @count: Whether the bo is on @mem_access.vram_userfault.list */
> +               unsigned int count;
> +       } vram_userfault;
>  };
>  
>  #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> index 71f23ac365e6..e27c2211728f 100644
> --- a/drivers/gpu/drm/xe/xe_device_types.h
> +++ b/drivers/gpu/drm/xe/xe_device_types.h
> @@ -385,6 +385,18 @@ struct xe_device {
>         struct {
>                 /** @ref: ref count of memory accesses */
>                 atomic_t ref;
> +
> +               /** @vram_userfault: Encapsulate vram_userfault related stuff */
> +               struct {
> +                       /** @lock: Protects access to @vram_userfault.list */
> +                       struct mutex lock;
> +
> +                       /**
> +                        * @list: List of userfaulted vram bos, which need to have
> +                        * their mmap mappings released in the runtime suspend path.
> +                        */
> +                       struct list_head list;
> +               } vram_userfault;
>         } mem_access;
>  
>         /**
> diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
> index 1f997353a78f..4de79a7c5dc2 100644
> --- a/drivers/gpu/drm/xe/xe_pci.c
> +++ b/drivers/gpu/drm/xe/xe_pci.c
> @@ -775,6 +775,8 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
>                 str_yes_no(xe_device_has_sriov(xe)),
>                 xe_sriov_mode_to_string(xe_device_sriov_mode(xe)));
>  
> +       xe_pm_init_early(xe);
> +
>         err = xe_device_probe(xe);
>         if (err)
>                 return err;
> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
> index b429c2876a76..8166b45f83d8 100644
> --- a/drivers/gpu/drm/xe/xe_pm.c
> +++ b/drivers/gpu/drm/xe/xe_pm.c
> @@ -163,6 +163,12 @@ static void xe_pm_runtime_init(struct xe_device *xe)
>         pm_runtime_put(dev);
>  }
>  
> +void xe_pm_init_early(struct xe_device *xe)
> +{
> +       INIT_LIST_HEAD(&xe->mem_access.vram_userfault.list);
> +       drmm_mutex_init(&xe->drm, &xe->mem_access.vram_userfault.lock);
> +}
> +
>  void xe_pm_init(struct xe_device *xe)
>  {
>         struct pci_dev *pdev = to_pci_dev(xe->drm.dev);
> @@ -214,6 +220,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
>  
>  int xe_pm_runtime_suspend(struct xe_device *xe)
>  {
> +       struct xe_bo *bo, *on;
>         struct xe_gt *gt;
>         u8 id;
>         int err = 0;
> @@ -247,6 +254,16 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>          */
>         lock_map_acquire(&xe_device_mem_access_lockdep_map);
>  
> +       /*
> +        * Hold the lock over the entire list operation as xe_ttm_bo_destroy()
> +        * and xe_bo_move_notify() also check and delete the bo entry from the
> +        * userfault list.
> +        */
> +       mutex_lock(&xe->mem_access.vram_userfault.lock);
> +       list_for_each_entry_safe(bo, on,
> +                                &xe->mem_access.vram_userfault.list, vram_userfault.link)
> +               xe_bo_runtime_pm_release_mmap_offset(bo);
> +       mutex_unlock(&xe->mem_access.vram_userfault.lock);
> +
>         if (xe->d3cold.allowed) {
>                 err = xe_bo_evict_all(xe);
>                 if (err)
> diff --git a/drivers/gpu/drm/xe/xe_pm.h b/drivers/gpu/drm/xe/xe_pm.h
> index 6b9031f7af24..64a97c6726a7 100644
> --- a/drivers/gpu/drm/xe/xe_pm.h
> +++ b/drivers/gpu/drm/xe/xe_pm.h
> @@ -20,6 +20,7 @@ struct xe_device;
>  int xe_pm_suspend(struct xe_device *xe);
>  int xe_pm_resume(struct xe_device *xe);
>  
> +void xe_pm_init_early(struct xe_device *xe);
>  void xe_pm_init(struct xe_device *xe);
>  void xe_pm_runtime_fini(struct xe_device *xe);
>  int xe_pm_runtime_suspend(struct xe_device *xe);


