[Intel-gfx] [PATCH] drm/i915/dgfx: Grab wakeref at i915_ttm_unmap_virtual

Gupta, Anshuman anshuman.gupta at intel.com
Wed Sep 28 13:32:48 UTC 2022



> -----Original Message-----
> From: Auld, Matthew <matthew.auld at intel.com>
> Sent: Tuesday, September 27, 2022 6:15 PM
> To: Gupta, Anshuman <anshuman.gupta at intel.com>; intel-
> gfx at lists.freedesktop.org
> Cc: joonas.lahtinen at linux.intel.com; tvrtko.ursulin at linux.intel.com
> Subject: Re: [PATCH] drm/i915/dgfx: Grab wakeref at i915_ttm_unmap_virtual
> 
> On 27/09/2022 13:30, Gupta, Anshuman wrote:
> >
> >
> >> -----Original Message-----
> >> From: Auld, Matthew <matthew.auld at intel.com>
> >> Sent: Monday, September 26, 2022 9:52 PM
> >> To: Gupta, Anshuman <anshuman.gupta at intel.com>; intel-
> >> gfx at lists.freedesktop.org
> >> Cc: joonas.lahtinen at linux.intel.com; tvrtko.ursulin at linux.intel.com
> >> Subject: Re: [PATCH] drm/i915/dgfx: Grab wakeref at
> >> i915_ttm_unmap_virtual
> >>
> >> On 23/09/2022 15:31, Anshuman Gupta wrote:
> >>> We had already grabbed the rpm wakeref at obj destruction path, but
> >>> it also required to grab the wakeref when object moves.
> >>> When i915_gem_object_release_mmap_offset() gets called by
> >>> i915_ttm_move_notify(), it will release the mmap offset without
> >>> grabbing the wakeref. We want to avoid that therefore, grab the
> >>> wakreref at i915_ttm_unmap_virtual() accordingly.
> >>>
> >>> While doing that also changed the lmem_userfault_lock from mutex to
> >>> spinlock, as spinlock widely used for list.
> >>>
> >>> Also changed if (obj->userfault_count) to
> >>> GEM_BUG_ON(!obj->userfault_count).
> >>>
> >>> Fixes: ad74457a6b5a ("drm/i915/dgfx: Release mmap on rpm suspend")
> >>> Suggested-by: Matthew Auld <matthew.auld at intel.com>
> >>> Signed-off-by: Anshuman Gupta <anshuman.gupta at intel.com>
> >>> ---
> >>>    drivers/gpu/drm/i915/gem/i915_gem_mman.c | 19 +++++-------
> >>>    drivers/gpu/drm/i915/gem/i915_gem_ttm.c  | 39 ++++++++++++++++------
> --
> >>>    drivers/gpu/drm/i915/gt/intel_gt.c       | 11 ++++++-
> >>>    drivers/gpu/drm/i915/gt/intel_gt_types.h |  2 +-
> >>>    4 files changed, 45 insertions(+), 26 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >>> b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >>> index 73d9eda1d6b7..9da561c19a47 100644
> >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
> >>> @@ -557,11 +557,13 @@ void
> >>> i915_gem_object_runtime_pm_release_mmap_offset(struct
> >>> drm_i915_gem_object *
> >>>
> >>>    	drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
> >>>
> >>> -	if (obj->userfault_count) {
> >>> -		/* rpm wakeref provide exclusive access */
> >>> -		list_del(&obj->userfault_link);
> >>> -		obj->userfault_count = 0;
> >>> -	}
> >>> +	/*
> >>> +	 * We have exclusive access here via runtime suspend. All other callers
> >>> +	 * must first grab the rpm wakeref.
> >>> +	 */
> >>> +	GEM_BUG_ON(!obj->userfault_count);
> >>> +	list_del(&obj->userfault_link);
> >>> +	obj->userfault_count = 0;
> >>>    }
> >>>
> >>>    void i915_gem_object_release_mmap_offset(struct
> >>> drm_i915_gem_object
> >>> *obj) @@ -587,13 +589,6 @@ void
> >> i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
> >>>    		spin_lock(&obj->mmo.lock);
> >>>    	}
> >>>    	spin_unlock(&obj->mmo.lock);
> >>> -
> >>> -	if (obj->userfault_count) {
> >>> -		mutex_lock(&to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>> -		list_del(&obj->userfault_link);
> >>> -		mutex_unlock(&to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>> -		obj->userfault_count = 0;
> >>> -	}
> >>>    }
> >>>
> >>>    static struct i915_mmap_offset *
> >>> diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> >>> b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> >>> index e3fc38dd5db0..0630eeca7316 100644
> >>> --- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> >>> +++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
> >>> @@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct
> >> drm_i915_gem_object *obj, unsigned int flags)
> >>>    static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
> >>>    {
> >>>    	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
> >>> -	intel_wakeref_t wakeref = 0;
> >>>
> >>>    	if (bo->resource && likely(obj)) {
> >>> -		/* ttm_bo_release() already has dma_resv_lock */
> >>> -		if (i915_ttm_cpu_maps_iomem(bo->resource))
> >>> -			wakeref = intel_runtime_pm_get(&to_i915(obj-
> >>> base.dev)->runtime_pm);
> >>> -
> >>>    		__i915_gem_object_pages_fini(obj);
> >>> -
> >>> -		if (wakeref)
> >>> -			intel_runtime_pm_put(&to_i915(obj->base.dev)-
> >>> runtime_pm, wakeref);
> >>> -
> >>>    		i915_ttm_free_cached_io_rsgt(obj);
> >>>    	}
> >>>    }
> >>> @@ -1052,12 +1043,15 @@ static vm_fault_t vm_fault_ttm(struct
> >>> vm_fault
> >> *vmf)
> >>>    	if (ret == VM_FAULT_RETRY && !(vmf->flags &
> >> FAULT_FLAG_RETRY_NOWAIT))
> >>>    		goto out_rpm;
> >>>
> >>> -	/* ttm_bo_vm_reserve() already has dma_resv_lock */
> >>> +	/*
> >>> +	 * ttm_bo_vm_reserve() already has dma_resv_lock.
> >>> +	 * userfault_count is protected by dma_resv lock and rpm wakeref.
> >>> +	 */
> >>>    	if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
> >>>    		obj->userfault_count = 1;
> >>> -		mutex_lock(&to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>> +		spin_lock(to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>>    		list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_list);
> >>> -		mutex_unlock(&to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>> +		spin_unlock(to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>>    	}
> >>>
> >>>    	if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
> >>> @@ -1123,7 +1117,28 @@ static u64 i915_ttm_mmap_offset(struct
> >>> drm_i915_gem_object *obj)
> >>>
> >>>    static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
> >>>    {
> >>> +	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
> >>> +	intel_wakeref_t wakeref = 0;
> >>> +
> >>> +	assert_object_held_shared(obj);
> >>> +
> >>> +	if (i915_ttm_cpu_maps_iomem(bo->resource)) {
> >>> +		wakeref =
> >>> +intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
> >>> +
> >>> +		/* userfault_count is protected by obj lock and rpm wakeref. */
> >>> +		if (obj->userfault_count) {
> >>> +			spin_lock(to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>> +			list_del(&obj->userfault_link);
> >>> +			spin_unlock(to_gt(to_i915(obj->base.dev))-
> >>> lmem_userfault_lock);
> >>> +			obj->userfault_count = 0;
> >>> +		}
> >>> +
> >>> +	}
> >>> +
> >>>    	ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
> >>> +
> >>> +	if (wakeref)
> >>> +		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm,
> >> wakeref);
> >>>    }
> >>>
> >>>    static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops
> >>> = { diff --git a/drivers/gpu/drm/i915/gt/intel_gt.c
> >>> b/drivers/gpu/drm/i915/gt/intel_gt.c
> >>> index b367cfff48d5..1e63432d97bb 100644
> >>> --- a/drivers/gpu/drm/i915/gt/intel_gt.c
> >>> +++ b/drivers/gpu/drm/i915/gt/intel_gt.c
> >>> @@ -41,7 +41,7 @@ void intel_gt_common_init_early(struct intel_gt *gt)
> >>>    	spin_lock_init(gt->irq_lock);
> >>>
> >>>    	INIT_LIST_HEAD(&gt->lmem_userfault_list);
> >>> -	mutex_init(&gt->lmem_userfault_lock);
> >>> +	spin_lock_init(gt->lmem_userfault_lock);
> >>>    	INIT_LIST_HEAD(&gt->closed_vma);
> >>>    	spin_lock_init(&gt->closed_lock);
> >>>
> >>> @@ -71,6 +71,10 @@ int intel_root_gt_init_early(struct
> >>> drm_i915_private
> >> *i915)
> >>>    	if (!gt->irq_lock)
> >>>    		return -ENOMEM;
> >>>
> >>> +	gt->lmem_userfault_lock = drmm_kzalloc(&i915->drm, sizeof(*gt-
> >>> lmem_userfault_lock), GFP_KERNEL);
> >>> +	if (!gt->lmem_userfault_lock)
> >>> +		return -ENOMEM;
> >>> +
> >>>    	intel_gt_common_init_early(gt);
> >>>
> >>>    	return 0;
> >>> @@ -813,6 +817,11 @@ static int intel_gt_tile_setup(struct intel_gt
> >>> *gt,
> >> phys_addr_t phys_addr)
> >>>    		gt->uncore = uncore;
> >>>    		gt->irq_lock = irq_lock;
> >>>
> >>> +		gt->lmem_userfault_lock = drmm_kzalloc(&gt->i915->drm,
> >>> +						       sizeof(*gt-
> >>> lmem_userfault_lock), GFP_KERNEL);
> >>> +		if (!gt->lmem_userfault_lock)
> >>> +			return -ENOMEM;
> >>> +
> >>>    		intel_gt_common_init_early(gt);
> >>>    	}
> >>>
> >>> diff --git a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> >>> b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> >>> index 30003d68fd51..925775310b1e 100644
> >>> --- a/drivers/gpu/drm/i915/gt/intel_gt_types.h
> >>> +++ b/drivers/gpu/drm/i915/gt/intel_gt_types.h
> >>> @@ -153,7 +153,7 @@ struct intel_gt {
> >>>    	 *  but instead has exclusive access by virtue of all other
> >>> accesses
> >> requiring
> >>>    	 *  holding the runtime pm wakeref.
> >>>    	 */
> >>> -	struct mutex lmem_userfault_lock;
> >>> +	spinlock_t *lmem_userfault_lock;
Hi Jani & Tvrtko,
Could you please provide your opinion whether to keep these lmem_userfault_lock, lmem_userfault_list
fields in drm_i915_private or intel_gt, there are required to handle runtime PM with lmem access.
What could the best place to keep these fields ?
Thanks,
Anshuman Gupta.
> >>>    	struct list_head lmem_userfault_list;
> >>
> >> It looks like there were some comments off list about this. It
> >> doesn't look like runtime pm is really per gt, so maybe just stick
> >> all this in i915? Or was there some other reason for putting this in gt?
> > Thanks for review,
> > Yes runtime pm is not per GT , i had kept inside gt to align with GTT
> > mmap releasing implementation, As there it was encapsulated inside git->ggtt.
> > Also userfault_wakeref is also encapsulated with in GT.
> > Shall I move all userfault_wakeref, lmem_userfault_lock and
> lmem_userfault_list to i915 ?
> 
> I think so. Or if this actually desired then we need to poke at the object lmem-
> >gt in places like i915_ttm_unmap_virtual(), instead of looking at the root gt.
> 
> >
> > Thanks ,
> > Anshuman Gupta.
> >
> >
> >
> >
> >>
> >>>
> >>>    	struct list_head closed_vma;


More information about the Intel-gfx mailing list