[Intel-xe] [PATCH v2] drm/xe/dgfx: Release mmap mappings on rpm suspend

Thu Dec 14 14:16:52 UTC 2023


On 14-12-2023 03:17, Rodrigo Vivi wrote:
> On Tue, Nov 28, 2023 at 09:11:18PM +0530, Badal Nilawar wrote:
>> Release all mmap mappings for all vram objects which are associated
> 
> I wonder if it would be possible to extend that to system mappings as well?
This approach will be only useful for vram as vram will not be 
accessible when card in d3.
> 
>> with userfault such that, while pcie function in D3hot, any access
>> to memory mappings will raise a userfault.
>>
>> Upon userfault, in order to access memory mappings, if graphics
>> function is in D3 then runtime resume of dgpu will be triggered to
>> transition to D0.
>>
>> v2:
>>    - Add lock dep assertion before updating vram_userfault_count (Rodrigo)
>>    - Avoid iomem check before bo migration check as bo can migrate
>>      to system memory (Matthew Auld)
>>
>> Cc: Matthew Auld <matthew.auld at intel.com>
>> Cc: Anshuman Gupta <anshuman.gupta at intel.com>
>> Signed-off-by: Badal Nilawar <badal.nilawar at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_bo.c           | 53 +++++++++++++++++++++++++++-
>>   drivers/gpu/drm/xe/xe_bo.h           |  2 ++
>>   drivers/gpu/drm/xe/xe_bo_types.h     |  6 ++++
>>   drivers/gpu/drm/xe/xe_device_types.h | 20 +++++++++++
>>   drivers/gpu/drm/xe/xe_pm.c           |  7 ++++
>>   5 files changed, 87 insertions(+), 1 deletion(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>> index c5aa01c0861c..03e699e3383f 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.c
>> +++ b/drivers/gpu/drm/xe/xe_bo.c
>> @@ -784,6 +784,18 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>>   		dma_fence_put(fence);
>>   	}
>>   
>> +	/*
>> +	 * TTM has already nuked the mmap for us (see ttm_bo_unmap_virtual),
>> +	 * so if we moved from VRAM make sure to unlink this from the userfault
>> +	 * tracking.
>> +	 */
>> +	if (mem_type_is_vram(old_mem_type)) {
>> +		spin_lock(&xe->mem_access.vram_userfault_lock);
>> +		if (!list_empty(&bo->vram_userfault_link))
>> +			list_del_init(&bo->vram_userfault_link);
>> +		spin_unlock(&xe->mem_access.vram_userfault_lock);
>> +	}
>> +
>>   	xe_device_mem_access_put(xe);
>>   
>>   out:
>> @@ -1112,6 +1124,8 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>>   {
>>   	struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
>>   	struct drm_device *ddev = tbo->base.dev;
>> +	struct xe_bo *bo = NULL;
>> +	struct xe_device *xe = to_xe_device(ddev);
>>   	vm_fault_t ret;
>>   	int idx, r = 0;
>>   
>> @@ -1120,7 +1134,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>>   		return ret;
>>   
>>   	if (drm_dev_enter(ddev, &idx)) {
>> -		struct xe_bo *bo = ttm_to_xe_bo(tbo);
>> +		bo = ttm_to_xe_bo(tbo);
>>   
>>   		trace_xe_bo_cpu_fault(bo);
>>   
>> @@ -1139,10 +1153,31 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
>>   	} else {
>>   		ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
>>   	}
>> +
>>   	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
>>   		return ret;
>> +	/*
>> +	 * ttm_bo_vm_reserve() already has dma_resv_lock.
>> +	 * vram_userfault_count is protected by dma_resv lock and rpm wakeref.
>> +	 */
>> +	if (ret == VM_FAULT_NOPAGE && bo && !bo->vram_userfault_count) {
>> +		if (tbo->resource->bus.is_iomem) {
>> +			dma_resv_assert_held(tbo->base.resv);
>> +
>> +			xe_device_mem_access_get(xe);
> 
> should/could we get the rpm ref directly here?
Ok
> 
> Also, I wonder if we should anyway get that at the beginning of page fault,
> regardless of the mentioned conditions above?!
Adding rpm ref at begining will end up waking device for gem fault other 
than iomem.
> 
>> +
>> +			bo->vram_userfault_count = 1;
>> +
>> +			spin_lock(&xe->mem_access.vram_userfault_lock);
>> +			list_add(&bo->vram_userfault_link, &xe->mem_access.vram_userfault_list);
>> +			spin_unlock(&xe->mem_access.vram_userfault_lock);
>> +
>> +			xe_device_mem_access_put(xe);
> 
> okay, so in the next second we have another page fault if this is continuous access?!
> shouldn't we block that for a longer period if this ever happened?
For display enabled platform this scenario will not hit as rpm will 
happen only when display goes off but for headless case it will keep 
hitting userfault. That's why in other patch series we proposed to take 
rpm ref in mmap and release on vm close.
> 
>> +		}
>> +	}
>>   
>>   	dma_resv_unlock(tbo->base.resv);
>> +
>>   	return ret;
>>   }
>>   
>> @@ -2168,6 +2203,22 @@ int xe_bo_dumb_create(struct drm_file *file_priv,
>>   	return err;
>>   }
>>   
>> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo)
>> +{
>> +	struct ttm_buffer_object *tbo = &bo->ttm;
>> +	struct ttm_device *bdev = tbo->bdev;
>> +
>> +	drm_vma_node_unmap(&tbo->base.vma_node, bdev->dev_mapping);
>> +
>> +	/*
>> +	 * We have exclusive access here via runtime suspend. All other callers
>> +	 * must first grab the rpm wakeref.
>> +	 */
>> +	XE_WARN_ON(!bo->vram_userfault_count);
>> +	list_del(&bo->vram_userfault_link);
> 
> to be really honest I got confused with the list here.
> Why do we need the list if we are adding all buffers on suspend and
> removing on page fault?
> 
> I was expecting some list of buffers that are mmaped but not exported
> and once we have some buffer like that we ummap that and then uppon
> page fault we wake up the device, but we would only remove from the list
> when buffer is gone... So I'm not understanding the list usage here.
Actually list is to store mmaped references at xe_gem_fault, while 
suspending we are scanning the list and doing unmap.

Regards,
Badal
> 
>> +	bo->vram_userfault_count = 0;
>> +}
>> +
>>   #if IS_ENABLED(CONFIG_DRM_XE_KUNIT_TEST)
>>   #include "tests/xe_bo.c"
>>   #endif
>> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
>> index 9f373f6d25f2..c9f52e3f6b97 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.h
>> +++ b/drivers/gpu/drm/xe/xe_bo.h
>> @@ -229,6 +229,8 @@ int xe_gem_create_ioctl(struct drm_device *dev, void *data,
>>   			struct drm_file *file);
>>   int xe_gem_mmap_offset_ioctl(struct drm_device *dev, void *data,
>>   			     struct drm_file *file);
>> +void xe_bo_runtime_pm_release_mmap_offset(struct xe_bo *bo);
>> +
>>   int xe_bo_dumb_create(struct drm_file *file_priv,
>>   		      struct drm_device *dev,
>>   		      struct drm_mode_create_dumb *args);
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
>> index 4bff60996168..032773bb097d 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -79,6 +79,12 @@ struct xe_bo {
>>   	struct llist_node freed;
>>   	/** @created: Whether the bo has passed initial creation */
>>   	bool created;
>> +	/**
>> +	 * Whether the object is currently in fake offset mmap backed by vram.
>> +	 */
>> +	unsigned int vram_userfault_count;
>> +	struct list_head vram_userfault_link;
>> +
>>   };
>>   
>>   #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>> index 2712905c7a91..046085af4c56 100644
>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>> @@ -359,6 +359,26 @@ struct xe_device {
>>   	struct {
>>   		/** @ref: ref count of memory accesses */
>>   		atomic_t ref;
>> +		/*
>> +		 *  Protects access to vram usefault list.
>> +		 *  It is required, if we are outside of the runtime suspend path,
>> +		 *  access to @vram_userfault_list requires always first grabbing the
>> +		 *  runtime pm, to ensure we can't race against runtime suspend.
>> +		 *  Once we have that we also need to grab @vram_userfault_lock,
>> +		 *  at which point we have exclusive access.
>> +		 *  The runtime suspend path is special since it doesn't really hold any locks,
>> +		 *  but instead has exclusive access by virtue of all other accesses requiring
>> +		 *  holding the runtime pm wakeref.
>> +		 */
>> +		spinlock_t vram_userfault_lock;
>> +
>> +		/*
>> +		 *  Keep list of userfaulted gem obj, which require to release their
>> +		 *  mmap mappings at runtime suspend path.
>> +		 */
>> +		struct list_head vram_userfault_list;
>> +
>> +		bool vram_userfault_ongoing;
>>   	} mem_access;
>>   
>>   	/**
>> diff --git a/drivers/gpu/drm/xe/xe_pm.c b/drivers/gpu/drm/xe/xe_pm.c
>> index e31a91cf311c..e2bda8bc75bf 100644
>> --- a/drivers/gpu/drm/xe/xe_pm.c
>> +++ b/drivers/gpu/drm/xe/xe_pm.c
>> @@ -181,6 +181,8 @@ void xe_pm_init(struct xe_device *xe)
>>   	}
>>   
>>   	xe_pm_runtime_init(xe);
>> +	INIT_LIST_HEAD(&xe->mem_access.vram_userfault_list);
>> +	spin_lock_init(&xe->mem_access.vram_userfault_lock);
>>   }
>>   
>>   void xe_pm_runtime_fini(struct xe_device *xe)
>> @@ -214,6 +216,7 @@ struct task_struct *xe_pm_read_callback_task(struct xe_device *xe)
>>   
>>   int xe_pm_runtime_suspend(struct xe_device *xe)
>>   {
>> +	struct xe_bo *bo, *on;
>>   	struct xe_gt *gt;
>>   	u8 id;
>>   	int err = 0;
>> @@ -247,6 +250,10 @@ int xe_pm_runtime_suspend(struct xe_device *xe)
>>   	 */
>>   	lock_map_acquire(&xe_device_mem_access_lockdep_map);
>>   
>> +	list_for_each_entry_safe(bo, on,
>> +				 &xe->mem_access.vram_userfault_list, vram_userfault_link)
>> +		xe_bo_runtime_pm_release_mmap_offset(bo);
>> +
>>   	if (xe->d3cold.allowed) {
>>   		err = xe_bo_evict_all(xe);
>>   		if (err)
>> -- 
>> 2.25.1
>>