[Intel-xe] [PATCH 2/4] drm/xe/vm: Implement userptr page pinning

Thomas Hellström thomas.hellstrom at linux.intel.com
Tue Aug 22 08:23:21 UTC 2023


On 8/20/23 06:06, Matthew Brost wrote:
> On Fri, Aug 18, 2023 at 05:08:43PM +0200, Thomas Hellström wrote:
>> Implement pinning of userptrs between VM_BIND and VM_UNBIND, which will
>> facilitate avoiding long hangs on non-preemptible workloads. But don't
>> hook it up to userspace just yet.
>>
>> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_vm.c       | 76 ++++++++++++++++++++++----------
>>   drivers/gpu/drm/xe/xe_vm.h       |  9 ++++
>>   drivers/gpu/drm/xe/xe_vm_types.h | 12 +++++
>>   3 files changed, 74 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 8bf7f62e6548..ecbcad696b60 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -74,10 +74,6 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>>   	if (notifier_seq == vma->userptr.notifier_seq)
>>   		return 0;
>>   
>> -	pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
>> -	if (!pages)
>> -		return -ENOMEM;
>> -
>>   	if (vma->userptr.sg) {
>>   		dma_unmap_sgtable(xe->drm.dev,
>>   				  vma->userptr.sg,
>> @@ -87,6 +83,17 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>>   		vma->userptr.sg = NULL;
>>   	}
>>   
>> +	if (vma->userptr.pinned_pages) {
>> +		unpin_user_pages_dirty_lock(vma->userptr.pinned_pages,
>> +					    vma->userptr.num_pinned,
>> +					    !read_only);
>> +		pages = vma->userptr.pinned_pages;
> This implies that we can repin already pinned pages; I don't think that
> should be possible, right? We shouldn't call this function twice, nor
> should the retry loop be triggered - both of these cases require an
> invalidation to occur, which shouldn't be possible if the pages are
> pinned. So we should likely have a warning if vma->userptr.pinned_pages
> is set, right?

Good catch. Currently, since we still allow the userptr sequence number 
to be bumped, the next exec will release the old pages and pin new pages 
(which may be the same ones), but the GPU might still be accessing the 
old pages. We need to make sure this can't happen.
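
As a belt-and-suspenders measure we could also warn and bail out early
in xe_vma_userptr_pin_pages(), along the lines you suggest. Untested
sketch, placed after the existing notifier_seq early-return (plain
WARN_ON() here just as a placeholder):

	/*
	 * A pinned userptr must never have its pages released and
	 * re-pinned behind the GPU's back. Pages already being present
	 * here means the notifier seqno was bumped when it shouldn't
	 * have been.
	 */
	if (xe_vma_is_pinned(vma) && WARN_ON(vma->userptr.pinned_pages))
		return 0;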

/Thomas

>> +	} else {
>> +		pages = kvmalloc_array(num_pages, sizeof(*pages), GFP_KERNEL);
>> +		if (!pages)
>> +			return -ENOMEM;
>> +	}
>> +
>>   	pinned = ret = 0;
>>   	if (in_kthread) {
>>   		if (!mmget_not_zero(vma->userptr.notifier.mm)) {
>> @@ -97,11 +104,18 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>>   	}
>>   
>>   	while (pinned < num_pages) {
>> -		ret = get_user_pages_fast(xe_vma_userptr(vma) +
>> -					  pinned * PAGE_SIZE,
>> -					  num_pages - pinned,
>> -					  read_only ? 0 : FOLL_WRITE,
>> -					  &pages[pinned]);
>> +		if (xe_vma_is_pinned(vma))
>> +			ret = pin_user_pages_fast(xe_vma_userptr(vma) +
>> +						  pinned * PAGE_SIZE,
>> +						  num_pages - pinned,
>> +						  read_only ? 0 : FOLL_WRITE,
>> +						  &pages[pinned]);
>> +		else
>> +			ret = get_user_pages_fast(xe_vma_userptr(vma) +
>> +						  pinned * PAGE_SIZE,
>> +						  num_pages - pinned,
>> +						  read_only ? 0 : FOLL_WRITE,
>> +						  &pages[pinned]);
>>   		if (ret < 0) {
>>   			if (in_kthread)
>>   				ret = 0;
>> @@ -137,19 +151,24 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>>   	if (ret)
>>   		goto out_free_sg;
>>   
>> -	for (i = 0; i < pinned; ++i) {
>> -		if (!read_only) {
>> -			lock_page(pages[i]);
>> -			set_page_dirty(pages[i]);
>> -			unlock_page(pages[i]);
>> +	if (!xe_vma_is_pinned(vma)) {
>> +		for (i = 0; i < pinned; ++i) {
>> +			if (!read_only) {
>> +				lock_page(pages[i]);
>> +				set_page_dirty(pages[i]);
>> +				unlock_page(pages[i]);
>> +			}
>> +
>> +			mark_page_accessed(pages[i]);
>>   		}
>>   
>> -		mark_page_accessed(pages[i]);
>> +		release_pages(pages, pinned);
>> +		kvfree(pages);
>> +	} else {
>> +		vma->userptr.pinned_pages = pages;
>> +		vma->userptr.num_pinned = pinned;
>>   	}
>>   
>> -	release_pages(pages, pinned);
>> -	kvfree(pages);
>> -
>>   	vma->userptr.notifier_seq = notifier_seq;
>>   	if (xe_vma_userptr_check_repin(vma) == -EAGAIN)
>>   		goto retry;
>> @@ -160,9 +179,14 @@ int xe_vma_userptr_pin_pages(struct xe_vma *vma)
>>   	sg_free_table(vma->userptr.sg);
>>   	vma->userptr.sg = NULL;
>>   out_release_pages:
>> -	release_pages(pages, pinned);
>> +	if (!xe_vma_is_pinned(vma))
>> +		release_pages(pages, pinned);
>> +	else
>> +		unpin_user_pages(pages, pinned);
>> +	vma->userptr.num_pinned = 0;
>>   mm_closed:
>>   	kvfree(pages);
>> +	vma->userptr.pinned_pages = NULL;
>>   	return ret;
>>   }
>>   
>> @@ -721,7 +745,7 @@ static bool vma_userptr_invalidate(struct mmu_interval_notifier *mni,
>>   	mmu_interval_set_seq(mni, cur_seq);
>>   
>>   	/* No need to stop gpu access if the userptr is not yet bound. */
>> -	if (!vma->userptr.initial_bind) {
>> +	if (xe_vma_is_pinned(vma) || !vma->userptr.initial_bind) {
>>   		up_write(&vm->userptr.notifier_lock);
>>   		return true;
>>   	}
>> @@ -976,10 +1000,16 @@ static void xe_vma_destroy_late(struct xe_vma *vma)
>>   			vma->userptr.sg = NULL;
>>   		}
>>   
>> +		if (vma->userptr.pinned_pages) {
>> +			unpin_user_pages_dirty_lock(vma->userptr.pinned_pages,
>> +						    vma->userptr.num_pinned,
>> +						    !read_only);
>> +			kvfree(vma->userptr.pinned_pages);
>> +		}
>> +
>>   		/*
>> -		 * Since userptr pages are not pinned, we can't remove
>> -		 * the notifer until we're sure the GPU is not accessing
>> -		 * them anymore
>> +		 * We can't remove the notifer until we're sure the GPU is
>> +		 * not accessing the pages anymore
>>   		 */
>>   		mmu_interval_notifier_remove(&vma->userptr.notifier);
>>   		xe_vm_put(vm);
>> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
>> index 6de6e3edb24a..913544d7d995 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.h
>> +++ b/drivers/gpu/drm/xe/xe_vm.h
>> @@ -139,6 +139,15 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
>>   	return xe_vma_has_no_bo(vma) && !xe_vma_is_null(vma);
>>   }
>>   
>> +/**
>> + * xe_vma_is_pinned() - User has requested the backing store of this vma
>> + * to be pinned.
>> + */
>> +static inline bool xe_vma_is_pinned(struct xe_vma *vma)
>> +{
>> +	return xe_vma_is_userptr(vma) && (vma->gpuva.flags & XE_VMA_PINNED);
> Nit: I'd name this xe_vma_is_userptr_pinned, based on it checking both
> userptr and pinned. Or just check XE_VMA_PINNED here with the current
> name.
>
> Matt
>
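
For illustration, the renamed variant would read something like this
(untested sketch, same body as above):

	static inline bool xe_vma_is_userptr_pinned(struct xe_vma *vma)
	{
		return xe_vma_is_userptr(vma) &&
		       (vma->gpuva.flags & XE_VMA_PINNED);
	}
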
>> +}
>> +
>>   #define xe_vm_assert_held(vm) dma_resv_assert_held(&(vm)->resv)
>>   
>>   u64 xe_vm_pdp4_descriptor(struct xe_vm *vm, struct xe_tile *tile);
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index 3681a5ff588b..9b90e649cd69 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -33,6 +33,8 @@ struct xe_vm;
>>   #define XE_VMA_PTE_4K		(DRM_GPUVA_USERBITS << 5)
>>   #define XE_VMA_PTE_2M		(DRM_GPUVA_USERBITS << 6)
>>   #define XE_VMA_PTE_1G		(DRM_GPUVA_USERBITS << 7)
>> +/* User requested backing store to be pinned */
>> +#define XE_VMA_PINNED           (DRM_GPUVA_USERBITS << 8)
>>   
>>   /** struct xe_userptr - User pointer */
>>   struct xe_userptr {
>> @@ -54,6 +56,16 @@ struct xe_userptr {
>>   	 * read: vm->userptr.notifier_lock in write mode or vm->resv held.
>>   	 */
>>   	bool initial_bind;
>> +	/**
>> +	 * @pinned_pages: List of pinned pages if xe_vma_is_pinned(),
>> +	 * NULL otherwise. Protected by the vm lock.
>> +	 */
>> +	struct page **pinned_pages;
>> +	/**
>> +	 * @num_pinned: Number of pointers to pinned pages in @pinned_pages.
>> +	 * Protected by the vm lock.
>> +	 */
>> +	unsigned long num_pinned;
>>   #if IS_ENABLED(CONFIG_DRM_XE_USERPTR_INVAL_INJECT)
>>   	u32 divisor;
>>   #endif
>> -- 
>> 2.41.0
>>

