[PATCH v2 13/32] drm/xe/svm: In case of atomic access ensure get_pages happens from vram

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Mon Apr 21 04:58:25 UTC 2025



On 17-04-2025 09:49, Matthew Brost wrote:
> On Mon, Apr 07, 2025 at 03:47:00PM +0530, Himal Prasad Ghimiray wrote:
>> Ranges can be invalidated in between vram allocation and get_pages,
>> ensure the dma mapping is happening from vram only in case of atomic
>> access. Retry 3 times before calling out fault in case of concurrent
>> cpu/gpu access.
>>
> 
> Again I pulled this patch into a series which will minimally enable
> atomics per UMD request. See the version of the patch [1] I landed on -
> that is basically my review feedback. I took ownership but left SoB by
> you as it is based on this patch. We will need another reviewer though as
> we are both contributors, but feel free to comment there.

Thanks for the update. I will use the version you modified for the prefetch 
series too. It looks good to me.
  >
> Matt
> 
> [1] https://patchwork.freedesktop.org/patch/649010/?series=147846&rev=2
> 
>> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_svm.c | 43 ++++++++++++++++++++++++-------------
>>   1 file changed, 28 insertions(+), 15 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
>> index f4ae3feaf9d3..7ec7ecd7eb1f 100644
>> --- a/drivers/gpu/drm/xe/xe_svm.c
>> +++ b/drivers/gpu/drm/xe/xe_svm.c
>> @@ -778,11 +778,13 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>>   			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
>>   		.check_pages_threshold = IS_DGFX(vm->xe) &&
>>   			IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
>> +		.vram_only = 0,
>>   	};
>>   	struct xe_svm_range *range;
>>   	struct drm_exec exec;
>>   	struct dma_fence *fence;
>>   	struct xe_tile *tile = gt_to_tile(gt);
>> +	int retry_count = 3;
>>   	ktime_t end = 0;
>>   	int err;
>>   
>> @@ -792,6 +794,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>>   	xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
>>   
>>   retry:
>> +	retry_count--;
>>   	/* Always process UNMAPs first so view SVM ranges is current */
>>   	err = xe_svm_garbage_collector(vm);
>>   	if (err)
>> @@ -807,30 +810,40 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>>   
>>   	range_debug(range, "PAGE FAULT");
>>   
>> -	/* XXX: Add migration policy, for now migrate range once */
>> -	if (!range->skip_migrate &&
>> -	    xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) {
>> -		range->skip_migrate = true;
>> -
>> +	if (xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) {
>>   		err = xe_svm_alloc_vram(vm, tile, range, &ctx);
>>   		if (err) {
>> -			drm_dbg(&vm->xe->drm,
>> -				"VRAM allocation failed, falling back to "
>> -				"retrying fault, asid=%u, errno=%pe\n",
>> -				vm->usm.asid, ERR_PTR(err));
>> -			goto retry;
>> +			if (retry_count) {
>> +				drm_dbg(&vm->xe->drm,
>> +					"VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
>> +					vm->usm.asid, ERR_PTR(err));
>> +				goto retry;
>> +			} else {
>> +				drm_err(&vm->xe->drm,
>> +					"VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
>> +					vm->usm.asid, ERR_PTR(err));
>> +				return err;
>> +			}
>>   		}
>> +
>>   	}
>>   
>> +	if (atomic)
>> +		ctx.vram_only = 1;
>> +
>>   	range_debug(range, "GET PAGES");
>>   	err = xe_svm_range_get_pages(vm, range, &ctx);
>>   	/* Corner where CPU mappings have changed */
>>   	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
>> -		drm_dbg(&vm->xe->drm,
>> -			"Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
>> -			vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
>> -		range_debug(range, "PAGE FAULT - RETRY PAGES");
>> -		goto retry;
>> +		if (retry_count) {
>> +			drm_dbg(&vm->xe->drm, "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
>> +				vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
>> +			range_debug(range, "PAGE FAULT - RETRY PAGES");
>> +			goto retry;
>> +		} else {
>> +			drm_err(&vm->xe->drm, "Get pages failed,, retry count exceeded, asid=%u,, errno=%pe\n",
>> +				vm->usm.asid, ERR_PTR(err));
>> +		}
>>   	}
>>   	if (err) {
>>   		range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
>> -- 
>> 2.34.1
>>



More information about the Intel-xe mailing list