[PATCH v2 13/32] drm/xe/svm: In case of atomic access ensure get_pages happens from vram
Matthew Brost
matthew.brost at intel.com
Thu Apr 17 04:19:17 UTC 2025
On Mon, Apr 07, 2025 at 03:47:00PM +0530, Himal Prasad Ghimiray wrote:
> Ranges can be invalidated between VRAM allocation and get_pages, so
> ensure the DMA mapping comes from VRAM in the case of atomic access.
> Retry 3 times before failing the fault in case of concurrent CPU/GPU
> access.
>
Again, I pulled this patch into a series which will minimally enable
atomics per UMD request. See the version of the patch [1] I landed on -
it is basically this patch with my review feedback applied. I took
ownership but left your SoB as it is based on this patch. We will need
another reviewer though, since we are both contributors, but feel free
to comment there.
Matt
[1] https://patchwork.freedesktop.org/patch/649010/?series=147846&rev=2
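
For reference, below is a minimal standalone sketch of the control flow
this patch introduces: a bounded retry around VRAM allocation and
get_pages, with the context forced to VRAM-only for atomic faults. The
names (fault_ctx, alloc_vram, get_pages, handle_fault) are simplified
stand-ins for illustration only, not the real xe/drm_gpusvm API:

/*
 * Standalone model of the retry flow in xe_svm_handle_pagefault() as
 * modified by this patch. Helpers and types are simplified stand-ins.
 */
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>

struct fault_ctx {
	bool devmem_possible;
	bool vram_only;	/* set for atomic faults: get_pages must map VRAM */
};

/* Stubs standing in for xe_svm_alloc_vram()/xe_svm_range_get_pages(). */
static int alloc_vram(struct fault_ctx *ctx) { (void)ctx; return 0; }
static int get_pages(struct fault_ctx *ctx) { (void)ctx; return 0; }

static int handle_fault(bool atomic, bool needs_migrate)
{
	struct fault_ctx ctx = { .devmem_possible = true };
	int retry_count = 3;
	int err;

retry:
	retry_count--;

	if (needs_migrate) {
		err = alloc_vram(&ctx);
		if (err) {
			if (retry_count)
				goto retry;	/* transient failure: retry */
			return err;		/* retries exhausted: fail */
		}
	}

	/* Atomic access: pages handed back must be VRAM-backed. */
	if (atomic)
		ctx.vram_only = true;

	err = get_pages(&ctx);
	/* CPU mappings changed underneath us, or VRAM requirement unmet. */
	if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
		if (retry_count)
			goto retry;
	}
	return err;
}

int main(void)
{
	printf("fault result: %d\n", handle_fault(true, true));
	return 0;
}
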
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> drivers/gpu/drm/xe/xe_svm.c | 43 ++++++++++++++++++++++++-------------
> 1 file changed, 28 insertions(+), 15 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
> index f4ae3feaf9d3..7ec7ecd7eb1f 100644
> --- a/drivers/gpu/drm/xe/xe_svm.c
> +++ b/drivers/gpu/drm/xe/xe_svm.c
> @@ -778,11 +778,13 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
> .check_pages_threshold = IS_DGFX(vm->xe) &&
> IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
> + .vram_only = 0,
> };
> struct xe_svm_range *range;
> struct drm_exec exec;
> struct dma_fence *fence;
> struct xe_tile *tile = gt_to_tile(gt);
> + int retry_count = 3;
> ktime_t end = 0;
> int err;
>
> @@ -792,6 +794,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
> xe_gt_stats_incr(gt, XE_GT_STATS_ID_SVM_PAGEFAULT_COUNT, 1);
>
> retry:
> + retry_count--;
> /* Always process UNMAPs first so view SVM ranges is current */
> err = xe_svm_garbage_collector(vm);
> if (err)
> @@ -807,30 +810,40 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
>
> range_debug(range, "PAGE FAULT");
>
> - /* XXX: Add migration policy, for now migrate range once */
> - if (!range->skip_migrate &&
> - xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) {
> - range->skip_migrate = true;
> -
> + if (xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) {
> err = xe_svm_alloc_vram(vm, tile, range, &ctx);
> if (err) {
> - drm_dbg(&vm->xe->drm,
> - "VRAM allocation failed, falling back to "
> - "retrying fault, asid=%u, errno=%pe\n",
> - vm->usm.asid, ERR_PTR(err));
> - goto retry;
> + if (retry_count) {
> + drm_dbg(&vm->xe->drm,
> + "VRAM allocation failed, falling back to retrying fault, asid=%u, errno=%pe\n",
> + vm->usm.asid, ERR_PTR(err));
> + goto retry;
> + } else {
> + drm_err(&vm->xe->drm,
> + "VRAM allocation failed, retry count exceeded, asid=%u, errno=%pe\n",
> + vm->usm.asid, ERR_PTR(err));
> + return err;
> + }
> }
> +
> }
>
> + if (atomic)
> + ctx.vram_only = 1;
> +
> range_debug(range, "GET PAGES");
> err = xe_svm_range_get_pages(vm, range, &ctx);
> /* Corner where CPU mappings have changed */
> if (err == -EOPNOTSUPP || err == -EFAULT || err == -EPERM) {
> - drm_dbg(&vm->xe->drm,
> - "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
> - vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> - range_debug(range, "PAGE FAULT - RETRY PAGES");
> - goto retry;
> + if (retry_count) {
> + drm_dbg(&vm->xe->drm, "Get pages failed, falling back to retrying, asid=%u, gpusvm=%p, errno=%pe\n",
> + vm->usm.asid, &vm->svm.gpusvm, ERR_PTR(err));
> + range_debug(range, "PAGE FAULT - RETRY PAGES");
> + goto retry;
> + } else {
> +			drm_err(&vm->xe->drm, "Get pages failed, retry count exceeded, asid=%u, errno=%pe\n",
> + vm->usm.asid, ERR_PTR(err));
> + }
> }
> if (err) {
> range_debug(range, "PAGE FAULT - FAIL PAGE COLLECT");
> --
> 2.34.1
>