[PATCH] [CI] - MADVISE
Himal Prasad Ghimiray
himal.prasad.ghimiray at intel.com
Fri Jun 13 13:17:14 UTC 2025
- DO NOT REVIEW
---
drivers/gpu/drm/drm_gpusvm.c | 122 ++----
drivers/gpu/drm/drm_gpuvm.c | 93 ++++-
drivers/gpu/drm/nouveau/nouveau_uvmm.c | 1 +
drivers/gpu/drm/xe/Makefile | 1 +
drivers/gpu/drm/xe/xe_bo.c | 21 +-
drivers/gpu/drm/xe/xe_bo_types.h | 8 +
drivers/gpu/drm/xe/xe_device.c | 3 +
drivers/gpu/drm/xe/xe_gt_pagefault.c | 2 +-
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c | 24 --
drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h | 3 -
drivers/gpu/drm/xe/xe_pt.c | 25 +-
drivers/gpu/drm/xe/xe_svm.c | 153 +++++---
drivers/gpu/drm/xe/xe_svm.h | 22 ++
drivers/gpu/drm/xe/xe_tile.h | 18 +
drivers/gpu/drm/xe/xe_vm.c | 398 +++++++++++++++++---
drivers/gpu/drm/xe/xe_vm.h | 9 +-
drivers/gpu/drm/xe/xe_vm_madvise.c | 389 +++++++++++++++++++
drivers/gpu/drm/xe/xe_vm_madvise.h | 15 +
drivers/gpu/drm/xe/xe_vm_types.h | 38 +-
include/drm/drm_gpusvm.h | 70 ++++
include/drm/drm_gpuvm.h | 25 +-
include/uapi/drm/xe_drm.h | 253 +++++++++++++
22 files changed, 1443 insertions(+), 250 deletions(-)
create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.c
create mode 100644 drivers/gpu/drm/xe/xe_vm_madvise.h
diff --git a/drivers/gpu/drm/drm_gpusvm.c b/drivers/gpu/drm/drm_gpusvm.c
index 7ff81aa0a1ca..993485386239 100644
--- a/drivers/gpu/drm/drm_gpusvm.c
+++ b/drivers/gpu/drm/drm_gpusvm.c
@@ -380,107 +380,50 @@ static void drm_gpusvm_zdd_put(struct drm_gpusvm_zdd *zdd)
}
/**
- * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
- * @notifier: Pointer to the GPU SVM notifier structure.
- * @start: Start address of the range
- * @end: End address of the range
+ * drm_gpusvm_notifier_find() - Find GPU SVM notifier from GPU SVM
+ * @gpusvm: Pointer to the GPU SVM structure.
+ * @start: Start address of the search range
+ * @end: End address of the search range
*
- * Return: A pointer to the drm_gpusvm_range if found or NULL
+ * Return: A pointer to the drm_gpusvm_notifier if found or NULL
*/
-struct drm_gpusvm_range *
-drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
- unsigned long end)
+struct drm_gpusvm_notifier *
+drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start,
+ unsigned long end)
{
struct interval_tree_node *itree;
- itree = interval_tree_iter_first(¬ifier->root, start, end - 1);
+ itree = interval_tree_iter_first(&gpusvm->root, start, end - 1);
if (itree)
- return container_of(itree, struct drm_gpusvm_range, itree);
+ return container_of(itree, struct drm_gpusvm_notifier, itree);
else
return NULL;
}
-EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);
+EXPORT_SYMBOL_GPL(drm_gpusvm_notifier_find);
/**
- * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
- * @range__: Iterator variable for the ranges
- * @next__: Iterator variable for the ranges temporay storage
- * @notifier__: Pointer to the GPU SVM notifier
- * @start__: Start address of the range
- * @end__: End address of the range
- *
- * This macro is used to iterate over GPU SVM ranges in a notifier while
- * removing ranges from it.
- */
-#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__) \
- for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)), \
- (next__) = __drm_gpusvm_range_next(range__); \
- (range__) && (drm_gpusvm_range_start(range__) < (end__)); \
- (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))
-
-/**
- * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
- * @notifier: a pointer to the current drm_gpusvm_notifier
+ * drm_gpusvm_range_find() - Find GPU SVM range from GPU SVM notifier
+ * @notifier: Pointer to the GPU SVM notifier structure.
+ * @start: Start address of the range
+ * @end: End address of the range
*
- * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
- * the current notifier is the last one or if the input notifier is
- * NULL.
+ * Return: A pointer to the drm_gpusvm_range if found or NULL
*/
-static struct drm_gpusvm_notifier *
-__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
-{
- if (notifier && !list_is_last(¬ifier->entry,
- ¬ifier->gpusvm->notifier_list))
- return list_next_entry(notifier, entry);
-
- return NULL;
-}
-
-static struct drm_gpusvm_notifier *
-notifier_iter_first(struct rb_root_cached *root, unsigned long start,
- unsigned long last)
+struct drm_gpusvm_range *
+drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
+ unsigned long end)
{
struct interval_tree_node *itree;
- itree = interval_tree_iter_first(root, start, last);
+ itree = interval_tree_iter_first(¬ifier->root, start, end - 1);
if (itree)
- return container_of(itree, struct drm_gpusvm_notifier, itree);
+ return container_of(itree, struct drm_gpusvm_range, itree);
else
return NULL;
}
-
-/**
- * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
- * @notifier__: Iterator variable for the notifiers
- * @notifier__: Pointer to the GPU SVM notifier
- * @start__: Start address of the notifier
- * @end__: End address of the notifier
- *
- * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
- */
-#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__) \
- for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1); \
- (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \
- (notifier__) = __drm_gpusvm_notifier_next(notifier__))
-
-/**
- * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
- * @notifier__: Iterator variable for the notifiers
- * @next__: Iterator variable for the notifiers temporay storage
- * @notifier__: Pointer to the GPU SVM notifier
- * @start__: Start address of the notifier
- * @end__: End address of the notifier
- *
- * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
- * removing notifiers from it.
- */
-#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__) \
- for ((notifier__) = notifier_iter_first(&(gpusvm__)->root, (start__), (end__) - 1), \
- (next__) = __drm_gpusvm_notifier_next(notifier__); \
- (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \
- (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))
+EXPORT_SYMBOL_GPL(drm_gpusvm_range_find);
/**
* drm_gpusvm_notifier_invalidate() - Invalidate a GPU SVM notifier.
@@ -581,22 +524,6 @@ int drm_gpusvm_init(struct drm_gpusvm *gpusvm,
}
EXPORT_SYMBOL_GPL(drm_gpusvm_init);
-/**
- * drm_gpusvm_notifier_find() - Find GPU SVM notifier
- * @gpusvm: Pointer to the GPU SVM structure
- * @fault_addr: Fault address
- *
- * This function finds the GPU SVM notifier associated with the fault address.
- *
- * Return: Pointer to the GPU SVM notifier on success, NULL otherwise.
- */
-static struct drm_gpusvm_notifier *
-drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm,
- unsigned long fault_addr)
-{
- return notifier_iter_first(&gpusvm->root, fault_addr, fault_addr + 1);
-}
-
/**
* to_drm_gpusvm_notifier() - retrieve the container struct for a given rbtree node
* @node: a pointer to the rbtree node embedded within a drm_gpusvm_notifier struct
@@ -1052,7 +979,7 @@ drm_gpusvm_range_find_or_insert(struct drm_gpusvm *gpusvm,
if (!mmget_not_zero(mm))
return ERR_PTR(-EFAULT);
- notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr);
+ notifier = drm_gpusvm_notifier_find(gpusvm, fault_addr, fault_addr + 1);
if (!notifier) {
notifier = drm_gpusvm_notifier_alloc(gpusvm, fault_addr);
if (IS_ERR(notifier)) {
@@ -1216,7 +1143,8 @@ void drm_gpusvm_range_remove(struct drm_gpusvm *gpusvm,
drm_gpusvm_driver_lock_held(gpusvm);
notifier = drm_gpusvm_notifier_find(gpusvm,
- drm_gpusvm_range_start(range));
+ drm_gpusvm_range_start(range),
+ drm_gpusvm_range_start(range) + 1);
if (WARN_ON_ONCE(!notifier))
return;
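
The reworked drm_gpusvm_notifier_find() above takes a half-open [start, end) interval and converts it to the inclusive last address the interval tree expects, which is why callers now pass fault_addr, fault_addr + 1 for a single-address lookup. A minimal userspace sketch of that convention, with illustrative names only (not the kernel API):

#include <stdbool.h>
#include <stdio.h>

/* Half-open [start, end) interval, mirroring the drm_gpusvm convention. */
struct interval {
	unsigned long start;
	unsigned long end;
};

/* The interval tree works on inclusive 'last' addresses, so a half-open
 * query [start, end) is translated to [start, end - 1]. */
static bool overlaps(const struct interval *node,
		     unsigned long start, unsigned long end)
{
	unsigned long last = end - 1;

	return node->start <= last && start <= node->end - 1;
}

int main(void)
{
	struct interval notifier = { .start = 0x100000, .end = 0x200000 };
	unsigned long fault_addr = 0x140000;

	/* A single-address lookup is expressed as [addr, addr + 1). */
	printf("fault in notifier: %d\n",
	       overlaps(&notifier, fault_addr, fault_addr + 1));
	printf("disjoint range:    %d\n",
	       overlaps(&notifier, 0x300000, 0x310000));
	return 0;
}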
diff --git a/drivers/gpu/drm/drm_gpuvm.c b/drivers/gpu/drm/drm_gpuvm.c
index f9eb56f24bef..030f7570b8e2 100644
--- a/drivers/gpu/drm/drm_gpuvm.c
+++ b/drivers/gpu/drm/drm_gpuvm.c
@@ -2102,10 +2102,13 @@ static int
__drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
const struct drm_gpuvm_ops *ops, void *priv,
u64 req_addr, u64 req_range,
+ enum drm_gpuvm_sm_map_ops_flags flags,
struct drm_gem_object *req_obj, u64 req_offset)
{
struct drm_gpuva *va, *next;
u64 req_end = req_addr + req_range;
+ bool is_madvise_ops = (flags == DRM_GPUVM_SKIP_GEM_OBJ_VA_SPLIT_MADVISE);
+ bool needs_map = !is_madvise_ops;
int ret;
if (unlikely(!drm_gpuvm_range_valid(gpuvm, req_addr, req_range)))
@@ -2118,26 +2121,35 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
u64 range = va->va.range;
u64 end = addr + range;
bool merge = !!va->gem.obj;
+ bool skip_madvise_ops = is_madvise_ops && merge;
+ needs_map = !is_madvise_ops;
if (addr == req_addr) {
merge &= obj == req_obj &&
offset == req_offset;
if (end == req_end) {
- ret = op_unmap_cb(ops, priv, va, merge);
- if (ret)
- return ret;
+ if (!is_madvise_ops) {
+ ret = op_unmap_cb(ops, priv, va, merge);
+ if (ret)
+ return ret;
+ }
break;
}
if (end < req_end) {
- ret = op_unmap_cb(ops, priv, va, merge);
- if (ret)
- return ret;
+ if (!is_madvise_ops) {
+ ret = op_unmap_cb(ops, priv, va, merge);
+ if (ret)
+ return ret;
+ }
continue;
}
if (end > req_end) {
+ if (skip_madvise_ops)
+ break;
+
struct drm_gpuva_op_map n = {
.va.addr = req_end,
.va.range = range - req_range,
@@ -2152,6 +2164,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
ret = op_remap_cb(ops, priv, NULL, &n, &u);
if (ret)
return ret;
+
+ if (is_madvise_ops)
+ needs_map = true;
break;
}
} else if (addr < req_addr) {
@@ -2169,20 +2184,42 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
u.keep = merge;
if (end == req_end) {
+ if (skip_madvise_ops)
+ break;
+
ret = op_remap_cb(ops, priv, &p, NULL, &u);
if (ret)
return ret;
+
+ if (is_madvise_ops)
+ needs_map = true;
+
break;
}
if (end < req_end) {
+ if (skip_madvise_ops)
+ continue;
+
ret = op_remap_cb(ops, priv, &p, NULL, &u);
if (ret)
return ret;
+
+ if (is_madvise_ops) {
+ ret = op_map_cb(ops, priv, req_addr,
+ min(end - req_addr, req_end - end),
+ NULL, req_offset);
+ if (ret)
+ return ret;
+ }
+
continue;
}
if (end > req_end) {
+ if (skip_madvise_ops)
+ break;
+
struct drm_gpuva_op_map n = {
.va.addr = req_end,
.va.range = end - req_end,
@@ -2194,6 +2231,9 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
ret = op_remap_cb(ops, priv, &p, &n, &u);
if (ret)
return ret;
+
+ if (is_madvise_ops)
+ needs_map = true;
break;
}
} else if (addr > req_addr) {
@@ -2202,20 +2242,29 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
(addr - req_addr);
if (end == req_end) {
- ret = op_unmap_cb(ops, priv, va, merge);
- if (ret)
- return ret;
+ if (!is_madvise_ops) {
+ ret = op_unmap_cb(ops, priv, va, merge);
+ if (ret)
+ return ret;
+ }
+
break;
}
if (end < req_end) {
- ret = op_unmap_cb(ops, priv, va, merge);
- if (ret)
- return ret;
+ if (!is_madvise_ops) {
+ ret = op_unmap_cb(ops, priv, va, merge);
+ if (ret)
+ return ret;
+ }
+
continue;
}
if (end > req_end) {
+ if (skip_madvise_ops)
+ break;
+
struct drm_gpuva_op_map n = {
.va.addr = req_end,
.va.range = end - req_end,
@@ -2230,14 +2279,16 @@ __drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm,
ret = op_remap_cb(ops, priv, NULL, &n, &u);
if (ret)
return ret;
+
+ if (is_madvise_ops)
+ return op_map_cb(ops, priv, addr,
+ (req_end - addr), NULL, req_offset);
break;
}
}
}
-
- return op_map_cb(ops, priv,
- req_addr, req_range,
- req_obj, req_offset);
+ return needs_map ? op_map_cb(ops, priv, req_addr,
+ req_range, req_obj, req_offset) : 0;
}
static int
@@ -2336,15 +2387,15 @@ drm_gpuvm_sm_map(struct drm_gpuvm *gpuvm, void *priv,
struct drm_gem_object *req_obj, u64 req_offset)
{
const struct drm_gpuvm_ops *ops = gpuvm->ops;
+ enum drm_gpuvm_sm_map_ops_flags flags = DRM_GPUVM_SM_MAP_NOT_MADVISE;
if (unlikely(!(ops && ops->sm_step_map &&
ops->sm_step_remap &&
ops->sm_step_unmap)))
return -EINVAL;
- return __drm_gpuvm_sm_map(gpuvm, ops, priv,
- req_addr, req_range,
- req_obj, req_offset);
+ return __drm_gpuvm_sm_map(gpuvm, ops, priv, req_addr, req_range,
+ flags, req_obj, req_offset);
}
EXPORT_SYMBOL_GPL(drm_gpuvm_sm_map);
@@ -2486,6 +2537,7 @@ static const struct drm_gpuvm_ops gpuvm_list_ops = {
* @gpuvm: the &drm_gpuvm representing the GPU VA space
* @req_addr: the start address of the new mapping
* @req_range: the range of the new mapping
+ * @flags: ops flags indicating whether this is a madvise operation
* @req_obj: the &drm_gem_object to map
* @req_offset: the offset within the &drm_gem_object
*
@@ -2516,6 +2568,7 @@ static const struct drm_gpuvm_ops gpuvm_list_ops = {
struct drm_gpuva_ops *
drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm,
u64 req_addr, u64 req_range,
+ enum drm_gpuvm_sm_map_ops_flags flags,
struct drm_gem_object *req_obj, u64 req_offset)
{
struct drm_gpuva_ops *ops;
@@ -2535,7 +2588,7 @@ drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm,
args.ops = ops;
ret = __drm_gpuvm_sm_map(gpuvm, &gpuvm_list_ops, &args,
- req_addr, req_range,
+ req_addr, req_range, flags,
req_obj, req_offset);
if (ret)
goto err_free_ops;
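
To make the DRM_GPUVM_SKIP_GEM_OBJ_VA_SPLIT_MADVISE path above concrete, here is a standalone sketch of the split arithmetic for the simplest madvise case, where one existing VA fully contains the request: the madvise flavour emits a REMAP with prev/next pieces plus a MAP over exactly the requested range, so per-range attributes can later be attached to the new VMA. This is a model of the arithmetic only, not the kernel code:

#include <stdio.h>

typedef unsigned long long u64;

struct va { u64 addr, range; };

/* Model of the madvise case in __drm_gpuvm_sm_map() where an existing VA
 * [addr, addr + range) fully contains the request [req_addr, req_end):
 * the VA is remapped into a prev piece and a next piece, and a new MAP is
 * created over the requested range so per-VMA attributes can be applied. */
static void split_for_madvise(struct va old, u64 req_addr, u64 req_range)
{
	u64 req_end = req_addr + req_range;
	struct va prev = { old.addr, req_addr - old.addr };
	struct va next = { req_end, old.addr + old.range - req_end };
	struct va map  = { req_addr, req_range };

	printf("REMAP prev: [0x%llx, 0x%llx)\n", prev.addr, prev.addr + prev.range);
	printf("REMAP next: [0x%llx, 0x%llx)\n", next.addr, next.addr + next.range);
	printf("MAP       : [0x%llx, 0x%llx)\n", map.addr, map.addr + map.range);
}

int main(void)
{
	/* Existing CPU-addr-mirror VA [0x0000, 0x4000), madvise on [0x1000, 0x3000). */
	struct va old = { 0x0000, 0x4000 };

	split_for_madvise(old, 0x1000, 0x2000);
	return 0;
}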
diff --git a/drivers/gpu/drm/nouveau/nouveau_uvmm.c b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
index 48f105239f42..26e13fcdbdb8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_uvmm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_uvmm.c
@@ -1303,6 +1303,7 @@ nouveau_uvmm_bind_job_submit(struct nouveau_job *job,
op->ops = drm_gpuvm_sm_map_ops_create(&uvmm->base,
op->va.addr,
op->va.range,
+ DRM_GPUVM_SM_MAP_NOT_MADVISE,
op->gem.obj,
op->gem.offset);
if (IS_ERR(op->ops)) {
diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
index f5f5775acdc0..d375b549c30f 100644
--- a/drivers/gpu/drm/xe/Makefile
+++ b/drivers/gpu/drm/xe/Makefile
@@ -117,6 +117,7 @@ xe-y += xe_bb.o \
xe_uc.o \
xe_uc_fw.o \
xe_vm.o \
+ xe_vm_madvise.o \
xe_vram.o \
xe_vram_freq.o \
xe_vsec.o \
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4e39188a021a..82b33dfc5515 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -1667,6 +1667,12 @@ static void xe_gem_object_close(struct drm_gem_object *obj,
}
}
+static bool should_migrate_to_smem(struct xe_bo *bo)
+{
+ return bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_GLOBAL ||
+ bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_CPU;
+}
+
static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
{
struct ttm_buffer_object *tbo = vmf->vma->vm_private_data;
@@ -1675,7 +1681,7 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
struct xe_bo *bo = ttm_to_xe_bo(tbo);
bool needs_rpm = bo->flags & XE_BO_FLAG_VRAM_MASK;
vm_fault_t ret;
- int idx;
+ int idx, r = 0;
if (needs_rpm)
xe_pm_runtime_get(xe);
@@ -1687,8 +1693,17 @@ static vm_fault_t xe_gem_fault(struct vm_fault *vmf)
if (drm_dev_enter(ddev, &idx)) {
trace_xe_bo_cpu_fault(bo);
- ret = ttm_bo_vm_fault_reserved(vmf, vmf->vma->vm_page_prot,
- TTM_BO_VM_NUM_PREFAULT);
+ if (should_migrate_to_smem(bo)) {
+ r = xe_bo_migrate(bo, XE_PL_TT);
+ if (r == -EBUSY || r == -ERESTARTSYS || r == -EINTR)
+ ret = VM_FAULT_NOPAGE;
+ else if (r)
+ ret = VM_FAULT_SIGBUS;
+ }
+ if (!ret)
+ ret = ttm_bo_vm_fault_reserved(vmf,
+ vmf->vma->vm_page_prot,
+ TTM_BO_VM_NUM_PREFAULT);
drm_dev_exit(idx);
} else {
ret = ttm_bo_vm_dummy_page(vmf, vmf->vma->vm_page_prot);
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index eb5e83c5f233..2a5273f745f7 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -62,6 +62,14 @@ struct xe_bo {
*/
struct list_head client_link;
#endif
+ /** @attr: User controlled attributes for bo */
+ struct {
+ /**
+ * @atomic_access: type of atomic access the bo needs,
+ * protected by the bo's dma-resv lock
+ */
+ u32 atomic_access;
+ } attr;
/**
* @pxp_key_instance: PXP key instance this BO was created against. A
* 0 in this variable indicates that the BO does not use PXP encryption.
diff --git a/drivers/gpu/drm/xe/xe_device.c b/drivers/gpu/drm/xe/xe_device.c
index 7d9a31868ea9..1f9969acbcd9 100644
--- a/drivers/gpu/drm/xe/xe_device.c
+++ b/drivers/gpu/drm/xe/xe_device.c
@@ -61,6 +61,7 @@
#include "xe_ttm_stolen_mgr.h"
#include "xe_ttm_sys_mgr.h"
#include "xe_vm.h"
+#include "xe_vm_madvise.h"
#include "xe_vram.h"
#include "xe_vsec.h"
#include "xe_wait_user_fence.h"
@@ -197,6 +198,8 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
DRM_RENDER_ALLOW),
DRM_IOCTL_DEF_DRV(XE_OBSERVATION, xe_observation_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_MADVISE, xe_vm_madvise_ioctl, DRM_RENDER_ALLOW),
+ DRM_IOCTL_DEF_DRV(XE_VM_QUERY_VMAS_ATTRS, xe_vm_query_vmas_attrs_ioctl, DRM_RENDER_ALLOW),
};
static long xe_drm_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
diff --git a/drivers/gpu/drm/xe/xe_gt_pagefault.c b/drivers/gpu/drm/xe/xe_gt_pagefault.c
index e2d975b2fddb..1335c5fc0e10 100644
--- a/drivers/gpu/drm/xe/xe_gt_pagefault.c
+++ b/drivers/gpu/drm/xe/xe_gt_pagefault.c
@@ -89,7 +89,7 @@ static int xe_pf_begin(struct drm_exec *exec, struct xe_vma *vma,
if (err)
return err;
- if (atomic && IS_DGFX(vm->xe)) {
+ if (xe_vma_need_vram_for_atomic(vm->xe, vma, atomic)) {
if (xe_vma_is_userptr(vma)) {
err = -EACCES;
return err;
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
index e1362e608146..6088df8e159c 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.c
@@ -448,30 +448,6 @@ void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm)
xe_gt_tlb_invalidation_fence_wait(&fence);
}
-/**
- * xe_gt_tlb_invalidation_vma - Issue a TLB invalidation on this GT for a VMA
- * @gt: GT structure
- * @fence: invalidation fence which will be signal on TLB invalidation
- * completion, can be NULL
- * @vma: VMA to invalidate
- *
- * Issue a range based TLB invalidation if supported, if not fallback to a full
- * TLB invalidation. Completion of TLB is asynchronous and caller can use
- * the invalidation fence to wait for completion.
- *
- * Return: Negative error code on error, 0 on success
- */
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- struct xe_vma *vma)
-{
- xe_gt_assert(gt, vma);
-
- return xe_gt_tlb_invalidation_range(gt, fence, xe_vma_start(vma),
- xe_vma_end(vma),
- xe_vma_vm(vma)->usm.asid);
-}
-
/**
* xe_guc_tlb_invalidation_done_handler - TLB invalidation done handler
* @guc: guc
diff --git a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
index abe9b03d543e..31072dbcad8e 100644
--- a/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
+++ b/drivers/gpu/drm/xe/xe_gt_tlb_invalidation.h
@@ -19,9 +19,6 @@ int xe_gt_tlb_invalidation_init_early(struct xe_gt *gt);
void xe_gt_tlb_invalidation_reset(struct xe_gt *gt);
int xe_gt_tlb_invalidation_ggtt(struct xe_gt *gt);
-int xe_gt_tlb_invalidation_vma(struct xe_gt *gt,
- struct xe_gt_tlb_invalidation_fence *fence,
- struct xe_vma *vma);
void xe_gt_tlb_invalidation_vm(struct xe_gt *gt, struct xe_vm *vm);
int xe_gt_tlb_invalidation_range(struct xe_gt *gt,
struct xe_gt_tlb_invalidation_fence *fence,
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index f39d5cc9f411..9dd286853654 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -518,7 +518,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
{
struct xe_pt_stage_bind_walk *xe_walk =
container_of(walk, typeof(*xe_walk), base);
- u16 pat_index = xe_walk->vma->pat_index;
+ u16 pat_index = xe_walk->vma->attr.pat_index;
struct xe_pt *xe_parent = container_of(parent, typeof(*xe_parent), base);
struct xe_vm *vm = xe_walk->vm;
struct xe_pt *xe_child;
@@ -645,13 +645,18 @@ static bool xe_atomic_for_vram(struct xe_vm *vm)
return true;
}
-static bool xe_atomic_for_system(struct xe_vm *vm, struct xe_bo *bo)
+static bool xe_atomic_for_system(struct xe_vm *vm,
+ struct xe_vma *vma)
{
struct xe_device *xe = vm->xe;
+ struct xe_bo *bo = xe_vma_bo(vma);
if (!xe->info.has_device_atomics_on_smem)
return false;
+ if (vma->attr.atomic_access == DRM_XE_VMA_ATOMIC_DEVICE)
+ return true;
+
/*
* If a SMEM+LMEM allocation is backed by SMEM, a device
* atomics will cause a gpu page fault and which then
@@ -745,7 +750,7 @@ xe_pt_stage_bind(struct xe_tile *tile, struct xe_vma *vma,
if (vma->gpuva.flags & XE_VMA_ATOMIC_PTE_BIT) {
xe_walk.default_vram_pte = xe_atomic_for_vram(vm) ? XE_USM_PPGTT_PTE_AE : 0;
- xe_walk.default_system_pte = xe_atomic_for_system(vm, bo) ?
+ xe_walk.default_system_pte = xe_atomic_for_system(vm, vma) ?
XE_USM_PPGTT_PTE_AE : 0;
}
@@ -950,7 +955,19 @@ bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
struct xe_pt *pt = vm->pt_root[tile->id];
u8 pt_mask = (range->tile_present & ~range->tile_invalidated);
- xe_svm_assert_in_notifier(vm);
+ /*
+ * Locking rules:
+ *
+ * - notifier_lock (write): full protection against page table changes
+ * and MMU notifier invalidations.
+ *
+ * - notifier_lock (read) + vm_lock (write): combined protection against
+ * invalidations and concurrent page table modifications (e.g., madvise).
+ */
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 0) ||
+ (lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
+ lockdep_is_held_type(&vm->lock, 0)));
if (!(pt_mask & BIT(tile->id)))
return false;
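
The locking comment added above allows PTE zapping under either the notifier lock held for write, or the notifier lock held for read combined with the vm lock held for write (the madvise path). A toy model of that rule, using plain booleans rather than real locks:

#include <stdbool.h>
#include <stdio.h>

/* Toy model of the locking rule documented in xe_pt_zap_ptes_range():
 * PTE zapping is allowed either under the notifier lock held for write,
 * or under the notifier lock held for read plus the vm lock held for write. */
struct lock_state {
	bool notifier_write;
	bool notifier_read;
	bool vm_write;
};

static bool may_zap_ptes(const struct lock_state *s)
{
	return s->notifier_write || (s->notifier_read && s->vm_write);
}

int main(void)
{
	struct lock_state fault_path   = { .notifier_write = true };
	struct lock_state madvise_path = { .notifier_read = true, .vm_write = true };
	struct lock_state unlocked     = { 0 };

	printf("fault path:   %d\n", may_zap_ptes(&fault_path));
	printf("madvise path: %d\n", may_zap_ptes(&madvise_path));
	printf("unlocked:     %d\n", may_zap_ptes(&unlocked));
	return 0;
}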
diff --git a/drivers/gpu/drm/xe/xe_svm.c b/drivers/gpu/drm/xe/xe_svm.c
index 13abc6049041..8cbf6a6f1cf4 100644
--- a/drivers/gpu/drm/xe/xe_svm.c
+++ b/drivers/gpu/drm/xe/xe_svm.c
@@ -169,14 +169,9 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
{
struct xe_vm *vm = gpusvm_to_vm(gpusvm);
struct xe_device *xe = vm->xe;
- struct xe_tile *tile;
struct drm_gpusvm_range *r, *first;
- struct xe_gt_tlb_invalidation_fence
- fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
u64 adj_start = mmu_range->start, adj_end = mmu_range->end;
u8 tile_mask = 0;
- u8 id;
- u32 fence_id = 0;
long err;
xe_svm_assert_in_notifier(vm);
@@ -222,42 +217,8 @@ static void xe_svm_invalidate(struct drm_gpusvm *gpusvm,
xe_device_wmb(xe);
- for_each_tile(tile, xe, id) {
- if (tile_mask & BIT(id)) {
- int err;
-
- xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
- &fence[fence_id], true);
-
- err = xe_gt_tlb_invalidation_range(tile->primary_gt,
- &fence[fence_id],
- adj_start,
- adj_end,
- vm->usm.asid);
- if (WARN_ON_ONCE(err < 0))
- goto wait;
- ++fence_id;
-
- if (!tile->media_gt)
- continue;
-
- xe_gt_tlb_invalidation_fence_init(tile->media_gt,
- &fence[fence_id], true);
-
- err = xe_gt_tlb_invalidation_range(tile->media_gt,
- &fence[fence_id],
- adj_start,
- adj_end,
- vm->usm.asid);
- if (WARN_ON_ONCE(err < 0))
- goto wait;
- ++fence_id;
- }
- }
-
-wait:
- for (id = 0; id < fence_id; ++id)
- xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+ err = xe_vm_range_tilemask_tlb_invalidation(vm, adj_start, adj_end, tile_mask);
+ WARN_ON_ONCE(err);
range_notifier_event_end:
r = first;
@@ -831,6 +792,37 @@ bool xe_svm_range_needs_migrate_to_vram(struct xe_svm_range *range, struct xe_vm
return true;
}
+/**
+ * xe_vma_resolve_pagemap - Resolve the appropriate DRM pagemap for a VMA
+ * @vma: Pointer to the xe_vma structure containing memory attributes
+ * @tile: Pointer to the xe_tile structure used as fallback for VRAM mapping
+ *
+ * This function determines the correct DRM pagemap to use for a given VMA
+ * based on the devmem_fd stored in the VMA's preferred location.
+ * DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM returns NULL, indicating that no
+ * pagemap is applicable and system memory is the preferred location.
+ * DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE returns the VRAM pagemap associated
+ * with @tile on discrete devices, or NULL otherwise.
+ *
+ * Future support for multi-device configurations may use drm_pagemap_from_fd()
+ * to resolve pagemaps from arbitrary file descriptors.
+ *
+ * Return: A pointer to the resolved drm_pagemap, or NULL if none is applicable.
+ */
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
+{
+ s32 fd = (s32)vma->attr.preferred_loc.devmem_fd;
+
+ if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM)
+ return NULL;
+
+ if (fd == DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE)
+ return IS_DGFX(tile_to_xe(tile)) ? &tile->mem.vram.dpagemap : NULL;
+
+ /* TODO: Support multi-device with drm_pagemap_from_fd(fd) */
+ return NULL;
+}
+
/**
* xe_svm_handle_pagefault() - SVM handle page fault
* @vm: The VM.
@@ -854,7 +846,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.check_pages_threshold = IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ? SZ_64K : 0,
- .devmem_only = atomic && IS_DGFX(vm->xe) &&
+ .devmem_only = xe_vma_need_vram_for_atomic(vm->xe, vma, atomic) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR),
.timeslice_ms = atomic && IS_DGFX(vm->xe) &&
IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR) ?
@@ -862,6 +854,7 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
};
struct xe_svm_range *range;
struct dma_fence *fence;
+ struct drm_pagemap *dpagemap;
struct xe_tile *tile = gt_to_tile(gt);
int migrate_try_count = ctx.devmem_only ? 3 : 1;
ktime_t end = 0;
@@ -891,8 +884,13 @@ int xe_svm_handle_pagefault(struct xe_vm *vm, struct xe_vma *vma,
range_debug(range, "PAGE FAULT");
+ dpagemap = xe_vma_resolve_pagemap(vma, tile);
if (--migrate_try_count >= 0 &&
- xe_svm_range_needs_migrate_to_vram(range, vma, IS_DGFX(vm->xe))) {
+ xe_svm_range_needs_migrate_to_vram(range, vma, !!dpagemap || ctx.devmem_only)) {
+ /* TODO: For multi-device support, dpagemap will be used to find the
+ * remote tile and remote device; xe_svm_alloc_vram will need to be
+ * modified to take dpagemap as input.
+ */
err = xe_svm_alloc_vram(vm, tile, range, &ctx);
ctx.timeslice_ms <<= 1; /* Double timeslice if we have to retry */
if (err) {
@@ -974,6 +972,33 @@ bool xe_svm_has_mapping(struct xe_vm *vm, u64 start, u64 end)
return drm_gpusvm_has_mapping(&vm->svm.gpusvm, start, end);
}
+/**
+ * xe_svm_unmap_address_range - Unmap SVM mappings and ranges
+ * @vm: The VM
+ * @start: start address
+ * @end: end address
+ *
+ * This function unmaps and destroys SVM ranges that are only partially
+ * covered by [start, end), i.e. when start or end falls inside a range.
+ */
+void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpusvm_notifier *notifier, *next;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ drm_gpusvm_for_each_notifier_safe(notifier, next, &vm->svm.gpusvm, start, end) {
+ struct drm_gpusvm_range *range, *__next;
+
+ drm_gpusvm_for_each_range_safe(range, __next, notifier, start, end) {
+ if (start > drm_gpusvm_range_start(range) ||
+ end < drm_gpusvm_range_end(range)) {
+ if (IS_DGFX(vm->xe) && xe_svm_range_in_vram(to_xe_range(range)))
+ drm_gpusvm_range_evict(&vm->svm.gpusvm, range);
+ __xe_svm_garbage_collector(vm, to_xe_range(range));
+ }
+ }
+ }
+}
+
/**
* xe_svm_bo_evict() - SVM evict BO to system memory
* @bo: BO to evict
@@ -1038,6 +1063,48 @@ int xe_svm_range_get_pages(struct xe_vm *vm, struct xe_svm_range *range,
return err;
}
+/**
+ * xe_svm_ranges_zap_ptes_in_range - clear ptes of svm ranges in input range
+ * @vm: Pointer to the xe_vm structure
+ * @start: Start of the input range
+ * @end: End of the input range
+ *
+ * This function removes the page table entries (PTEs) associated
+ * with the SVM ranges within the given input start and end.
+ *
+ * Return: tile_mask of tiles whose GTs need TLB invalidation.
+ */
+u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ struct drm_gpusvm_notifier *notifier;
+ struct xe_svm_range *range;
+ u64 adj_start, adj_end;
+ struct xe_tile *tile;
+ u8 tile_mask = 0;
+ u8 id;
+
+ lockdep_assert(lockdep_is_held_type(&vm->svm.gpusvm.notifier_lock, 1) &&
+ lockdep_is_held_type(&vm->lock, 0));
+
+ drm_gpusvm_for_each_notifier(notifier, &vm->svm.gpusvm, start, end) {
+ struct drm_gpusvm_range *r = NULL;
+
+ adj_start = max(start, notifier->itree.start);
+ adj_end = min(end, notifier->itree.last + 1);
+ drm_gpusvm_for_each_range(r, notifier, adj_start, adj_end) {
+ range = to_xe_range(r);
+ for_each_tile(tile, vm->xe, id) {
+ if (xe_pt_zap_ptes_range(tile, vm, range)) {
+ tile_mask |= BIT(id);
+ range->tile_invalidated |= BIT(id);
+ }
+ }
+ }
+ }
+
+ return tile_mask;
+}
+
#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
static struct drm_pagemap_device_addr
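
The devmem_fd handling in xe_vma_resolve_pagemap() above boils down to a small decision over two negative sentinels. A hedged userspace model of just that decision; the numeric sentinel values below are stand-ins, the real ones come from the xe_drm.h part of this patch:

#include <stdbool.h>
#include <stdio.h>

/* Stand-ins for the uapi sentinels; the real values come from xe_drm.h. */
#define PREF_LOC_DEFAULT_DEVICE	(-1)
#define PREF_LOC_DEFAULT_SYSTEM	(-2)

enum pagemap_choice {
	PAGEMAP_NONE,		/* no pagemap: system memory is preferred */
	PAGEMAP_TILE_VRAM,	/* the fallback tile's VRAM pagemap */
};

/* Model of xe_vma_resolve_pagemap(): map the per-VMA devmem_fd onto a
 * pagemap choice, given whether the device is discrete. */
static enum pagemap_choice resolve_pagemap(int devmem_fd, bool is_dgfx)
{
	if (devmem_fd == PREF_LOC_DEFAULT_SYSTEM)
		return PAGEMAP_NONE;

	if (devmem_fd == PREF_LOC_DEFAULT_DEVICE)
		return is_dgfx ? PAGEMAP_TILE_VRAM : PAGEMAP_NONE;

	/* Any other fd is reserved for multi-device support (a TODO in the
	 * patch) and currently resolves to no pagemap. */
	return PAGEMAP_NONE;
}

int main(void)
{
	printf("system sentinel, dGPU: %d\n",
	       resolve_pagemap(PREF_LOC_DEFAULT_SYSTEM, true));
	printf("device sentinel, dGPU: %d\n",
	       resolve_pagemap(PREF_LOC_DEFAULT_DEVICE, true));
	printf("device sentinel, iGPU: %d\n",
	       resolve_pagemap(PREF_LOC_DEFAULT_DEVICE, false));
	return 0;
}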
diff --git a/drivers/gpu/drm/xe/xe_svm.h b/drivers/gpu/drm/xe/xe_svm.h
index 19ce4f2754a7..1658e28f3773 100644
--- a/drivers/gpu/drm/xe/xe_svm.h
+++ b/drivers/gpu/drm/xe/xe_svm.h
@@ -91,6 +91,12 @@ bool xe_svm_range_validate(struct xe_vm *vm,
u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vma);
+u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end);
+
+void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end);
+
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile);
+
/**
* xe_svm_range_has_dma_mapping() - SVM range has DMA mapping
* @range: SVM range
@@ -305,6 +311,22 @@ u64 xe_svm_find_vma_start(struct xe_vm *vm, u64 addr, u64 end, struct xe_vma *vm
return ULONG_MAX;
}
+static inline
+u8 xe_svm_ranges_zap_ptes_in_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ return 0;
+}
+
+static inline
+void xe_svm_unmap_address_range(struct xe_vm *vm, u64 start, u64 end)
+{
+}
+
+static inline
+struct drm_pagemap *xe_vma_resolve_pagemap(struct xe_vma *vma, struct xe_tile *tile)
+{
+ return NULL;
+}
#define xe_svm_assert_in_notifier(...) do {} while (0)
#define xe_svm_range_has_dma_mapping(...) false
diff --git a/drivers/gpu/drm/xe/xe_tile.h b/drivers/gpu/drm/xe/xe_tile.h
index eb939316d55b..142ed5cf654b 100644
--- a/drivers/gpu/drm/xe/xe_tile.h
+++ b/drivers/gpu/drm/xe/xe_tile.h
@@ -10,6 +10,24 @@
struct xe_tile;
+#if IS_ENABLED(CONFIG_DRM_XE_DEVMEM_MIRROR)
+/**
+ * xe_tile_from_dpagemap - Find xe_tile from drm_pagemap
+ * @dpagemap: pointer to struct drm_pagemap
+ *
+ * Return: Pointer to xe_tile
+ */
+static inline struct xe_tile *xe_tile_from_dpagemap(struct drm_pagemap *dpagemap)
+{
+ return container_of(dpagemap, struct xe_tile, mem.vram.dpagemap);
+}
+
+#else
+static inline struct xe_tile *xe_tile_from_dpagemap(struct drm_pagemap *dpagemap)
+{
+ return NULL;
+}
+#endif
int xe_tile_init_early(struct xe_tile *tile, struct xe_device *xe, u8 id);
int xe_tile_init_noalloc(struct xe_tile *tile);
int xe_tile_init(struct xe_tile *tile);
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index d18807b92b18..56d6c286e3d3 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -38,6 +38,7 @@
#include "xe_res_cursor.h"
#include "xe_svm.h"
#include "xe_sync.h"
+#include "xe_tile.h"
#include "xe_trace_bo.h"
#include "xe_wa.h"
#include "xe_hmm.h"
@@ -1168,7 +1169,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
struct xe_bo *bo,
u64 bo_offset_or_userptr,
u64 start, u64 end,
- u16 pat_index, unsigned int flags)
+ struct xe_vma_mem_attr *attr,
+ unsigned int flags)
{
struct xe_vma *vma;
struct xe_tile *tile;
@@ -1223,7 +1225,8 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
if (vm->xe->info.has_atomic_enable_pte_bit)
vma->gpuva.flags |= XE_VMA_ATOMIC_PTE_BIT;
- vma->pat_index = pat_index;
+ vma->attr = *attr;
+ vma->skip_invalidation = 0;
if (bo) {
struct drm_gpuvm_bo *vm_bo;
@@ -2169,6 +2172,95 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
return err;
}
+static void xe_vm_query_vmas(struct xe_vm *vm, u32 *num_vmas, u64 start, u64 end)
+{
+ struct drm_gpuva *gpuva;
+
+ lockdep_assert_held(&vm->lock);
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end)
+ (*num_vmas)++;
+}
+
+static int get_mem_attrs(struct xe_vm *vm, u32 *num_vmas, u64 start,
+ u64 end, struct drm_xe_vma_mem_attr *attrs)
+{
+ struct drm_gpuva *gpuva;
+ int i = 0;
+
+ lockdep_assert_held(&vm->lock);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (i == *num_vmas)
+ return -EINVAL;
+
+ attrs[i].start = xe_vma_start(vma);
+ attrs[i].end = xe_vma_end(vma);
+ attrs[i].atomic.val = vma->attr.atomic_access;
+ attrs[i].pat_index.val = vma->attr.pat_index;
+ attrs[i].preferred_mem_loc.devmem_fd = vma->attr.preferred_loc.devmem_fd;
+ attrs[i].preferred_mem_loc.migration_policy = vma->attr.preferred_loc.migration_policy;
+
+ i++;
+ }
+
+ if (i < (*num_vmas - 1))
+ *num_vmas = i;
+ return 0;
+}
+
+int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_vma_mem_attr *mem_attrs;
+ struct drm_xe_vm_query_vmas_attr *args = data;
+ u64 __user *attrs_user = u64_to_user_ptr(args->vector_of_vma_mem_attr);
+ struct xe_vm *vm;
+ int err = 0;
+
+ if (XE_IOCTL_DBG(xe,
+ (args->num_vmas == 0 && attrs_user) ||
+ (args->num_vmas > 0 && !attrs_user)))
+ return -EINVAL;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -EINVAL;
+
+ down_read(&vm->lock);
+
+ attrs_user = u64_to_user_ptr(args->vector_of_vma_mem_attr);
+
+ if (args->num_vmas == 0 && !attrs_user) {
+ xe_vm_query_vmas(vm, &args->num_vmas, args->start, args->start + args->range);
+ goto unlock_vm;
+ }
+
+ mem_attrs = kvmalloc_array(args->num_vmas, sizeof(struct drm_xe_vma_mem_attr),
+ GFP_KERNEL | __GFP_ACCOUNT |
+ __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
+ if (!mem_attrs) {
+ err = args->num_vmas > 1 ? -ENOBUFS : -ENOMEM;
+ goto unlock_vm;
+ }
+
+ err = get_mem_attrs(vm, &args->num_vmas, args->start,
+ args->start + args->range, mem_attrs);
+ if (err)
+ goto free_mem_attrs;
+
+ err = __copy_to_user(attrs_user, mem_attrs,
+ sizeof(struct drm_xe_vma_mem_attr) * args->num_vmas);
+
+free_mem_attrs:
+ kvfree(mem_attrs);
+unlock_vm:
+ up_read(&vm->lock);
+ return err;
+}
+
static bool vma_matches(struct xe_vma *vma, u64 page_addr)
{
if (page_addr > xe_vma_end(vma) - 1 ||
@@ -2318,6 +2410,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
case DRM_XE_VM_BIND_OP_MAP:
case DRM_XE_VM_BIND_OP_MAP_USERPTR:
ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, addr, range,
+ DRM_GPUVM_SM_MAP_NOT_MADVISE,
obj, bo_offset_or_userptr);
break;
case DRM_XE_VM_BIND_OP_UNMAP:
@@ -2443,7 +2536,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_vma_ops *vops,
ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
- u16 pat_index, unsigned int flags)
+ struct xe_vma_mem_attr *attr, unsigned int flags)
{
struct xe_bo *bo = op->gem.obj ? gem_to_xe_bo(op->gem.obj) : NULL;
struct drm_exec exec;
@@ -2472,7 +2565,7 @@ static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
}
vma = xe_vma_create(vm, bo, op->gem.offset,
op->va.addr, op->va.addr +
- op->va.range - 1, pat_index, flags);
+ op->va.range - 1, attr, flags);
if (IS_ERR(vma))
goto err_unlock;
@@ -2615,6 +2708,15 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
switch (op->base.op) {
case DRM_GPUVA_OP_MAP:
{
+ struct xe_vma_mem_attr default_attr = {
+ .preferred_loc = {
+ .devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
+ .migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
+ },
+ .atomic_access = DRM_XE_VMA_ATOMIC_UNDEFINED,
+ .pat_index = op->map.pat_index,
+ };
+
flags |= op->map.read_only ?
VMA_CREATE_FLAG_READ_ONLY : 0;
flags |= op->map.is_null ?
@@ -2624,7 +2726,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
flags |= op->map.is_cpu_addr_mirror ?
VMA_CREATE_FLAG_IS_SYSTEM_ALLOCATOR : 0;
- vma = new_vma(vm, &op->base.map, op->map.pat_index,
+ vma = new_vma(vm, &op->base.map, &default_attr,
flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2652,8 +2754,12 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
end = op->base.remap.next->va.addr;
if (xe_vma_is_cpu_addr_mirror(old) &&
- xe_svm_has_mapping(vm, start, end))
- return -EBUSY;
+ xe_svm_has_mapping(vm, start, end)) {
+ if (vops->flags & XE_VMA_OPS_FLAG_MADVISE)
+ xe_svm_unmap_address_range(vm, start, end);
+ else
+ return -EBUSY;
+ }
op->remap.start = xe_vma_start(old);
op->remap.range = xe_vma_size(old);
@@ -2672,7 +2778,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
if (op->base.remap.prev) {
vma = new_vma(vm, op->base.remap.prev,
- old->pat_index, flags);
+ &old->attr, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2702,7 +2808,7 @@ static int vm_bind_ioctl_ops_parse(struct xe_vm *vm, struct drm_gpuva_ops *ops,
if (op->base.remap.next) {
vma = new_vma(vm, op->base.remap.next,
- old->pat_index, flags);
+ &old->attr, flags);
if (IS_ERR(vma))
return PTR_ERR(vma);
@@ -2892,15 +2998,24 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
int err = 0;
struct xe_svm_range *svm_range;
+ struct drm_pagemap *dpagemap;
struct drm_gpusvm_ctx ctx = {};
- struct xe_tile *tile;
+ struct xe_tile *tile = NULL;
unsigned long i;
u32 region;
if (!xe_vma_is_cpu_addr_mirror(vma))
return 0;
- region = op->prefetch_range.region;
+ if (op->prefetch_range.region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC) {
+ dpagemap = xe_vma_resolve_pagemap(vma, xe_device_get_root_tile(vm->xe));
+ if (dpagemap)
+ tile = xe_tile_from_dpagemap(dpagemap);
+ } else {
+ region = op->prefetch_range.region;
+ if (region)
+ tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
+ }
ctx.read_only = xe_vma_read_only(vma);
ctx.devmem_possible = devmem_possible;
@@ -2908,11 +3023,10 @@ static int prefetch_ranges(struct xe_vm *vm, struct xe_vma_op *op)
/* TODO: Threading the migration */
xa_for_each(&op->prefetch_range.range, i, svm_range) {
- if (!region)
+ if (!tile)
xe_svm_range_migrate_to_smem(vm, svm_range);
- if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, region)) {
- tile = &vm->xe->tiles[region_to_mem_type[region] - XE_PL_VRAM0];
+ if (xe_svm_range_needs_migrate_to_vram(svm_range, vma, !!tile)) {
err = xe_svm_alloc_vram(vm, tile, svm_range, &ctx);
if (err) {
drm_dbg(&vm->xe->drm, "VRAM allocation failed, retry from userspace, asid=%u, gpusvm=%p, errno=%pe\n",
@@ -2980,7 +3094,8 @@ static int op_lock_and_prep(struct drm_exec *exec, struct xe_vm *vm,
else
region = op->prefetch.region;
- xe_assert(vm->xe, region <= ARRAY_SIZE(region_to_mem_type));
+ xe_assert(vm->xe, region == DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC ||
+ region <= ARRAY_SIZE(region_to_mem_type));
err = vma_lock_and_validate(exec,
gpuva_to_vma(op->base.prefetch.va),
@@ -3398,8 +3513,8 @@ static int vm_bind_ioctl_check_args(struct xe_device *xe, struct xe_vm *vm,
op == DRM_XE_VM_BIND_OP_PREFETCH) ||
XE_IOCTL_DBG(xe, prefetch_region &&
op != DRM_XE_VM_BIND_OP_PREFETCH) ||
- XE_IOCTL_DBG(xe, !(BIT(prefetch_region) &
- xe->info.mem_region_mask)) ||
+ XE_IOCTL_DBG(xe, (prefetch_region != DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC &&
+ !(BIT(prefetch_region) & xe->info.mem_region_mask))) ||
XE_IOCTL_DBG(xe, obj &&
op == DRM_XE_VM_BIND_OP_UNMAP)) {
err = -EINVAL;
@@ -3842,6 +3957,68 @@ void xe_vm_unlock(struct xe_vm *vm)
dma_resv_unlock(xe_vm_resv(vm));
}
+/**
+ * xe_vm_range_tilemask_tlb_invalidation - Issue a TLB invalidation on this tilemask for an
+ * address range
+ * @vm: The VM
+ * @start: start address
+ * @end: end address
+ * @tile_mask: mask of tiles whose GTs to invalidate
+ *
+ * Issue a range based TLB invalidation for the GTs of all tiles set in
+ * @tile_mask.
+ *
+ * Return: 0 on success, negative error code otherwise.
+ */
+int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
+ u64 end, u8 tile_mask)
+{
+ struct xe_gt_tlb_invalidation_fence fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
+ struct xe_tile *tile;
+ u32 fence_id = 0;
+ u8 id;
+ int err;
+
+ if (!tile_mask)
+ return 0;
+
+ for_each_tile(tile, vm->xe, id) {
+ if (tile_mask & BIT(id)) {
+ xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
+ &fence[fence_id], true);
+
+ err = xe_gt_tlb_invalidation_range(tile->primary_gt,
+ &fence[fence_id],
+ start,
+ end,
+ vm->usm.asid);
+ if (err)
+ goto wait;
+ ++fence_id;
+
+ if (!tile->media_gt)
+ continue;
+
+ xe_gt_tlb_invalidation_fence_init(tile->media_gt,
+ &fence[fence_id], true);
+
+ err = xe_gt_tlb_invalidation_range(tile->media_gt,
+ &fence[fence_id],
+ start,
+ end,
+ vm->usm.asid);
+ if (err)
+ goto wait;
+ ++fence_id;
+ }
+ }
+
+wait:
+ for (id = 0; id < fence_id; ++id)
+ xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+
+ return err;
+}
+
/**
* xe_vm_invalidate_vma - invalidate GPU mappings for VMA without a lock
* @vma: VMA to invalidate
@@ -3857,11 +4034,9 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
struct xe_device *xe = xe_vma_vm(vma)->xe;
struct xe_vm *vm = xe_vma_vm(vma);
struct xe_tile *tile;
- struct xe_gt_tlb_invalidation_fence
- fence[XE_MAX_TILES_PER_DEVICE * XE_MAX_GT_PER_TILE];
- u8 id;
- u32 fence_id = 0;
+ u8 tile_mask = 0;
int ret = 0;
+ u8 id;
xe_assert(xe, !xe_vma_is_null(vma));
xe_assert(xe, !xe_vma_is_cpu_addr_mirror(vma));
@@ -3892,37 +4067,14 @@ int xe_vm_invalidate_vma(struct xe_vma *vma)
}
}
- for_each_tile(tile, xe, id) {
- if (xe_pt_zap_ptes(tile, vma)) {
- xe_device_wmb(xe);
- xe_gt_tlb_invalidation_fence_init(tile->primary_gt,
- &fence[fence_id],
- true);
-
- ret = xe_gt_tlb_invalidation_vma(tile->primary_gt,
- &fence[fence_id], vma);
- if (ret)
- goto wait;
- ++fence_id;
-
- if (!tile->media_gt)
- continue;
-
- xe_gt_tlb_invalidation_fence_init(tile->media_gt,
- &fence[fence_id],
- true);
+ for_each_tile(tile, xe, id)
+ if (xe_pt_zap_ptes(tile, vma))
+ tile_mask |= BIT(id);
- ret = xe_gt_tlb_invalidation_vma(tile->media_gt,
- &fence[fence_id], vma);
- if (ret)
- goto wait;
- ++fence_id;
- }
- }
+ xe_device_wmb(xe);
-wait:
- for (id = 0; id < fence_id; ++id)
- xe_gt_tlb_invalidation_fence_wait(&fence[id]);
+ ret = xe_vm_range_tilemask_tlb_invalidation(xe_vma_vm(vma), xe_vma_start(vma),
+ xe_vma_end(vma), tile_mask);
/* WRITE_ONCE pair with READ_ONCE in xe_gt_pagefault.c */
WRITE_ONCE(vma->tile_invalidated, vma->tile_mask);
@@ -4124,3 +4276,149 @@ void xe_vm_snapshot_free(struct xe_vm_snapshot *snap)
}
kvfree(snap);
}
+
+/**
+ * xe_vma_need_vram_for_atomic - Check if VMA needs VRAM migration for atomic operations
+ * @xe: Pointer to the XE device structure
+ * @vma: Pointer to the virtual memory area (VMA) structure
+ * @is_atomic: true when called from the pagefault path for an atomic access
+ *
+ * This function determines whether the given VMA needs to be migrated to
+ * VRAM in order to perform atomic GPU operations.
+ *
+ * Return: true if migration to VRAM is required, false otherwise.
+ */
+bool xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic)
+{
+ u32 atomic_access = xe_vma_bo(vma) ? xe_vma_bo(vma)->attr.atomic_access :
+ vma->attr.atomic_access;
+
+ if (!IS_DGFX(xe))
+ return false;
+
+ /* Note: The checks implemented here are platform-specific. For instance,
+ * on a device supporting CXL atomics, these would ideally work universally
+ * without additional handling.
+ */
+ switch (atomic_access) {
+ case DRM_XE_VMA_ATOMIC_DEVICE:
+ return !xe->info.has_device_atomics_on_smem;
+
+ case DRM_XE_VMA_ATOMIC_CPU:
+ case DRM_XE_VMA_ATOMIC_UNDEFINED:
+ return is_atomic;
+
+ case DRM_XE_VMA_ATOMIC_GLOBAL:
+ return true;
+
+ default:
+ return is_atomic;
+ }
+}
+
+/**
+ * xe_vm_alloc_madvise_vma - Allocate VMAs with madvise ops
+ * @vm: Pointer to the xe_vm structure
+ * @start: Starting input address
+ * @range: Size of the input range
+ *
+ * This function splits existing VMAs so that new VMAs exactly cover the
+ * user-provided input range.
+ *
+ * Return: 0 on success, negative error code on failure.
+ */
+int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t start, uint64_t range)
+{
+ struct xe_vma_ops vops;
+ struct drm_gpuva_ops *ops = NULL;
+ struct drm_gpuva_op *__op;
+ bool is_cpu_addr_mirror = false;
+ bool remap_op = false;
+ struct xe_vma_mem_attr tmp_attr;
+ int err;
+
+ vm_dbg(&vm->xe->drm, "MADVISE IN: addr=0x%016llx, size=0x%016llx", start, range);
+
+ lockdep_assert_held_write(&vm->lock);
+
+ vm_dbg(&vm->xe->drm, "MADVISE_OPS_CREATE: addr=0x%016llx, size=0x%016llx", start, range);
+ ops = drm_gpuvm_sm_map_ops_create(&vm->gpuvm, start, range,
+ DRM_GPUVM_SKIP_GEM_OBJ_VA_SPLIT_MADVISE,
+ NULL, start);
+ if (IS_ERR(ops))
+ return PTR_ERR(ops);
+
+ if (list_empty(&ops->list)) {
+ err = 0;
+ goto free_ops;
+ }
+
+ drm_gpuva_for_each_op(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+
+ if (__op->op == DRM_GPUVA_OP_REMAP) {
+ xe_assert(vm->xe, !remap_op);
+ remap_op = true;
+
+ if (xe_vma_is_cpu_addr_mirror(gpuva_to_vma(op->base.remap.unmap->va)))
+ is_cpu_addr_mirror = true;
+ else
+ is_cpu_addr_mirror = false;
+ }
+
+ if (__op->op == DRM_GPUVA_OP_MAP) {
+ xe_assert(vm->xe, remap_op);
+ remap_op = false;
+
+ /* For madvise ops, DRM_GPUVA_OP_MAP always follows DRM_GPUVA_OP_REMAP,
+ * so set op->map.is_cpu_addr_mirror to true when the REMAP unmaps a
+ * CPU address mirror VMA.
+ */
+ op->map.is_cpu_addr_mirror = is_cpu_addr_mirror;
+ }
+
+ print_op(vm->xe, __op);
+ }
+
+ xe_vma_ops_init(&vops, vm, NULL, NULL, 0);
+ vops.flags |= XE_VMA_OPS_FLAG_MADVISE;
+ err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
+ if (err)
+ goto unwind_ops;
+
+ xe_vm_lock(vm, false);
+
+ drm_gpuva_for_each_op(__op, ops) {
+ struct xe_vma_op *op = gpuva_op_to_vma_op(__op);
+ struct xe_vma *vma;
+
+ if (__op->op == DRM_GPUVA_OP_UNMAP) {
+ /* There should be no unmap */
+ XE_WARN_ON("UNEXPECTED UNMAP");
+ xe_vma_destroy(gpuva_to_vma(op->base.unmap.va), NULL);
+ } else if (__op->op == DRM_GPUVA_OP_REMAP) {
+ vma = gpuva_to_vma(op->base.remap.unmap->va);
+ /* Store the attributes of the VMA being unmapped by the REMAP, so
+ * they can be assigned to the VMA created by the subsequent MAP.
+ */
+ tmp_attr = vma->attr;
+ xe_vma_destroy(gpuva_to_vma(op->base.remap.unmap->va), NULL);
+ } else if (__op->op == DRM_GPUVA_OP_MAP) {
+ vma = op->map.vma;
+ /* In the madvise case, a MAP is always preceded by a REMAP.
+ * Therefore tmp_attr always holds sane values, making it safe to
+ * copy them to the new vma.
+ */
+ vma->attr = tmp_attr;
+ }
+ }
+
+ xe_vm_unlock(vm);
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+ return 0;
+
+unwind_ops:
+ vm_bind_ioctl_ops_unwind(vm, &ops, 1);
+free_ops:
+ drm_gpuva_ops_free(&vm->gpuvm, ops);
+ return err;
+}
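
xe_vma_need_vram_for_atomic() above is essentially a decision table over the atomic_access attribute. The standalone sketch below reproduces that table so the cases are easy to scan; the enum values mirror the DRM_XE_VMA_ATOMIC_* names used by the patch but are re-declared here purely for illustration:

#include <stdbool.h>
#include <stdio.h>

/* Illustrative mirror of the DRM_XE_VMA_ATOMIC_* values used by the patch. */
enum atomic_access {
	ATOMIC_UNDEFINED,
	ATOMIC_DEVICE,
	ATOMIC_GLOBAL,
	ATOMIC_CPU,
};

/* Model of xe_vma_need_vram_for_atomic(): decide whether an access must be
 * served from VRAM, given the device capabilities and the madvise setting. */
static bool need_vram_for_atomic(bool is_dgfx, bool atomics_on_smem,
				 enum atomic_access attr, bool is_atomic_fault)
{
	if (!is_dgfx)
		return false;	/* integrated parts never migrate for atomics */

	switch (attr) {
	case ATOMIC_DEVICE:
		return !atomics_on_smem;	/* VRAM only if SMEM atomics unsupported */
	case ATOMIC_GLOBAL:
		return true;			/* always requires VRAM */
	case ATOMIC_CPU:
	case ATOMIC_UNDEFINED:
	default:
		return is_atomic_fault;		/* fall back to the fault type */
	}
}

int main(void)
{
	printf("DEVICE, smem atomics: %d\n",
	       need_vram_for_atomic(true, true, ATOMIC_DEVICE, true));
	printf("GLOBAL:               %d\n",
	       need_vram_for_atomic(true, true, ATOMIC_GLOBAL, false));
	printf("UNDEFINED, atomic pf: %d\n",
	       need_vram_for_atomic(true, false, ATOMIC_UNDEFINED, true));
	return 0;
}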
diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
index 88d6c0ef89c7..e4a601a3583f 100644
--- a/drivers/gpu/drm/xe/xe_vm.h
+++ b/drivers/gpu/drm/xe/xe_vm.h
@@ -171,6 +171,10 @@ static inline bool xe_vma_is_userptr(struct xe_vma *vma)
struct xe_vma *xe_vm_find_vma_by_addr(struct xe_vm *vm, u64 page_addr);
+bool xe_vma_need_vram_for_atomic(struct xe_device *xe, struct xe_vma *vma, bool is_atomic);
+
+int xe_vm_alloc_madvise_vma(struct xe_vm *vm, uint64_t addr, uint64_t size);
+
/**
* to_userptr_vma() - Return a pointer to an embedding userptr vma
* @vma: Pointer to the embedded struct xe_vma
@@ -191,7 +195,7 @@ int xe_vm_destroy_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
int xe_vm_bind_ioctl(struct drm_device *dev, void *data,
struct drm_file *file);
-
+int xe_vm_query_vmas_attrs_ioctl(struct drm_device *dev, void *data, struct drm_file *file);
void xe_vm_close_and_put(struct xe_vm *vm);
static inline bool xe_vm_in_fault_mode(struct xe_vm *vm)
@@ -228,6 +232,9 @@ struct dma_fence *xe_vm_range_rebind(struct xe_vm *vm,
struct dma_fence *xe_vm_range_unbind(struct xe_vm *vm,
struct xe_svm_range *range);
+int xe_vm_range_tilemask_tlb_invalidation(struct xe_vm *vm, u64 start,
+ u64 end, u8 tile_mask);
+
int xe_vm_invalidate_vma(struct xe_vma *vma);
int xe_vm_validate_protected(struct xe_vm *vm);
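
The xe_vm_query_vmas_attrs_ioctl() handler declared above expects the usual two-pass pattern from userspace: call once with num_vmas == 0 and a NULL vector to learn how many VMAs intersect the range, then allocate and call again to fetch the attributes. A hedged userspace-style sketch of that flow; the struct layout and the ioctl plumbing are stubbed here, the authoritative definitions are in the include/uapi/drm/xe_drm.h part of this patch:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Simplified stand-ins for the uapi structs added by this patch; see
 * include/uapi/drm/xe_drm.h in the diff for the authoritative layout. */
struct vma_mem_attr {
	uint64_t start, end;
	uint32_t atomic_val, pat_index;
	uint32_t devmem_fd, migration_policy;
};

struct query_args {
	uint32_t vm_id;
	uint32_t num_vmas;
	uint64_t start, range;
	uint64_t vector_of_vma_mem_attr;	/* user pointer */
};

/* Stub standing in for ioctl(fd, DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS, args). */
static int query_vmas_attrs(struct query_args *args)
{
	if (!args->num_vmas) {		/* first pass: report the count */
		args->num_vmas = 3;
		return 0;
	}
	return 0;			/* second pass: kernel fills the vector */
}

int main(void)
{
	struct query_args args = { .vm_id = 1, .start = 0x10000, .range = 0x40000 };
	struct vma_mem_attr *attrs;

	/* Pass 1: count VMAs intersecting [start, start + range). */
	if (query_vmas_attrs(&args))
		return 1;

	/* Pass 2: fetch the attributes. */
	attrs = calloc(args.num_vmas, sizeof(*attrs));
	if (!attrs)
		return 1;
	args.vector_of_vma_mem_attr = (uintptr_t)attrs;
	if (query_vmas_attrs(&args))
		return 1;

	printf("queried %u VMAs\n", (unsigned int)args.num_vmas);
	free(attrs);
	return 0;
}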
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.c b/drivers/gpu/drm/xe/xe_vm_madvise.c
new file mode 100644
index 000000000000..06e40ab0970e
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.c
@@ -0,0 +1,389 @@
+// SPDX-License-Identifier: MIT
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#include "xe_vm_madvise.h"
+
+#include <linux/nospec.h>
+#include <drm/ttm/ttm_tt.h>
+#include <drm/xe_drm.h>
+
+#include "xe_bo.h"
+#include "xe_gt_tlb_invalidation.h"
+#include "xe_pat.h"
+#include "xe_pt.h"
+#include "xe_svm.h"
+
+struct xe_vmas_in_madvise_range {
+ u64 addr;
+ u64 range;
+ struct xe_vma **vmas;
+ int num_vmas;
+ bool has_svm_vmas;
+ bool has_bo_vmas;
+ bool has_userptr_vmas;
+};
+
+static int get_vmas(struct xe_vm *vm, struct xe_vmas_in_madvise_range *madvise_range)
+{
+ u64 addr = madvise_range->addr;
+ u64 range = madvise_range->range;
+
+ struct xe_vma **__vmas;
+ struct drm_gpuva *gpuva;
+ int max_vmas = 8;
+
+ lockdep_assert_held(&vm->lock);
+
+ madvise_range->num_vmas = 0;
+ madvise_range->vmas = kmalloc_array(max_vmas, sizeof(*madvise_range->vmas), GFP_KERNEL);
+ if (!madvise_range->vmas)
+ return -ENOMEM;
+
+ vm_dbg(&vm->xe->drm, "VMA's in range: start=0x%016llx, end=0x%016llx", addr, addr + range);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, addr, addr + range) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (xe_vma_bo(vma))
+ madvise_range->has_bo_vmas = true;
+ else if (xe_vma_is_cpu_addr_mirror(vma))
+ madvise_range->has_svm_vmas = true;
+ else if (xe_vma_is_userptr(vma))
+ madvise_range->has_userptr_vmas = true;
+ else
+ XE_WARN_ON("UNEXPECTED VMA");
+
+ if (madvise_range->num_vmas == max_vmas) {
+ max_vmas <<= 1;
+ __vmas = krealloc(madvise_range->vmas,
+ max_vmas * sizeof(*madvise_range->vmas),
+ GFP_KERNEL);
+ if (!__vmas) {
+ kfree(madvise_range->vmas);
+ return -ENOMEM;
+ }
+ madvise_range->vmas = __vmas;
+ }
+
+ madvise_range->vmas[madvise_range->num_vmas] = vma;
+ (madvise_range->num_vmas)++;
+ }
+
+ if (!madvise_range->num_vmas)
+ kfree(madvise_range->vmas);
+
+ vm_dbg(&vm->xe->drm, "madvise_range-num_vmas = %d\n", madvise_range->num_vmas);
+
+ return 0;
+}
+
+static void madvise_preferred_mem_loc(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PREFERRED_LOC);
+
+ for (i = 0; i < num_vmas; i++) {
+ if (vmas[i]->attr.preferred_loc.devmem_fd == op->preferred_mem_loc.devmem_fd &&
+ vmas[i]->attr.preferred_loc.migration_policy ==
+ op->preferred_mem_loc.migration_policy) {
+ vmas[i]->skip_invalidation = 1;
+ } else {
+ vmas[i]->attr.preferred_loc.devmem_fd = op->preferred_mem_loc.devmem_fd;
+ /* Until multi-device support is added, migration_policy
+ * has no effect and can be ignored.
+ */
+ vmas[i]->attr.preferred_loc.migration_policy =
+ op->preferred_mem_loc.migration_policy;
+ }
+ }
+}
+
+static void madvise_atomic(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ struct xe_bo *bo;
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_ATOMIC);
+ xe_assert(vm->xe, op->atomic.val <= DRM_XE_VMA_ATOMIC_CPU);
+
+ for (i = 0; i < num_vmas; i++) {
+ if (vmas[i]->attr.atomic_access == op->atomic.val)
+ vmas[i]->skip_invalidation = 1;
+ else
+ vmas[i]->attr.atomic_access = op->atomic.val;
+
+ bo = xe_vma_bo(vmas[i]);
+ if (!bo)
+ continue;
+
+ xe_bo_assert_held(bo);
+ bo->attr.atomic_access = op->atomic.val;
+
+ /* Invalidate cpu page table, so bo can migrate to smem in next access */
+ if (xe_bo_is_vram(bo) &&
+ (bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_CPU ||
+ bo->attr.atomic_access == DRM_XE_VMA_ATOMIC_GLOBAL))
+ ttm_bo_unmap_virtual(&bo->ttm);
+ }
+}
+
+static void madvise_pat_index(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op)
+{
+ int i;
+
+ xe_assert(vm->xe, op->type == DRM_XE_VMA_ATTR_PAT);
+
+ for (i = 0; i < num_vmas; i++) {
+ if (vmas[i]->attr.pat_index == op->pat_index.val)
+ vmas[i]->skip_invalidation = 1;
+ else
+ vmas[i]->attr.pat_index = op->pat_index.val;
+ }
+}
+
+typedef void (*madvise_func)(struct xe_device *xe, struct xe_vm *vm,
+ struct xe_vma **vmas, int num_vmas,
+ struct drm_xe_madvise *op);
+
+static const madvise_func madvise_funcs[] = {
+ [DRM_XE_VMA_ATTR_PREFERRED_LOC] = madvise_preferred_mem_loc,
+ [DRM_XE_VMA_ATTR_ATOMIC] = madvise_atomic,
+ [DRM_XE_VMA_ATTR_PAT] = madvise_pat_index,
+};
+
+static void xe_zap_ptes_in_madvise_range(struct xe_vm *vm, u64 start, u64 end, u8 *tile_mask)
+{
+ struct drm_gpuva *gpuva;
+ struct xe_tile *tile;
+ u8 id;
+
+ lockdep_assert_held_write(&vm->lock);
+
+ if (dma_resv_wait_timeout(xe_vm_resv(vm), DMA_RESV_USAGE_BOOKKEEP,
+ false, MAX_SCHEDULE_TIMEOUT) <= 0)
+ XE_WARN_ON(1);
+
+ drm_gpuvm_for_each_va_range(gpuva, &vm->gpuvm, start, end) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (vma->skip_invalidation)
+ continue;
+
+ if (xe_vma_is_cpu_addr_mirror(vma)) {
+ *tile_mask |= xe_svm_ranges_zap_ptes_in_range(vm,
+ xe_vma_start(vma),
+ xe_vma_end(vma));
+ } else {
+ if (xe_vma_is_userptr(vma))
+ WARN_ON_ONCE(!dma_resv_test_signaled(xe_vm_resv(xe_vma_vm(vma)),
+ DMA_RESV_USAGE_BOOKKEEP));
+ for_each_tile(tile, vm->xe, id) {
+ if (xe_pt_zap_ptes(tile, vma)) {
+ *tile_mask |= BIT(id);
+ vma->tile_invalidated |= BIT(id);
+ }
+ }
+ }
+ }
+}
+
+static int xe_vm_invalidate_madvise_range(struct xe_vm *vm, u64 start, u64 end)
+{
+ u8 tile_mask = 0;
+
+ xe_zap_ptes_in_madvise_range(vm, start, end, &tile_mask);
+ if (!tile_mask)
+ return 0;
+
+ xe_device_wmb(vm->xe);
+
+ return xe_vm_range_tilemask_tlb_invalidation(vm, start, end, tile_mask);
+}
+
+static int drm_xe_madvise_args_are_sane(struct xe_device *xe, const struct drm_xe_madvise *args)
+{
+ if (XE_IOCTL_DBG(xe, !args))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->start, SZ_4K)))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, !IS_ALIGNED(args->range, SZ_4K)))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, args->range < SZ_4K))
+ return -EINVAL;
+
+ switch (args->type) {
+ case DRM_XE_VMA_ATTR_ATOMIC:
+ if (XE_IOCTL_DBG(xe, args->atomic.val > DRM_XE_VMA_ATOMIC_CPU))
+ return -EINVAL;
+ break;
+ case DRM_XE_VMA_ATTR_PAT: {
+ u16 coh_mode = xe_pat_index_get_coh_mode(xe, args->pat_index.val);
+
+ if (XE_IOCTL_DBG(xe, !coh_mode))
+ return -EINVAL;
+
+ if (XE_WARN_ON(coh_mode > XE_COH_AT_LEAST_1WAY))
+ return -EINVAL;
+ break;
+ }
+ case DRM_XE_VMA_ATTR_PREFERRED_LOC: {
+ s32 fd = (s32)args->preferred_mem_loc.devmem_fd;
+
+ if (XE_IOCTL_DBG(xe, fd < DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, args->preferred_mem_loc.migration_policy >
+ DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES))
+ return -EINVAL;
+ break;
+ }
+ default:
+ if (XE_IOCTL_DBG(xe, 1))
+ return -EINVAL;
+ }
+
+ if (XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int check_bo_args_are_sane(struct xe_vm *vm, struct xe_vma **vmas,
+ int num_vmas, u32 atomic_val)
+{
+ struct xe_device *xe = vm->xe;
+ struct xe_bo *bo;
+ int i;
+
+ for (i = 0; i < num_vmas; i++) {
+ bo = xe_vma_bo(vmas[i]);
+ if (!bo)
+ continue;
+
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_VMA_ATOMIC_CPU &&
+ !(bo->flags & XE_BO_FLAG_SYSTEM)))
+ return -EINVAL;
+
+ /* NOTE: The following atomic checks are platform-specific. For example,
+ * if a device supports CXL atomics, these may not be necessary or
+ * may behave differently.
+ */
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_VMA_ATOMIC_DEVICE &&
+ !(bo->flags & XE_BO_FLAG_VRAM0) &&
+ !(bo->flags & XE_BO_FLAG_VRAM1) &&
+ !(bo->flags & XE_BO_FLAG_SYSTEM &&
+ xe->info.has_device_atomics_on_smem)))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, atomic_val == DRM_XE_VMA_ATOMIC_GLOBAL &&
+ (!(bo->flags & XE_BO_FLAG_SYSTEM) ||
+ (!(bo->flags & XE_BO_FLAG_VRAM0) &&
+ !(bo->flags & XE_BO_FLAG_VRAM1)))))
+ return -EINVAL;
+ }
+ return 0;
+}
+/**
+ * xe_vm_madvise_ioctl - Handle MADVISE ioctl for a VM
+ * @dev: DRM device pointer
+ * @data: Pointer to ioctl data (drm_xe_madvise*)
+ * @file: DRM file pointer
+ *
+ * Handles the MADVISE ioctl to provide memory advice for VMAs within the
+ * input range.
+ *
+ * Return: 0 on success or a negative error code on failure.
+ */
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
+{
+ struct xe_device *xe = to_xe_device(dev);
+ struct xe_file *xef = to_xe_file(file);
+ struct drm_xe_madvise *args = data;
+ struct xe_vm *vm;
+ struct xe_bo *bo;
+ struct drm_exec exec;
+ int err = 0;
+ int attr_type;
+
+ vm = xe_vm_lookup(xef, args->vm_id);
+ if (XE_IOCTL_DBG(xe, !vm))
+ return -EINVAL;
+
+ if (drm_xe_madvise_args_are_sane(vm->xe, args))
+ return -EINVAL;
+
+ down_write(&vm->lock);
+
+ if (XE_IOCTL_DBG(xe, xe_vm_is_closed_or_banned(vm))) {
+ err = -ENOENT;
+ goto unlock_vm;
+ }
+
+ xe_vm_alloc_madvise_vma(vm, args->start, args->range);
+
+ struct xe_vmas_in_madvise_range madvise_range = {.addr = args->start,
+ .range = args->range, };
+ err = get_vmas(vm, &madvise_range);
+ if (err || !madvise_range.num_vmas)
+ goto unlock_vm;
+
+ if (madvise_range.has_bo_vmas) {
+ if (args->type == DRM_XE_VMA_ATTR_ATOMIC) {
+ err = check_bo_args_are_sane(vm, madvise_range.vmas,
+ madvise_range.num_vmas,
+ args->atomic.val);
+ if (err)
+ goto unlock_vm;
+ }
+
+ drm_exec_init(&exec, DRM_EXEC_IGNORE_DUPLICATES, 0);
+ drm_exec_until_all_locked(&exec) {
+ for (int i = 0; i < madvise_range.num_vmas; i++) {
+ bo = xe_vma_bo(madvise_range.vmas[i]);
+ if (!bo)
+ continue;
+ err = drm_exec_lock_obj(&exec, &bo->ttm.base);
+ drm_exec_retry_on_contention(&exec);
+ if (err)
+ goto err_fini;
+ }
+ }
+ }
+
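+ /*
+ * Hold the userptr notifier lock and/or the SVM notifier lock for the
+ * VMA types present in the range, so the attribute update and the
+ * range invalidation below are serialized against concurrent notifier
+ * callbacks.
+ */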
+ if (madvise_range.has_userptr_vmas)
+ down_read(&vm->userptr.notifier_lock);
+
+ if (madvise_range.has_svm_vmas)
+ xe_svm_notifier_lock(vm);
+
+ attr_type = array_index_nospec(args->type, ARRAY_SIZE(madvise_funcs));
+ madvise_funcs[attr_type](xe, vm, madvise_range.vmas, madvise_range.num_vmas, args);
+
+ kfree(madvise_range.vmas);
+ madvise_range.vmas = NULL;
+
+ err = xe_vm_invalidate_madvise_range(vm, args->start, args->start + args->range);
+
+ if (madvise_range.has_svm_vmas)
+ xe_svm_notifier_unlock(vm);
+
+ if (madvise_range.has_userptr_vmas)
+ up_read(&vm->userptr.notifier_lock);
+err_fini:
+ if (madvise_range.has_bo_vmas)
+ drm_exec_fini(&exec);
+unlock_vm:
+ up_write(&vm->lock);
+put_vm:
+ xe_vm_put(vm);
+ return err;
+}
diff --git a/drivers/gpu/drm/xe/xe_vm_madvise.h b/drivers/gpu/drm/xe/xe_vm_madvise.h
new file mode 100644
index 000000000000..b0e1fc445f23
--- /dev/null
+++ b/drivers/gpu/drm/xe/xe_vm_madvise.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: MIT */
+/*
+ * Copyright © 2025 Intel Corporation
+ */
+
+#ifndef _XE_VM_MADVISE_H_
+#define _XE_VM_MADVISE_H_
+
+struct drm_device;
+struct drm_file;
+
+int xe_vm_madvise_ioctl(struct drm_device *dev, void *data,
+ struct drm_file *file);
+
+#endif
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index bed6088e1bb3..3b3019ecbfab 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -77,6 +77,32 @@ struct xe_userptr {
#endif
};
+/**
+ * struct xe_vma_mem_attr - memory attributes associated with a VMA
+ */
+struct xe_vma_mem_attr {
+ /** @preferred_loc: preferred memory location */
+ struct {
+ /** @preferred_loc.migration_policy: Policy for migration of pages */
+ u32 migration_policy;
+
+ /**
+ * @preferred_loc.devmem_fd: fd used to determine the pagemap requested by the
+ * user. DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM and DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE
+ * mean system memory or the closest device memory, respectively.
+ */
+ u32 devmem_fd;
+ } preferred_loc;
+
+ /** @atomic_access: The atomic access type for the vma */
+ u32 atomic_access;
+
+ /**
+ * @pat_index: The pat index to use when encoding the PTEs for this vma.
+ */
+ u16 pat_index;
+};
+
struct xe_vma {
/** @gpuva: Base GPUVA object */
struct drm_gpuva gpuva;
@@ -126,15 +152,22 @@ struct xe_vma {
u8 tile_staged;
/**
- * @pat_index: The pat index to use when encoding the PTEs for this vma.
+ * @skip_invalidation: Used by madvise to skip invalidation when the
+ * memory attributes do not change
*/
- u16 pat_index;
+ u32 skip_invalidation;
/**
* @ufence: The user fence that was provided with MAP.
* Needs to be signalled before UNMAP can be processed.
*/
struct xe_user_fence *ufence;
+
+ /**
+ * @attr: The memory attributes of this VMA, which determine the migration
+ * policy and the encoding of its PTEs.
+ */
+ struct xe_vma_mem_attr attr;
};
/**
@@ -462,6 +495,7 @@ struct xe_vma_ops {
struct xe_vm_pgtable_update_ops pt_update_ops[XE_MAX_TILES_PER_DEVICE];
/** @flag: signify the properties within xe_vma_ops*/
#define XE_VMA_OPS_FLAG_HAS_SVM_PREFETCH BIT(0)
+#define XE_VMA_OPS_FLAG_MADVISE BIT(1)
u32 flags;
#ifdef TEST_VM_OPS_ERROR
/** @inject_error: inject error to test error handling */
diff --git a/include/drm/drm_gpusvm.h b/include/drm/drm_gpusvm.h
index 6a5156476bf4..cdd89e5af4f8 100644
--- a/include/drm/drm_gpusvm.h
+++ b/include/drm/drm_gpusvm.h
@@ -373,6 +373,10 @@ const struct dev_pagemap_ops *drm_gpusvm_pagemap_ops_get(void);
bool drm_gpusvm_has_mapping(struct drm_gpusvm *gpusvm, unsigned long start,
unsigned long end);
+struct drm_gpusvm_notifier *
+drm_gpusvm_notifier_find(struct drm_gpusvm *gpusvm, unsigned long start,
+ unsigned long end);
+
struct drm_gpusvm_range *
drm_gpusvm_range_find(struct drm_gpusvm_notifier *notifier, unsigned long start,
unsigned long end);
@@ -530,4 +534,70 @@ __drm_gpusvm_range_next(struct drm_gpusvm_range *range)
(range__) && (drm_gpusvm_range_start(range__) < (end__)); \
(range__) = __drm_gpusvm_range_next(range__))
+/**
+ * drm_gpusvm_for_each_range_safe() - Safely iterate over GPU SVM ranges in a notifier
+ * @range__: Iterator variable for the ranges
+ * @next__: Iterator variable for temporary storage of the next range
+ * @notifier__: Pointer to the GPU SVM notifier
+ * @start__: Start address of the range
+ * @end__: End address of the range
+ *
+ * This macro is used to iterate over GPU SVM ranges in a notifier while
+ * removing ranges from it.
+ */
+#define drm_gpusvm_for_each_range_safe(range__, next__, notifier__, start__, end__) \
+ for ((range__) = drm_gpusvm_range_find((notifier__), (start__), (end__)), \
+ (next__) = __drm_gpusvm_range_next(range__); \
+ (range__) && (drm_gpusvm_range_start(range__) < (end__)); \
+ (range__) = (next__), (next__) = __drm_gpusvm_range_next(range__))
+
+/**
+ * __drm_gpusvm_notifier_next() - get the next drm_gpusvm_notifier in the list
+ * @notifier: a pointer to the current drm_gpusvm_notifier
+ *
+ * Return: A pointer to the next drm_gpusvm_notifier if available, or NULL if
+ * the current notifier is the last one or if the input notifier is
+ * NULL.
+ */
+static inline struct drm_gpusvm_notifier *
+__drm_gpusvm_notifier_next(struct drm_gpusvm_notifier *notifier)
+{
+ if (notifier && !list_is_last(¬ifier->entry,
+ ¬ifier->gpusvm->notifier_list))
+ return list_next_entry(notifier, entry);
+
+ return NULL;
+}
+
+/**
+ * drm_gpusvm_for_each_notifier() - Iterate over GPU SVM notifiers in a gpusvm
+ * @notifier__: Iterator variable for the notifiers
+ * @gpusvm__: Pointer to the GPU SVM structure
+ * @start__: Start address of the notifier
+ * @end__: End address of the notifier
+ *
+ * This macro is used to iterate over GPU SVM notifiers in a gpusvm.
+ */
+#define drm_gpusvm_for_each_notifier(notifier__, gpusvm__, start__, end__) \
+ for ((notifier__) = drm_gpusvm_notifier_find((gpusvm__), (start__), (end__)); \
+ (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \
+ (notifier__) = __drm_gpusvm_notifier_next(notifier__))
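+
+/*
+ * Illustrative usage sketch (assumes the caller holds whatever locking the
+ * driver uses to protect the gpusvm notifier tree):
+ *
+ *	struct drm_gpusvm_notifier *notifier;
+ *
+ *	drm_gpusvm_for_each_notifier(notifier, gpusvm, start, end)
+ *		pr_debug("notifier starts at %lx\n",
+ *			 drm_gpusvm_notifier_start(notifier));
+ */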
+
+/**
+ * drm_gpusvm_for_each_notifier_safe() - Safely iterate over GPU SVM notifiers in a gpusvm
+ * @notifier__: Iterator variable for the notifiers
+ * @next__: Iterator variable for temporary storage of the next notifier
+ * @gpusvm__: Pointer to the GPU SVM structure
+ * @start__: Start address of the notifier
+ * @end__: End address of the notifier
+ *
+ * This macro is used to iterate over GPU SVM notifiers in a gpusvm while
+ * removing notifiers from it.
+ */
+#define drm_gpusvm_for_each_notifier_safe(notifier__, next__, gpusvm__, start__, end__) \
+ for ((notifier__) = drm_gpusvm_notifier_find((gpusvm__), (start__), (end__)), \
+ (next__) = __drm_gpusvm_notifier_next(notifier__); \
+ (notifier__) && (drm_gpusvm_notifier_start(notifier__) < (end__)); \
+ (notifier__) = (next__), (next__) = __drm_gpusvm_notifier_next(notifier__))
+
#endif /* __DRM_GPUSVM_H__ */
diff --git a/include/drm/drm_gpuvm.h b/include/drm/drm_gpuvm.h
index 2a9629377633..c589b886a4fd 100644
--- a/include/drm/drm_gpuvm.h
+++ b/include/drm/drm_gpuvm.h
@@ -211,6 +211,27 @@ enum drm_gpuvm_flags {
DRM_GPUVM_USERBITS = BIT(1),
};
+/**
+ * enum drm_gpuvm_sm_map_ops_flags - flags for drm_gpuvm split/merge ops
+ */
+enum drm_gpuvm_sm_map_ops_flags {
+ /**
+ * @DRM_GPUVM_SM_MAP_NOT_MADVISE: Default split/merge map ops (no madvise handling)
+ */
+ DRM_GPUVM_SM_MAP_NOT_MADVISE = 0,
+
+ /**
+ * @DRM_GPUVM_SKIP_GEM_OBJ_VA_SPLIT_MADVISE: This flag makes
+ * drm_gpuvm_sm_map_ops_create() iterate over the GPU VAs in the
+ * user-provided range and split an existing non-GEM-object VA if the
+ * start or end of the input range lies within it. The resulting
+ * operations can contain up to 2 REMAPs and 2 MAPs. Unlike the default
+ * mode, operations created with this flag never contain UNMAPs or
+ * merges, and the result may contain no operations at all.
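+ *
+ * A minimal call sketch (names other than the flag and the function are
+ * placeholders), following the updated prototype below:
+ *
+ *	ops = drm_gpuvm_sm_map_ops_create(gpuvm, addr, range,
+ *					  DRM_GPUVM_SKIP_GEM_OBJ_VA_SPLIT_MADVISE,
+ *					  NULL, 0);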
+ */
+ DRM_GPUVM_SKIP_GEM_OBJ_VA_SPLIT_MADVISE = BIT(0),
+};
+
/**
* struct drm_gpuvm - DRM GPU VA Manager
*
@@ -1059,8 +1080,8 @@ struct drm_gpuva_ops {
#define drm_gpuva_next_op(op) list_next_entry(op, entry)
struct drm_gpuva_ops *
-drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm,
- u64 addr, u64 range,
+drm_gpuvm_sm_map_ops_create(struct drm_gpuvm *gpuvm, u64 addr, u64 range,
+ enum drm_gpuvm_sm_map_ops_flags flags,
struct drm_gem_object *obj, u64 offset);
struct drm_gpuva_ops *
drm_gpuvm_sm_unmap_ops_create(struct drm_gpuvm *gpuvm,
diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
index 6a702ba7817c..c0aa7c19870e 100644
--- a/include/uapi/drm/xe_drm.h
+++ b/include/uapi/drm/xe_drm.h
@@ -81,6 +81,8 @@ extern "C" {
* - &DRM_IOCTL_XE_EXEC
* - &DRM_IOCTL_XE_WAIT_USER_FENCE
* - &DRM_IOCTL_XE_OBSERVATION
+ * - &DRM_IOCTL_XE_MADVISE
+ * - &DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS
*/
/*
@@ -102,6 +104,8 @@ extern "C" {
#define DRM_XE_EXEC 0x09
#define DRM_XE_WAIT_USER_FENCE 0x0a
#define DRM_XE_OBSERVATION 0x0b
+#define DRM_XE_MADVISE 0x0c
+#define DRM_XE_VM_QUERY_VMAS_ATTRS 0x0d
/* Must be kept compact -- no holes */
@@ -117,6 +121,8 @@ extern "C" {
#define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec)
#define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param)
+#define DRM_IOCTL_XE_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_MADVISE, struct drm_xe_madvise)
+#define DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_VM_QUERY_VMAS_ATTRS, struct drm_xe_vm_query_vmas_attr)
/**
* DOC: Xe IOCTL Extensions
@@ -1003,6 +1009,10 @@ struct drm_xe_vm_destroy {
* valid on VMs with DRM_XE_VM_CREATE_FLAG_FAULT_MODE set. The CPU address
* mirror flag are only valid for DRM_XE_VM_BIND_OP_MAP operations, the BO
* handle MBZ, and the BO offset MBZ.
+ *
+ * The @prefetch_mem_region_instance for %DRM_XE_VM_BIND_OP_PREFETCH can also be:
+ * - %DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC, which ensures prefetching occurs in
+ * the memory region advised by madvise.
*/
struct drm_xe_vm_bind_op {
/** @extensions: Pointer to the first extension struct, if any */
@@ -1108,6 +1118,7 @@ struct drm_xe_vm_bind_op {
/** @flags: Bind flags */
__u32 flags;
+#define DRM_XE_CONSULT_MEM_ADVISE_PREF_LOC -1
/**
* @prefetch_mem_region_instance: Memory region to prefetch VMA to.
* It is a region instance, not a mask.
@@ -1970,6 +1981,248 @@ struct drm_xe_query_eu_stall {
__u64 sampling_rates[];
};
+/**
+ * struct drm_xe_madvise - Input of &DRM_IOCTL_XE_MADVISE
+ *
+ * This structure is used to set memory attributes for a virtual address range
+ * in a VM. The type of attribute is specified by @type, and the corresponding
+ * union member is used to provide additional parameters for @type.
+ *
+ * Supported attribute types:
+ * - DRM_XE_VMA_ATTR_PREFERRED_LOC: Set preferred memory location.
+ * - DRM_XE_VMA_ATTR_ATOMIC: Set atomic access policy.
+ * - DRM_XE_VMA_ATTR_PAT: Set page attribute table index.
+ *
+ * Example:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_xe_madvise madvise = {
+ * .vm_id = vm_id,
+ * .start = 0x100000,
+ * .range = 0x2000,
+ * .type = DRM_XE_VMA_ATTR_ATOMIC,
+ * .atomic.val = DRM_XE_VMA_ATOMIC_DEVICE,
+ * };
+ *
+ * ioctl(fd, DRM_IOCTL_XE_MADVISE, &madvise);
+ *
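+ * A preferred-location advise could look like the following (illustrative
+ * sketch; values other than the defined constants are placeholders):
+ *
+ * .. code-block:: C
+ *
+ * struct drm_xe_madvise madvise = {
+ * .vm_id = vm_id,
+ * .start = 0x100000,
+ * .range = 0x2000,
+ * .type = DRM_XE_VMA_ATTR_PREFERRED_LOC,
+ * .preferred_mem_loc.devmem_fd = DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE,
+ * .preferred_mem_loc.migration_policy = DRM_XE_MIGRATE_ALL_PAGES,
+ * };
+ *
+ * ioctl(fd, DRM_IOCTL_XE_MADVISE, &madvise);
+ *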
+ */
+struct drm_xe_madvise {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @start: start of the virtual address range */
+ __u64 start;
+
+ /** @range: size of the virtual address range */
+ __u64 range;
+
+ /** @vm_id: vm_id of the virtual range */
+ __u32 vm_id;
+
+#define DRM_XE_VMA_ATTR_PREFERRED_LOC 0
+#define DRM_XE_VMA_ATTR_ATOMIC 1
+#define DRM_XE_VMA_ATTR_PAT 2
+ /** @type: type of attribute */
+ __u32 type;
+
+ union {
+ /**
+ * @atomic: Atomic access policy
+ *
+ * Used when @type == DRM_XE_VMA_ATTR_ATOMIC.
+ *
+ * Supported values for @atomic.val:
+ * - DRM_XE_VMA_ATOMIC_UNDEFINED: Undefined or default behaviour.
+ *   Supports both GPU and CPU atomic operations for the system allocator,
+ *   and GPU atomic operations for normal (BO-backed) allocations.
+ * - DRM_XE_VMA_ATOMIC_DEVICE: Supports GPU atomic operations
+ * - DRM_XE_VMA_ATOMIC_GLOBAL: Supports both GPU and CPU atomic operations
+ * - DRM_XE_VMA_ATOMIC_CPU: Supports CPU atomic operations only
+ */
+ struct {
+#define DRM_XE_VMA_ATOMIC_UNDEFINED 0
+#define DRM_XE_VMA_ATOMIC_DEVICE 1
+#define DRM_XE_VMA_ATOMIC_GLOBAL 2
+#define DRM_XE_VMA_ATOMIC_CPU 3
+ /** @atomic.val: value of atomic operation */
+ __u32 val;
+
+ /** @atomic.reserved: Reserved */
+ __u32 reserved;
+ } atomic;
+
+ /**
+ * @pat_index: Page attribute table index
+ *
+ * Used when @type == DRM_XE_VMA_ATTR_PAT.
+ */
+ struct {
+ /** @pat_index.val: PAT index value */
+ __u32 val;
+
+ /** @pat_index.reserved: Reserved */
+ __u32 reserved;
+ } pat_index;
+
+ /**
+ * @preferred_mem_loc: preferred memory location
+ *
+ * Used when @type == DRM_XE_VMA_ATTR_PREFERRED_LOC
+ *
+ * Supported values for @preferred_mem_loc.devmem_fd:
+ * - DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE: use the VRAM of the faulting tile as the preferred location
+ * - DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM: use system memory as the preferred location
+ *
+ * Supported values for @preferred_mem_loc.migration_policy:
+ * - DRM_XE_MIGRATE_ALL_PAGES
+ * - DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES
+ */
+ struct {
+#define DRM_XE_PREFERRED_LOC_DEFAULT_DEVICE 0
+#define DRM_XE_PREFERRED_LOC_DEFAULT_SYSTEM -1
+ /** @preferred_mem_loc.devmem_fd: fd for preferred loc */
+ __u32 devmem_fd;
+
+#define DRM_XE_MIGRATE_ALL_PAGES 0
+#define DRM_XE_MIGRATE_ONLY_SYSTEM_PAGES 1
+ /** @preferred_mem_loc.migration_policy: Page migration policy */
+ __u32 migration_policy;
+ } preferred_mem_loc;
+ };
+
+ /** @reserved: Reserved */
+ __u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vma_mem_attr - Output of &DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS
+ *
+ * This structure is provided by userspace and filled by KMD in response to the
+ * DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS ioctl. It describes memory attributes of
+ * a VMA within a specified address range in a VM.
+ *
+ * The structure includes information such as the atomic access policy, the
+ * page attribute table (PAT) index, and the preferred memory location.
+ * Userspace allocates an array of these structures and passes a pointer to
+ * the ioctl to retrieve the attributes for each VMA.
+ *
+ * @extensions: Pointer to the first extension struct, if any
+ * @start: Start address of the virtual memory area
+ * @end: End address of the virtual memory area
+ *
+ * @atomic.val: Value of atomic operation policy
+ * @atomic.reserved: Reserved, must be zero
+ *
+ * @pat_index.val: Page Attribute Table (PAT) index
+ * @pat_index.reserved: Reserved, must be zero
+ *
+ * @preferred_mem_loc.devmem_fd: File descriptor for preferred device memory
+ * @preferred_mem_loc.migration_policy: Migration policy for memory placement
+ *
+ */
+struct drm_xe_vma_mem_attr {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @start: start of the vma */
+ __u64 start;
+
+ /** @end: end of the vma */
+ __u64 end;
+
+ struct {
+ struct {
+ /** @atomic.val: atomic attribute for vma*/
+ __u32 val;
+
+ /** @atomic.reserved: Reserved */
+ __u32 reserved;
+ } atomic;
+
+ struct {
+ /** @pat_index.val: PAT index of vma */
+ __u32 val;
+
+ /** @pat_index.reserved: Reserved */
+ __u32 reserved;
+ } pat_index;
+
+ /** @preferred_mem_loc: preferred memory location */
+ struct {
+ /** @preferred_mem_loc.devmem_fd: fd for preferred loc */
+ __u32 devmem_fd;
+
+ /** @preferred_mem_loc.migration_policy: Page migration policy */
+ __u32 migration_policy;
+ } preferred_mem_loc;
+ };
+
+ /** @reserved: Reserved */
+ __u64 reserved[2];
+};
+
+/**
+ * struct drm_xe_vm_query_vmas_attr - Input of &DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS
+ *
+ * This structure is used to query memory attributes of virtual memory areas
+ * (VMAs) within a specified address range in a VM. It provides detailed
+ * information about each VMA, including the atomic access policy, page
+ * attribute table (PAT) index, and preferred memory location.
+ *
+ * Userspace first calls the ioctl with @num_vmas = 0 and
+ * @vector_of_vma_mem_attr = NULL to retrieve the number of VMAs in the range.
+ * Then, it allocates a buffer of that size and calls the ioctl again to fill
+ * the buffer with VMA attributes.
+ *
+ * Example:
+ *
+ * .. code-block:: C
+ *
+ * struct drm_xe_vm_query_vmas_attr query = {
+ * .vm_id = vm_id,
+ * .start = 0x100000,
+ * .range = 0x2000,
+ * };
+ * struct drm_xe_vma_mem_attr *attrs;
+ * __u32 num_vmas;
+ *
+ * // First call with num_vmas = 0 to get the number of VMAs
+ * ioctl(fd, DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS, &query);
+ * num_vmas = query.num_vmas;
+ *
+ * // Allocate and query attributes
+ * attrs = calloc(num_vmas, sizeof(*attrs));
+ * query.vector_of_vma_mem_attr = (__u64)(uintptr_t)attrs;
+ * query.num_vmas = num_vmas;
+ * ioctl(fd, DRM_IOCTL_XE_VM_QUERY_VMAS_ATTRS, &query);
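+ *
+ * // Illustrative only: walk the returned attributes
+ * for (__u32 i = 0; i < query.num_vmas; i++)
+ * printf("vma [0x%llx, 0x%llx) pat_index %u\n",
+ * (unsigned long long)attrs[i].start,
+ * (unsigned long long)attrs[i].end,
+ * attrs[i].pat_index.val);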
+ */
+struct drm_xe_vm_query_vmas_attr {
+ /** @extensions: Pointer to the first extension struct, if any */
+ __u64 extensions;
+
+ /** @vm_id: vm_id of the virtual range */
+ __u32 vm_id;
+
+ /** @num_vmas: number of vmas in range */
+ __u32 num_vmas;
+
+ /** @start: start of the virtual address range */
+ __u64 start;
+
+ /** @range: size of the virtual address range */
+ __u64 range;
+
+ /**
+ * @vector_of_vma_mem_attr: userptr to an array of struct
+ * drm_xe_vma_mem_attr
+ */
+ __u64 vector_of_vma_mem_attr;
+
+ /** @reserved: Reserved */
+ __u64 reserved[2];
+};
+
#if defined(__cplusplus)
}
#endif
--
2.34.1