[PATCH v5 21/23] drm/xe/vm: Add a delayed worker to merge fragmented vmas
Himal Prasad Ghimiray
himal.prasad.ghimiray at intel.com
Tue Jul 22 13:35:24 UTC 2025
During the initial CPU address mirror bind, initialize and start the delayed
work item responsible for merging adjacent CPU address mirror VMAs that have
default memory attributes. The start helper sets the merge_active flag and
schedules the work to run after a delay, so that VMA updates can be batched.
The work is cancelled when the VM is destroyed.
Suggested-by: Matthew Brost <matthew.brost at intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
drivers/gpu/drm/xe/xe_vm.c | 126 +++++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_vm_types.h | 15 ++++
2 files changed, 141 insertions(+)
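
A condensed, illustrative view of the delayed-work lifecycle this patch wires
up (the names below are placeholders, not the real driver symbols; the actual
implementation is in the diff that follows):

#include <linux/workqueue.h>
#include <linux/jiffies.h>

struct demo_vm {
	struct delayed_work merge_work;
	bool merge_active;
};

static void demo_merge_worker(struct work_struct *work)
{
	struct demo_vm *vm = container_of(to_delayed_work(work),
					  struct demo_vm, merge_work);

	/* scan and merge here, then re-arm so later binds keep batching */
	schedule_delayed_work(&vm->merge_work, msecs_to_jiffies(5000));
}

/* First CPU address mirror bind: arm the worker exactly once. */
static void demo_start(struct demo_vm *vm)
{
	if (vm->merge_active)
		return;
	vm->merge_active = true;
	INIT_DELAYED_WORK(&vm->merge_work, demo_merge_worker);
	schedule_delayed_work(&vm->merge_work, msecs_to_jiffies(5000));
}

/* VM teardown: clear the flag, then wait for any running pass to finish. */
static void demo_stop(struct demo_vm *vm)
{
	if (!vm->merge_active)
		return;
	vm->merge_active = false;
	cancel_delayed_work_sync(&vm->merge_work);
}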
diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
index 003c8209f8bd..bee849167c0d 100644
--- a/drivers/gpu/drm/xe/xe_vm.c
+++ b/drivers/gpu/drm/xe/xe_vm.c
@@ -1160,6 +1160,127 @@ static void xe_vma_free(struct xe_vma *vma)
kfree(vma);
}
+struct va_range {
+ u64 start;
+ u64 end;
+};
+
+static void add_merged_range(struct va_range **ranges, int *count, int *capacity,
+ u64 start, u64 end)
+{
+ const int array_size = 8;
+ struct va_range *new_ranges;
+ int new_capacity;
+
+ if (*count == *capacity) {
+ new_capacity = *capacity ? *capacity * 2 : array_size;
+ new_ranges = krealloc_array(*ranges, new_capacity, sizeof(**ranges), GFP_KERNEL);
+ if (!new_ranges)
+ return;
+
+ *ranges = new_ranges;
+ *capacity = new_capacity;
+ }
+ (*ranges)[(*count)++] = (struct va_range){ .start = start, .end = end };
+}
+
+static void xe_vm_vmas_merge_worker(struct work_struct *work)
+{
+ struct xe_vm *vm = container_of(to_delayed_work(work), struct xe_vm, merge_vmas_work);
+ struct drm_gpuva *gpuva, *next = NULL;
+ struct va_range *merged_ranges = NULL;
+ int merge_count = 0, merge_capacity = 0;
+ bool in_merge = false;
+ u64 merge_start = 0, merge_end = 0;
+ int merge_len = 0;
+
+ if (!vm->merge_active)
+ return;
+
+ down_write(&vm->lock);
+
+ drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
+ struct xe_vma *vma = gpuva_to_vma(gpuva);
+
+ if (!xe_vma_is_cpu_addr_mirror(vma) || !xe_vma_has_default_mem_attrs(vma)) {
+ if (in_merge && merge_len > 1)
+ add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
+ merge_start, merge_end);
+
+ in_merge = false;
+ merge_len = 0;
+ continue;
+ }
+
+ if (!in_merge) {
+ merge_start = xe_vma_start(vma);
+ merge_end = xe_vma_end(vma);
+ in_merge = true;
+ merge_len = 1;
+ } else if (xe_vma_start(vma) == merge_end && xe_vma_has_default_mem_attrs(vma)) {
+ merge_end = xe_vma_end(vma);
+ merge_len++;
+ } else {
+ if (merge_len > 1)
+ add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
+ merge_start, merge_end);
+ merge_start = xe_vma_start(vma);
+ merge_end = xe_vma_end(vma);
+ merge_len = 1;
+ }
+ }
+
+ if (in_merge && merge_len > 1) {
+ add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
+ merge_start, merge_end);
+ }
+
+ for (int i = 0; i < merge_count; i++) {
+ vm_dbg(&vm->xe->drm, "Merged VA range %d: start=0x%016llx, end=0x%016llx\n",
+ i, merged_ranges[i].start, merged_ranges[i].end);
+
+ if (xe_vm_alloc_cpu_addr_mirror_vma(vm, merged_ranges[i].start,
+ merged_ranges[i].end - merged_ranges[i].start))
+ break;
+ }
+
+ up_write(&vm->lock);
+ kfree(merged_ranges);
+ schedule_delayed_work(&vm->merge_vmas_work, msecs_to_jiffies(5000));
+}
+
+/*
+ * xe_vm_start_vmas_merge - Initialize and schedule VMA merge work
+ * @vm: Pointer to the xe_vm structure
+ *
+ * Initializes the delayed work item responsible for merging adjacent
+ * CPU address mirror VMAs with default memory attributes. This function
+ * sets the merge_active flag and schedules the work to run after a delay,
+ * allowing batching of VMA updates.
+ */
+static void xe_vm_start_vmas_merge(struct xe_vm *vm)
+{
+ if (vm->merge_active)
+ return;
+
+ vm->merge_active = true;
+ INIT_DELAYED_WORK(&vm->merge_vmas_work, xe_vm_vmas_merge_worker);
+ schedule_delayed_work(&vm->merge_vmas_work, msecs_to_jiffies(5000));
+}
+
+/*
+ * xe_vm_stop_vmas_merge - Cancel scheduled VMA merge work
+ * @vm: Pointer to the xe_vm structure
+ */
+static void xe_vm_stop_vmas_merge(struct xe_vm *vm)
+{
+ if (!vm->merge_active)
+ return;
+
+ vm->merge_active = false;
+ cancel_delayed_work_sync(&vm->merge_vmas_work);
+}
+
#define VMA_CREATE_FLAG_READ_ONLY BIT(0)
#define VMA_CREATE_FLAG_IS_NULL BIT(1)
#define VMA_CREATE_FLAG_DUMPABLE BIT(2)
@@ -1269,6 +1390,9 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
xe_vm_get(vm);
}
+ if (xe_vma_is_cpu_addr_mirror(vma))
+ xe_vm_start_vmas_merge(vm);
+
return vma;
}
@@ -1982,6 +2106,8 @@ static void vm_destroy_work_func(struct work_struct *w)
/* xe_vm_close_and_put was not called? */
xe_assert(xe, !vm->size);
+ xe_vm_stop_vmas_merge(vm);
+
if (xe_vm_in_preempt_fence_mode(vm))
flush_work(&vm->preempt.rebind_work);
diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
index 351242c92c12..c4f3542eb464 100644
--- a/drivers/gpu/drm/xe/xe_vm_types.h
+++ b/drivers/gpu/drm/xe/xe_vm_types.h
@@ -374,6 +374,21 @@ struct xe_vm {
bool batch_invalidate_tlb;
/** @xef: XE file handle for tracking this VM's drm client */
struct xe_file *xef;
+
+ /**
+ * @merge_vmas_work: Delayed work item used to merge CPU address mirror VMAs.
+ * This work is scheduled to scan the GPU virtual memory space and
+ * identify adjacent CPU address mirror VMAs that have default memory
+ * attributes. When such VMAs are found, they are merged into a single
+ * larger VMA to reduce fragmentation. The merging process is triggered
+ * asynchronously via a delayed workqueue to avoid blocking critical paths
+ * and to batch updates when possible.
+ */
+ struct delayed_work merge_vmas_work;
+
+ /** @merge_active: True if merge_vmas_work has been initialized */
+ bool merge_active;
+
};
/** struct xe_vma_op_map - VMA map operation */
--
2.34.1