[PATCH v5 21/23] drm/xe/vm: Add a delayed worker to merge fragmented vmas
Matthew Brost
matthew.brost at intel.com
Tue Jul 29 04:39:51 UTC 2025
On Tue, Jul 22, 2025 at 07:05:24PM +0530, Himal Prasad Ghimiray wrote:
> During the initial mirror bind, initialize and start the delayed work
> item responsible for merging adjacent CPU address mirror VMAs with
> default memory attributes. The new helper sets the merge_active flag and
> schedules the work to run after a delay, allowing batching of VMA updates.
>
I think we will need some way to defragment, but it needs more thought.
The trade-off between defragmenting on every insertion of a mirror VMA
(binding a BO back to mirror) or on every unmap that restores the
defaults, vs. a periodic worker, needs to be carefully considered.
The eager approach costs more time up front (plus perhaps some additional
complexity), while the periodic worker blocks out all memory transactions
for as long as it holds the VM lock.
Since this doesn't affect any functionality, perhaps table it for now and
we can run this one by Thomas to formulate a plan / solution.
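
For reference, a rough sketch of what the eager path could look like:
merge only around the range whose defaults were just restored, reusing
the helpers from this patch plus drm_gpuva_find_prev()/drm_gpuva_find_next().
The function name, call site and locking context below are illustrative
only and untested, not something this patch provides:

/* Illustrative only: eager variant of the merge, scoped to one range. */
static int xe_vm_merge_around(struct xe_vm *vm, u64 start, u64 end)
{
	u64 merged_start = start, merged_end = end;
	struct drm_gpuva *prev, *next;

	lockdep_assert_held_write(&vm->lock);

	/* Grow left if the adjacent VMA is a mirror VMA with default attrs. */
	prev = drm_gpuva_find_prev(&vm->gpuvm, merged_start);
	if (prev) {
		struct xe_vma *vma = gpuva_to_vma(prev);

		if (xe_vma_is_cpu_addr_mirror(vma) &&
		    xe_vma_has_default_mem_attrs(vma))
			merged_start = xe_vma_start(vma);
	}

	/* Grow right likewise. */
	next = drm_gpuva_find_next(&vm->gpuvm, merged_end);
	if (next) {
		struct xe_vma *vma = gpuva_to_vma(next);

		if (xe_vma_is_cpu_addr_mirror(vma) &&
		    xe_vma_has_default_mem_attrs(vma))
			merged_end = xe_vma_end(vma);
	}

	/* Nothing adjacent to fold in. */
	if (merged_start == start && merged_end == end)
		return 0;

	return xe_vm_alloc_cpu_addr_mirror_vma(vm, merged_start,
					       merged_end - merged_start);
}

That keeps the cost proportional to the single unmap rather than a
whole-VM scan, at the price of doing the work under vm->lock in the
unmap path.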
Matt
> Suggested-by: Matthew Brost <matthew.brost at intel.com>
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
> drivers/gpu/drm/xe/xe_vm.c | 126 +++++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_vm_types.h | 15 ++++
> 2 files changed, 141 insertions(+)
>
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 003c8209f8bd..bee849167c0d 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1160,6 +1160,127 @@ static void xe_vma_free(struct xe_vma *vma)
> kfree(vma);
> }
>
> +struct va_range {
> + u64 start;
> + u64 end;
> +};
> +
> +static void add_merged_range(struct va_range **ranges, int *count, int *capacity,
> + u64 start, u64 end)
> +{
> + const int array_size = 8;
> + struct va_range *new_ranges;
> + int new_capacity;
> +
> + if (*count == *capacity) {
> + new_capacity = *capacity ? *capacity * 2 : array_size;
> + new_ranges = krealloc(*ranges, new_capacity * sizeof(**ranges), GFP_KERNEL);
> + if (!new_ranges)
> + return;
> +
> + *ranges = new_ranges;
> + *capacity = new_capacity;
> + }
> + (*ranges)[(*count)++] = (struct va_range){ .start = start, .end = end };
> +}
> +
> +static void xe_vm_vmas_merge_worker(struct work_struct *work)
> +{
> + struct xe_vm *vm = container_of(to_delayed_work(work), struct xe_vm, merge_vmas_work);
> + struct drm_gpuva *gpuva, *next = NULL;
> + struct va_range *merged_ranges = NULL;
> + int merge_count = 0, merge_capacity = 0;
> + bool in_merge = false;
> + u64 merge_start = 0, merge_end = 0;
> + int merge_len = 0;
> +
> + if (!vm->merge_active)
> + return;
> +
> + down_write(&vm->lock);
> +
> + drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
> + struct xe_vma *vma = gpuva_to_vma(gpuva);
> +
> + if (!xe_vma_is_cpu_addr_mirror(vma) || !xe_vma_has_default_mem_attrs(vma)) {
> + if (in_merge && merge_len > 1)
> + add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
> + merge_start, merge_end);
> +
> + in_merge = false;
> + merge_len = 0;
> + continue;
> + }
> +
> + if (!in_merge) {
> + merge_start = xe_vma_start(vma);
> + merge_end = xe_vma_end(vma);
> + in_merge = true;
> + merge_len = 1;
> + } else if (xe_vma_start(vma) == merge_end && xe_vma_has_default_mem_attrs(vma)) {
> + merge_end = xe_vma_end(vma);
> + merge_len++;
> + } else {
> + if (merge_len > 1)
> + add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
> + merge_start, merge_end);
> + merge_start = xe_vma_start(vma);
> + merge_end = xe_vma_end(vma);
> + merge_len = 1;
> + }
> + }
> +
> + if (in_merge && merge_len > 1) {
> + add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
> + merge_start, merge_end);
> + }
> +
> + for (int i = 0; i < merge_count; i++) {
> + vm_dbg(&vm->xe->drm, "Merged VA range %d: start=0x%016llx, end=0x%016llx\n",
> + i, merged_ranges[i].start, merged_ranges[i].end);
> +
> + if (xe_vm_alloc_cpu_addr_mirror_vma(vm, merged_ranges[i].start,
> + merged_ranges[i].end - merged_ranges[i].start))
> + break;
> + }
> +
> + up_write(&vm->lock);
> + kfree(merged_ranges);
> + schedule_delayed_work(&vm->merge_vmas_work, msecs_to_jiffies(5000));
> +}
> +
> +/*
> + * xe_vm_start_vmas_merge - Initialize and schedule VMA merge work
> + * @vm: Pointer to the xe_vm structure
> + *
> + * Initializes the delayed work item responsible for merging adjacent
> + * CPU address mirror VMAs with default memory attributes. This function
> + * sets the merge_active flag and schedules the work to run after a delay,
> + * allowing batching of VMA updates.
> + */
> +static void xe_vm_start_vmas_merge(struct xe_vm *vm)
> +{
> + if (vm->merge_active)
> + return;
> +
> + vm->merge_active = true;
> + INIT_DELAYED_WORK(&vm->merge_vmas_work, xe_vm_vmas_merge_worker);
> + schedule_delayed_work(&vm->merge_vmas_work, msecs_to_jiffies(5000));
> +}
> +
> +/*
> + * xe_vm_stop_vmas_merge - Cancel scheduled VMA merge work
> + * @vm: Pointer to the xe_vm structure
> + */
> +static void xe_vm_stop_vmas_merge(struct xe_vm *vm)
> +{
> + if (!vm->merge_active)
> + return;
> +
> + vm->merge_active = false;
> + cancel_delayed_work_sync(&vm->merge_vmas_work);
> +}
> +
> #define VMA_CREATE_FLAG_READ_ONLY BIT(0)
> #define VMA_CREATE_FLAG_IS_NULL BIT(1)
> #define VMA_CREATE_FLAG_DUMPABLE BIT(2)
> @@ -1269,6 +1390,9 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
> xe_vm_get(vm);
> }
>
> + if (xe_vma_is_cpu_addr_mirror(vma))
> + xe_vm_start_vmas_merge(vm);
> +
> return vma;
> }
>
> @@ -1982,6 +2106,8 @@ static void vm_destroy_work_func(struct work_struct *w)
> /* xe_vm_close_and_put was not called? */
> xe_assert(xe, !vm->size);
>
> + xe_vm_stop_vmas_merge(vm);
> +
> if (xe_vm_in_preempt_fence_mode(vm))
> flush_work(&vm->preempt.rebind_work);
>
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 351242c92c12..c4f3542eb464 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -374,6 +374,21 @@ struct xe_vm {
> bool batch_invalidate_tlb;
> /** @xef: XE file handle for tracking this VM's drm client */
> struct xe_file *xef;
> +
> + /**
> + * @merge_vmas_work: Delayed work item used to merge CPU address mirror VMAs.
> + * This work is scheduled to scan the GPU virtual memory space and
> + * identify adjacent CPU address mirror VMAs that have default memory
> + * attributes. When such VMAs are found, they are merged into a single
> + * larger VMA to reduce fragmentation. The merging process is triggered
> + * asynchronously via a delayed workqueue to avoid blocking critical paths
> + * and to batch updates when possible.
> + */
> + struct delayed_work merge_vmas_work;
> +
> + /** @merge_active: True if merge_vmas_work has been initialized */
> + bool merge_active;
> +
> };
>
> /** struct xe_vma_op_map - VMA map operation */
> --
> 2.34.1
>