[PATCH v5 21/23] drm/xe/vm: Add a delayed worker to merge fragmented vmas

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Wed Jul 30 11:08:16 UTC 2025



On 29-07-2025 10:09, Matthew Brost wrote:
> On Tue, Jul 22, 2025 at 07:05:24PM +0530, Himal Prasad Ghimiray wrote:
>> During the initial mirror bind, initialize and start the delayed work item
>> responsible for merging adjacent CPU address mirror VMAs with default
>> memory attributes. This function sets the merge_active flag and schedules
>> the work to run after a delay, allowing batching of VMA updates.
>>
> 
> I think we will need some way to defragment, but it might need more
> thought. The trade-off between defragmenting on every insertion of a
> mirror VMA (binding a BO back to mirror) and every unmap restoring the
> defaults vs. a periodic worker needs to be carefully considered.
> 
> The trade-off is more time up front (plus perhaps some additional
> complexity) vs. a periodic worker which blocks out all memory transactions.
> 
> Since this doesn't affect any functionality, perhaps table this for now +
> we run it by Thomas to formulate a plan / solution.

Sure, let's discuss and conclude before settling on the approach. I will
drop this patch from the next version and post it separately in the future
once it is finalized.
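
For the discussion: what the worker does boils down to one ordered walk over
the VA list, folding runs of adjacent cpu-addr-mirror VMAs that still have
default attributes into single ranges. A small userspace model of that pass,
purely illustrative (the struct, data and output below are made up for the
example, not driver code):

#include <stdio.h>
#include <stdbool.h>
#include <stdint.h>

/* Toy model of the worker's single pass: VMAs sorted by address, runs of
 * adjacent "mirror + default attrs" VMAs get folded into one range. */
struct toy_vma {
	uint64_t start, end;
	bool mirror_default;
};

int main(void)
{
	struct toy_vma vmas[] = {
		{ 0x1000, 0x2000, true  },
		{ 0x2000, 0x3000, true  },	/* adjacent -> merges with the above */
		{ 0x3000, 0x4000, false },	/* non-default attrs -> breaks the run */
		{ 0x5000, 0x6000, true  },	/* run of length 1 -> nothing to merge */
	};
	uint64_t merge_start = 0, merge_end = 0;
	int merge_len = 0;

	for (size_t i = 0; i < sizeof(vmas) / sizeof(vmas[0]); i++) {
		struct toy_vma *v = &vmas[i];

		/* A non-mergeable VMA or an address gap ends the current run. */
		if (!v->mirror_default || (merge_len && v->start != merge_end)) {
			if (merge_len > 1)
				printf("merge [0x%llx, 0x%llx)\n",
				       (unsigned long long)merge_start,
				       (unsigned long long)merge_end);
			merge_len = 0;
			if (!v->mirror_default)
				continue;
		}
		if (!merge_len)
			merge_start = v->start;
		merge_end = v->end;
		merge_len++;
	}
	if (merge_len > 1)
		printf("merge [0x%llx, 0x%llx)\n",
		       (unsigned long long)merge_start,
		       (unsigned long long)merge_end);

	return 0;
}

Whether that pass runs from a periodic worker under vm->lock or eagerly at
bind/unmap time is exactly the trade-off we still need to settle.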

Thanks
> 
> Matt
> 
>> Suggested-by: Matthew Brost <matthew.brost at intel.com>
>> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_vm.c       | 126 +++++++++++++++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_vm_types.h |  15 ++++
>>   2 files changed, 141 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 003c8209f8bd..bee849167c0d 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1160,6 +1160,127 @@ static void xe_vma_free(struct xe_vma *vma)
>>   		kfree(vma);
>>   }
>>   
>> +struct va_range {
>> +	u64 start;
>> +	u64 end;
>> +};
>> +
>> +static void add_merged_range(struct va_range **ranges, int *count, int *capacity,
>> +			     u64 start, u64 end)
>> +{
>> +	const int array_size  = 8;
>> +	struct va_range *new_ranges;
>> +	int new_capacity;
>> +
>> +	if (*count == *capacity) {
>> +		new_capacity = *capacity ? *capacity * 2 : array_size;
>> +		new_ranges = krealloc(*ranges, new_capacity * sizeof(**ranges), GFP_KERNEL);
>> +		if (!new_ranges)
>> +			return;
>> +
>> +		*ranges = new_ranges;
>> +		*capacity = new_capacity;
>> +	}
>> +	(*ranges)[(*count)++] = (struct va_range){ .start = start, .end = end };
>> +}
>> +
>> +static void xe_vm_vmas_merge_worker(struct work_struct *work)
>> +{
>> +	struct xe_vm *vm = container_of(to_delayed_work(work), struct xe_vm, merge_vmas_work);
>> +	struct drm_gpuva *gpuva, *next = NULL;
>> +	struct va_range *merged_ranges = NULL;
>> +	int merge_count = 0, merge_capacity = 0;
>> +	bool in_merge = false;
>> +	u64 merge_start = 0, merge_end = 0;
>> +	int merge_len = 0;
>> +
>> +	if (!vm->merge_active)
>> +		return;
>> +
>> +	down_write(&vm->lock);
>> +
>> +	drm_gpuvm_for_each_va_safe(gpuva, next, &vm->gpuvm) {
>> +		struct xe_vma *vma = gpuva_to_vma(gpuva);
>> +
>> +		if (!xe_vma_is_cpu_addr_mirror(vma) || !xe_vma_has_default_mem_attrs(vma)) {
>> +			if (in_merge && merge_len > 1)
>> +				add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
>> +						 merge_start, merge_end);
>> +
>> +			in_merge = false;
>> +			merge_len = 0;
>> +			continue;
>> +		}
>> +
>> +		if (!in_merge) {
>> +			merge_start = xe_vma_start(vma);
>> +			merge_end = xe_vma_end(vma);
>> +			in_merge = true;
>> +			merge_len = 1;
>> +		} else if (xe_vma_start(vma) == merge_end && xe_vma_has_default_mem_attrs(vma)) {
>> +			merge_end = xe_vma_end(vma);
>> +			merge_len++;
>> +		} else {
>> +			if (merge_len > 1)
>> +				add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
>> +						 merge_start, merge_end);
>> +			merge_start = xe_vma_start(vma);
>> +			merge_end = xe_vma_end(vma);
>> +			merge_len = 1;
>> +		}
>> +	}
>> +
>> +	if (in_merge && merge_len > 1) {
>> +		add_merged_range(&merged_ranges, &merge_count, &merge_capacity,
>> +				 merge_start, merge_end);
>> +	}
>> +
>> +	for (int i = 0; i < merge_count; i++) {
>> +		vm_dbg(&vm->xe->drm, "Merged VA range %d: start=0x%016llx, end=0x%016llx\n",
>> +		       i, merged_ranges[i].start, merged_ranges[i].end);
>> +
>> +		if (xe_vm_alloc_cpu_addr_mirror_vma(vm, merged_ranges[i].start,
>> +						    merged_ranges[i].end - merged_ranges[i].start))
>> +			break;
>> +	}
>> +
>> +	up_write(&vm->lock);
>> +	kfree(merged_ranges);
>> +	schedule_delayed_work(&vm->merge_vmas_work, msecs_to_jiffies(5000));
>> +}
>> +
>> +/*
>> + * xe_vm_start_vmas_merge - Initialize and schedule VMA merge work
>> + * @vm: Pointer to the xe_vm structure
>> + *
>> + * Initializes the delayed work item responsible for merging adjacent
>> + * CPU address mirror VMAs with default memory attributes. This function
>> + * sets the merge_active flag and schedules the work to run after a delay,
>> + * allowing batching of VMA updates.
>> + */
>> +static void xe_vm_start_vmas_merge(struct xe_vm *vm)
>> +{
>> +	if (vm->merge_active)
>> +		return;
>> +
>> +	vm->merge_active = true;
>> +	INIT_DELAYED_WORK(&vm->merge_vmas_work, xe_vm_vmas_merge_worker);
>> +	schedule_delayed_work(&vm->merge_vmas_work, msecs_to_jiffies(5000));
>> +}
>> +
>> +/*
>> + * xe_vm_stop_vmas_merge - Cancel scheduled VMA merge work
>> + * @vm: Pointer to the xe_vm structure
>> + */
>> +static void xe_vm_stop_vmas_merge(struct xe_vm *vm)
>> +{
>> +	if (!vm->merge_active)
>> +		return;
>> +
>> +	vm->merge_active = false;
>> +	cancel_delayed_work_sync(&vm->merge_vmas_work);
>> +}
>> +
>>   #define VMA_CREATE_FLAG_READ_ONLY		BIT(0)
>>   #define VMA_CREATE_FLAG_IS_NULL			BIT(1)
>>   #define VMA_CREATE_FLAG_DUMPABLE		BIT(2)
>> @@ -1269,6 +1390,9 @@ static struct xe_vma *xe_vma_create(struct xe_vm *vm,
>>   		xe_vm_get(vm);
>>   	}
>>   
>> +	if (xe_vma_is_cpu_addr_mirror(vma))
>> +		xe_vm_start_vmas_merge(vm);
>> +
>>   	return vma;
>>   }
>>   
>> @@ -1982,6 +2106,8 @@ static void vm_destroy_work_func(struct work_struct *w)
>>   	/* xe_vm_close_and_put was not called? */
>>   	xe_assert(xe, !vm->size);
>>   
>> +	xe_vm_stop_vmas_merge(vm);
>> +
>>   	if (xe_vm_in_preempt_fence_mode(vm))
>>   		flush_work(&vm->preempt.rebind_work);
>>   
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index 351242c92c12..c4f3542eb464 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -374,6 +374,21 @@ struct xe_vm {
>>   	bool batch_invalidate_tlb;
>>   	/** @xef: XE file handle for tracking this VM's drm client */
>>   	struct xe_file *xef;
>> +
>> +	/**
>> +	 * @merge_vmas_work: Delayed work item used to merge CPU address mirror VMAs.
>> +	 * This work is scheduled to scan the GPU virtual memory space and
>> +	 * identify adjacent CPU address mirror VMAs that have default memory
>> +	 * attributes. When such VMAs are found, they are merged into a single
>> +	 * larger VMA to reduce fragmentation. The merging process is triggered
>> +	 * asynchronously via a delayed workqueue to avoid blocking critical paths
>> +	 * and to batch updates when possible.
>> +	 */
>> +	struct delayed_work merge_vmas_work;
>> +
>> +	/** @merge_active: True if merge_vmas_work has been initialized */
>> +	bool merge_active;
>> +
>>   };
>>   
>>   /** struct xe_vma_op_map - VMA map operation */
>> -- 
>> 2.34.1
>>


