[PATCH v3 1/2] dma-fence: Add unwrap mode enum and macro for in-place deduplication

Yadav, Arvind arvyadav at amd.com
Tue Apr 29 16:57:17 UTC 2025


On 4/29/2025 9:38 PM, Christian König wrote:
> On 4/29/25 17:24, Arvind Yadav wrote:
>> Introduce a new `enum dma_fence_unwrap_mode` to support multiple modes
>> of fence unwrapping and merging behavior. This lays the groundwork for
>> more flexible and efficient fence operations.
>>
>> Add a new macro `dma_fence_unwrap_shrink_array()` to allow callers to
>> unwrap, deduplicate, and sort dma_fence arrays in-place, avoiding
>> additional memory allocations. This mode filters out already signaled
>> fences and keeps only the latest fence per context.
>>
>> To support this, refactor `__dma_fence_unwrap_merge()` to handle both
>> merge and shrink modes through the new `dma_fence_unwrap_mode` parameter.
>> The sorting and deduplication logic is extracted into a new helper function
>> `dma_fence_unwrap_dedup_sort()`.
>>
>> These changes allow clients to efficiently compact arrays of fences with
>> minimal overhead, improving memory usage and performance in common
>> scenarios like command submission or dependency tracking.
>>
>> v2: - Export this code from dma-fence-unwrap.c (by Christian).
>> v3: - Split this into a dma-buf patch and an amd userq patch (by Sunil).
>>      - No need to add a new function, just re-use the existing one (by Christian).
>>
>> Cc: Alex Deucher <alexander.deucher at amd.com>
>> Cc: Christian Koenig <christian.koenig at amd.com>
>> Cc: Sunil Khatri <sunil.khatri at amd.com>
>> Cc: Arunpravin Paneer Selvam <Arunpravin.PaneerSelvam at amd.com>
>> Signed-off-by: Arvind Yadav <Arvind.Yadav at amd.com>
>> ---
>>   drivers/dma-buf/dma-fence-unwrap.c | 54 +++++++++++++++++++++---------
>>   include/linux/dma-fence-unwrap.h   | 44 ++++++++++++++++++++++--
>>   2 files changed, 81 insertions(+), 17 deletions(-)
>>
>> diff --git a/drivers/dma-buf/dma-fence-unwrap.c b/drivers/dma-buf/dma-fence-unwrap.c
>> index 2a059ac0ed27..f389c37a0eb5 100644
>> --- a/drivers/dma-buf/dma-fence-unwrap.c
>> +++ b/drivers/dma-buf/dma-fence-unwrap.c
>> @@ -79,15 +79,51 @@ static int fence_cmp(const void *_a, const void *_b)
>>   	return 0;
>>   }
>>   
>> +static void dma_fence_unwrap_dedup_sort(struct dma_fence **array,
>> +					unsigned int *count)
> Just call that dma_fence_dedup_array() (e.g. without the unwrap).
Noted.
>
>> +{
>> +	int i, j;
>> +
>> +	sort(array, *count, sizeof(*array), fence_cmp, NULL);
>> +
>> +	/*
>> +	 * Only keep the most recent fence for each context.
>> +	 */
>> +	j = 0;
>> +	for (i = 1; i < *count; i++) {
>> +		if (array[i]->context == array[j]->context)
>> +			dma_fence_put(array[i]);
>> +		else
>> +			array[++j] = array[i];
>> +	}
>> +
>> +	*count = ++j;
>> +}
> Instead of making count a pointer just return the resulting count.
Noted.
>
>> +
>>   /* Implementation for the dma_fence_merge() marco, don't use directly */
>>   struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>>   					   struct dma_fence **fences,
>> -					   struct dma_fence_unwrap *iter)
>> +					   struct dma_fence_unwrap *iter,
>> +					   enum dma_fence_unwrap_mode mode,
>> +					   unsigned int *out_count)
>>   {
>>   	struct dma_fence *tmp, *unsignaled = NULL, **array;
>>   	struct dma_fence_array *result;
>>   	ktime_t timestamp;
>> -	int i, j, count;
>> +	int i, count;
>> +
>> +	if (mode == DMA_FENCE_UNWRAP_ARRAY) {
>> +		array = fences;
>> +		count = num_fences;
>> +
>> +		if (count > 1)
>> +			dma_fence_unwrap_dedup_sort(array, &count);
>> +
>> +		if (out_count)
>> +			*out_count = count;
>> +
>> +		return NULL;
>> +	}
>
> Completely drop that. Just export the dma_fence_dedup_array() function and use that directly.
Sure, I will export dma_fence_dedup_array().

Regards,
~arvind
>
> Regards,
> Christian.
>
>>   
>>   	count = 0;
>>   	timestamp = ns_to_ktime(0);
>> @@ -141,19 +177,7 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>>   	if (count == 0 || count == 1)
>>   		goto return_fastpath;
>>   
>> -	sort(array, count, sizeof(*array), fence_cmp, NULL);
>> -
>> -	/*
>> -	 * Only keep the most recent fence for each context.
>> -	 */
>> -	j = 0;
>> -	for (i = 1; i < count; i++) {
>> -		if (array[i]->context == array[j]->context)
>> -			dma_fence_put(array[i]);
>> -		else
>> -			array[++j] = array[i];
>> -	}
>> -	count = ++j;
>> +	dma_fence_unwrap_dedup_sort(array, &count);
>>   
>>   	if (count > 1) {
>>   		result = dma_fence_array_create(count, array,
>> diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h
>> index 66b1e56fbb81..b5683b024548 100644
>> --- a/include/linux/dma-fence-unwrap.h
>> +++ b/include/linux/dma-fence-unwrap.h
>> @@ -10,6 +10,18 @@
>>   
>>   struct dma_fence;
>>   
>> +/**
>> + * enum dma_fence_unwrap_mode - Mode of operation for fence unwrap and merge
>> + * @DMA_FENCE_UNWRAP_MERGE: Merge all unsignaled fences into a new dma_fence object.
>> + *                          Returns a single merged fence or stub.
>> + * @DMA_FENCE_UNWRAP_ARRAY: Deduplicate and sort fences in-place.
>> + *                          Returns nothing, but updates the input array and count.
>> + */
>> +enum dma_fence_unwrap_mode {
>> +	DMA_FENCE_UNWRAP_MERGE,  /* Return a single merged dma_fence or NULL on error */
>> +	DMA_FENCE_UNWRAP_ARRAY,  /* Return deduplicated, sorted in-place array */
>> +};
>> +
>>   /**
>>    * struct dma_fence_unwrap - cursor into the container structure
>>    *
>> @@ -50,7 +62,9 @@ struct dma_fence *dma_fence_unwrap_next(struct dma_fence_unwrap *cursor);
>>   
>>   struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>>   					   struct dma_fence **fences,
>> -					   struct dma_fence_unwrap *cursors);
>> +					   struct dma_fence_unwrap *cursors,
>> +					   enum dma_fence_unwrap_mode mode,
>> +					   unsigned int *out_count);
>>   
>>   /**
>>    * dma_fence_unwrap_merge - unwrap and merge fences
>> @@ -58,6 +72,9 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>>    * All fences given as parameters are unwrapped and merged back together as flat
>>    * dma_fence_array. Useful if multiple containers need to be merged together.
>>    *
>> + * Internally uses the DMA_FENCE_UNWRAP_MERGE mode to return a single merged
>> + * dma_fence (or a stub if all fences are signaled).
>> + *
>>    * Implemented as a macro to allocate the necessary arrays on the stack and
>>    * account the stack frame size to the caller.
>>    *
>> @@ -69,7 +86,30 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences,
>>   		struct dma_fence *__f[] = { __VA_ARGS__ };		\
>>   		struct dma_fence_unwrap __c[ARRAY_SIZE(__f)];		\
>>   									\
>> -		__dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c);	\
>> +		__dma_fence_unwrap_merge(ARRAY_SIZE(__f), __f, __c,	\
>> +					 DMA_FENCE_UNWRAP_MERGE, NULL);	\
>> +	})
>> +
>> +/**
>> + * dma_fence_unwrap_shrink_array - Deduplicate and sort an array of fences in-place
>> + * @_num_fences: Number of input fences
>> + * @_fences:     Array of dma_fence pointers to be deduplicated and sorted
>> + * @_out_fences: Output variable updated with the number of fences after deduplication
>> + *
>> + * This macro sorts the given fences and removes duplicates, keeping only the
>> + * most recent fence per context. It does not unwrap container fences and does
>> + * not filter out signaled ones.
>> + *
>> + * Internally uses the DMA_FENCE_UNWRAP_ARRAY mode to perform in-place filtering and
>> + * sorting.
>> + *
>> + * The result is stored back in the input array (_fences) and the final count
>> + * is written to @_out_fences. No memory allocation is performed.
>> + */
>> +#define dma_fence_unwrap_shrink_array(_num_fences, _fences, _out_fences)	\
>> +	({									\
>> +		__dma_fence_unwrap_merge((_num_fences), (_fences), NULL,	\
>> +					 DMA_FENCE_UNWRAP_ARRAY, &_out_fences);	\
>>   	})
>>   
>>   #endif


More information about the amd-gfx mailing list