[PATCH 11/12] drm/v3d: Do not use intermediate storage when copying performance query results

Tvrtko Ursulin tvrtko.ursulin at igalia.com
Thu Jul 11 12:55:18 UTC 2024


On 11/07/2024 13:31, Iago Toral wrote:
> El mar, 09-07-2024 a las 17:34 +0100, Tvrtko Ursulin escribió:
>> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>>
>> Removing the intermediate buffer removes the last use of the
>> V3D_MAX_COUNTERS define, which will enable further driver cleanup.
>>
>> While at it pull the 32 vs 64 bit copying decision outside the loop
>> in
>> order to reduce the number of conditional instructions.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>> ---
>>   drivers/gpu/drm/v3d/v3d_sched.c | 60 ++++++++++++++++++++-----------
>> --
>>   1 file changed, 37 insertions(+), 23 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c
>> b/drivers/gpu/drm/v3d/v3d_sched.c
>> index fc8730264386..77f795e38fad 100644
>> --- a/drivers/gpu/drm/v3d/v3d_sched.c
>> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
>> @@ -421,18 +421,23 @@ v3d_reset_timestamp_queries(struct v3d_cpu_job
>> *job)
>>   	v3d_put_bo_vaddr(bo);
>>   }
>>   
>> +static void write_to_buffer_32(u32 *dst, unsigned int idx, u32
>> value)
>> +{
>> +	dst[idx] = value;
>> +}
>> +
>> +static void write_to_buffer_64(u64 *dst, unsigned int idx, u64
>> value)
>> +{
>> +	dst[idx] = value;
>> +}
>> +
>>   static void
>> -write_to_buffer(void *dst, u32 idx, bool do_64bit, u64 value)
>> +write_to_buffer(void *dst, unsigned int idx, bool do_64bit, u64
>> value)
>>   {
>> -	if (do_64bit) {
>> -		u64 *dst64 = (u64 *)dst;
>> -
>> -		dst64[idx] = value;
>> -	} else {
>> -		u32 *dst32 = (u32 *)dst;
>> -
>> -		dst32[idx] = (u32)value;
>> -	}
>> +	if (do_64bit)
>> +		write_to_buffer_64(dst, idx, value);
>> +	else
>> +		write_to_buffer_32(dst, idx, value);
>>   }
>>   
>>   static void
>> @@ -505,18 +510,23 @@ v3d_reset_performance_queries(struct
>> v3d_cpu_job *job)
>>   }
>>   
>>   static void
>> -v3d_write_performance_query_result(struct v3d_cpu_job *job, void
>> *data, u32 query)
>> +v3d_write_performance_query_result(struct v3d_cpu_job *job, void
>> *data,
>> +				   unsigned int query)
>>   {
>> -	struct v3d_performance_query_info *performance_query = &job-
>>> performance_query;
>> -	struct v3d_copy_query_results_info *copy = &job->copy;
>> +	struct v3d_performance_query_info *performance_query =
>> +						&job-
>>> performance_query;
>>   	struct v3d_file_priv *v3d_priv = job->base.file-
>>> driver_priv;
>>   	struct v3d_dev *v3d = job->base.v3d;
>> -	struct v3d_perfmon *perfmon;
>> -	u64 counter_values[V3D_MAX_COUNTERS];
>> +	unsigned int i, j, offset;
>>   
>> -	for (int i = 0; i < performance_query->nperfmons; i++) {
>> -		perfmon = v3d_perfmon_find(v3d_priv,
>> -					   performance_query-
>>> queries[query].kperfmon_ids[i]);
>> +	for (i = 0, offset = 0;
>> +	     i < performance_query->nperfmons;
>> +	     i++, offset += DRM_V3D_MAX_PERF_COUNTERS) {
>> +		struct v3d_performance_query *q =
>> +				&performance_query->queries[query];
> 
> Looks like we could move this before the loop.

Indeed! I will change it and re-send, either for v4 of the series, or 
single update if there will not be any other changes required.

> Otherwise this patch is:
> Reviewed-by: Iago Toral Quiroga <itoral at igalia.com>

Thanks!

Regards,

Tvrtko

>> +		struct v3d_perfmon *perfmon;
>> +
>> +		perfmon = v3d_perfmon_find(v3d_priv, q-
>>> kperfmon_ids[i]);
>>   		if (!perfmon) {
>>   			DRM_DEBUG("Failed to find perfmon.");
>>   			continue;
>> @@ -524,14 +534,18 @@ v3d_write_performance_query_result(struct
>> v3d_cpu_job *job, void *data, u32 quer
>>   
>>   		v3d_perfmon_stop(v3d, perfmon, true);
>>   
>> -		memcpy(&counter_values[i *
>> DRM_V3D_MAX_PERF_COUNTERS], perfmon->values,
>> -		       perfmon->ncounters * sizeof(u64));
>> +		if (job->copy.do_64bit) {
>> +			for (j = 0; j < perfmon->ncounters; j++)
>> +				write_to_buffer_64(data, offset + j,
>> +						   perfmon-
>>> values[j]);
>> +		} else {
>> +			for (j = 0; j < perfmon->ncounters; j++)
>> +				write_to_buffer_32(data, offset + j,
>> +						   perfmon-
>>> values[j]);
>> +		}
>>   
>>   		v3d_perfmon_put(perfmon);
>>   	}
>> -
>> -	for (int i = 0; i < performance_query->ncounters; i++)
>> -		write_to_buffer(data, i, copy->do_64bit,
>> counter_values[i]);
>>   }
>>   
>>   static void
> 


More information about the dri-devel mailing list