[PATCH v2 i-g-t 2/3] tests/intel/xe_sriov_scheduling: Compute throughput from completion timestamps
Bernatowicz, Marcin
marcin.bernatowicz at linux.intel.com
Mon Aug 25 07:31:30 UTC 2025
On 8/22/2025 3:44 PM, Laguna, Lukasz wrote:
>
> On 8/19/2025 18:50, Marcin Bernatowicz wrote:
>> Make throughput comparisons robust under overlap/prefill and CPU jitter
>> by basing the window on actual completion times rather than thread
>> timing.
>>
>> - Record per-sample complete_ts[] and per-slot submit_ts[].
>> - Build the common window from completions: [max(first), min(last)].
>> - Compute throughput as count/window within that window.
>> - Push durations as submit->completion (complete_ts − submit_ts) and
>> print "mean submit->signal latency".
>>
>> Signed-off-by: Marcin Bernatowicz <marcin.bernatowicz at linux.intel.com>
>> Cc: Adam Miszczak <adam.miszczak at linux.intel.com>
>> Cc: Jakub Kolakowski <jakub1.kolakowski at intel.com>
>> Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
>> Cc: Lukasz Laguna <lukasz.laguna at intel.com>
>> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
>> ---
>> tests/intel/xe_sriov_scheduling.c | 65 ++++++++++++++++++-------------
>> 1 file changed, 37 insertions(+), 28 deletions(-)
>>
>> diff --git a/tests/intel/xe_sriov_scheduling.c b/tests/intel/
>> xe_sriov_scheduling.c
>> index 28a2b6e26..97263cbd2 100644
>> --- a/tests/intel/xe_sriov_scheduling.c
>> +++ b/tests/intel/xe_sriov_scheduling.c
>> @@ -39,6 +39,7 @@ struct subm_stats {
>> igt_stats_t samples;
>> uint64_t start_timestamp;
>> uint64_t end_timestamp;
>> + uint64_t *complete_ts; /* absolute completion timestamps (ns) */
>> unsigned int num_early_finish;
>> unsigned int concurrent_execs;
>> double concurrent_rate;
>> @@ -54,13 +55,14 @@ struct subm {
>> uint32_t vm;
>> struct drm_xe_engine_class_instance hwe;
>> uint32_t exec_queue_id;
>> - /* K slots (K BOs / addresses / mapped spinners / done fences) */
>> + /* K slots (K BOs / addresses / mapped spinners / done fences /
>> submit timestamps) */
>> unsigned int slots;
>> uint64_t *addr;
>> uint32_t *bo;
>> size_t bo_size;
>> struct xe_spin **spin;
>> uint32_t *done_fence;
>> + uint64_t *submit_ts;
>> struct drm_xe_sync sync[1];
>> struct drm_xe_exec exec;
>> };
>> @@ -101,8 +103,9 @@ static void subm_init(struct subm *s, int fd, int
>> vf_num, uint64_t addr,
>> s->bo = calloc(s->slots, sizeof(*s->bo));
>> s->spin = calloc(s->slots, sizeof(*s->spin));
>> s->done_fence = calloc(s->slots, sizeof(*s->done_fence));
>> + s->submit_ts = calloc(s->slots, sizeof(*s->submit_ts));
>> - igt_assert(s->addr && s->bo && s->spin && s->done_fence);
>> + igt_assert(s->addr && s->bo && s->spin && s->done_fence && s-
>> >submit_ts);
>> base = addr ? addr : 0x1a0000;
>> stride = ALIGN(s->bo_size, 0x10000);
>> @@ -137,6 +140,7 @@ static void subm_fini(struct subm *s)
>> free(s->bo);
>> free(s->spin);
>> free(s->done_fence);
>> + free(s->submit_ts);
>> }
>> static void subm_workload_init(struct subm *s, struct subm_work_desc
>> *work)
>> @@ -158,6 +162,7 @@ static void subm_wait_slot(struct subm *s,
>> unsigned int slot, uint64_t abs_timeo
>> static void subm_exec_slot(struct subm *s, unsigned int slot)
>> {
>> + struct timespec tv;
>> int nsync = 0;
>> syncobj_reset(s->fd, &s->done_fence[slot], 1);
>> @@ -169,6 +174,8 @@ static void subm_exec_slot(struct subm *s,
>> unsigned int slot)
>> s->exec.num_syncs = nsync;
>> s->exec.syncs = to_user_pointer(&s->sync[0]);
>> s->exec.address = s->addr[slot];
>> + igt_gettime(&tv);
>> + s->submit_ts[slot] = (uint64_t)tv.tv_sec * (uint64_t)NSEC_PER_SEC
>> + (uint64_t)tv.tv_nsec;
>> xe_exec(s->fd, &s->exec);
>> }
>> @@ -214,9 +221,11 @@ static void subm_exec_loop(struct subm *s, struct
>> subm_stats *stats,
>> for (i = 0; i < s->work.repeats; ++i) {
>> unsigned int slot = i % inflight;
>> - igt_gettime(&tv);
>> subm_wait_slot(s, slot, INT64_MAX);
>> - igt_stats_push(&stats->samples, igt_nsec_elapsed(&tv));
>> + igt_gettime(&tv);
>> + stats->complete_ts[i] = (uint64_t)tv.tv_sec *
>> (uint64_t)NSEC_PER_SEC +
>> + (uint64_t)tv.tv_nsec;
>> + igt_stats_push(&stats->samples, stats->complete_ts[i] - s-
>> >submit_ts[slot]);
>> if (!subm_is_work_complete(s, slot)) {
>> stats->num_early_finish++;
>> @@ -386,16 +395,22 @@ static void
>> compute_common_time_frame_stats(struct subm_set *set)
>> struct subm_stats *stats;
>> uint64_t common_start = 0;
>> uint64_t common_end = UINT64_MAX;
>> + uint64_t first_ts, last_ts;
>> - /* Find the common time frame */
>> + /* Find common window from completion timestamps */
>> for (i = 0; i < ndata; i++) {
>> stats = &data[i].stats;
>> - if (stats->start_timestamp > common_start)
>> - common_start = stats->start_timestamp;
>> + if (!stats->samples.n_values)
>> + continue;
>> +
>> + first_ts = stats->complete_ts[0];
>> + last_ts = stats->complete_ts[stats->samples.n_values - 1];
>> - if (stats->end_timestamp < common_end)
>> - common_end = stats->end_timestamp;
>> + if (first_ts > common_start)
>> + common_start = first_ts;
>> + if (last_ts < common_end)
>> + common_end = last_ts;
>> }
>> igt_info("common time frame: [%" PRIu64 ";%" PRIu64 "] %.2fms\n",
>> @@ -406,8 +421,7 @@ static void compute_common_time_frame_stats(struct
>> subm_set *set)
>> /* Compute concurrent_rate for each sample set within the common
>> time frame */
>> for (i = 0; i < ndata; i++) {
>> - uint64_t total_samples_duration = 0;
>> - uint64_t samples_duration_in_common_frame = 0;
>> + const double window_s = (common_end - common_start) * 1e-9;
>> stats = &data[i].stats;
>> stats->concurrent_execs = 0;
>> @@ -415,29 +429,20 @@ static void
>> compute_common_time_frame_stats(struct subm_set *set)
>> stats->concurrent_mean = 0.0;
>> for (j = 0; j < stats->samples.n_values; j++) {
>> - uint64_t sample_start = stats->start_timestamp +
>> total_samples_duration;
>> - uint64_t sample_end = sample_start + stats-
>> >samples.values_u64[j];
>> + uint64_t cts = stats->complete_ts[j];
>> - if (sample_start >= common_start &&
>> - sample_end <= common_end) {
>> + if (cts >= common_start && cts <= common_end) {
>> stats->concurrent_execs++;
>> - samples_duration_in_common_frame +=
>> - stats->samples.values_u64[j];
>> + stats->concurrent_mean += stats->samples.values_u64[j];
>> }
>> -
>> - total_samples_duration += stats->samples.values_u64[j];
>> }
>> - stats->concurrent_rate = samples_duration_in_common_frame ?
>> - (double)stats->concurrent_execs /
>> - (samples_duration_in_common_frame *
>> - 1e-9) :
>> - 0.0;
>> + stats->concurrent_rate = (window_s > 0.0) ?
>> + ((double)stats->concurrent_execs / window_s) : 0.0;
>> stats->concurrent_mean = stats->concurrent_execs ?
>> - (double)samples_duration_in_common_frame /
>> - stats->concurrent_execs :
>> - 0.0;
>> - igt_info("[%s] Throughput = %.4f execs/s mean duration=%.4fms
>> nsamples=%d\n",
>> + (double)stats->concurrent_mean /
>> + stats->concurrent_execs : 0.0;
>> + igt_info("[%s] Throughput = %.4f execs/s mean submit->signal
>> latency=%.4fms nsamples=%d\n",
>> data[i].subm.id, stats->concurrent_rate, stats-
>> >concurrent_mean * 1e-6,
>> stats->concurrent_execs);
>> }
>> @@ -667,6 +672,8 @@ static void throughput_ratio(int pf_fd, int
>> num_vfs, const struct subm_opts *opt
>> .repeats = job_sched_params.num_repeats });
>> igt_stats_init_with_size(&set->data[n].stats.samples,
>> set->data[n].subm.work.repeats);
>> + set->data[n].stats.complete_ts = calloc(set-
>> >data[n].subm.work.repeats,
>> + sizeof(uint64_t));
>
> Missing free() of complete_ts in subm_set_free_data()?
I missed it when splitting the patch, my bad. Thanks for catching it.
>
>> if (set->sync_method == SYNC_BARRIER)
>> set->data[n].barrier = &set->barrier;
>> }
>> @@ -762,6 +769,8 @@ static void nonpreempt_engine_resets(int pf_fd,
>> int num_vfs,
>> .repeats = MIN_NUM_REPEATS });
>> igt_stats_init_with_size(&set->data[n].stats.samples,
>> set->data[n].subm.work.repeats);
>> + set->data[n].stats.complete_ts = calloc(set-
>> >data[n].subm.work.repeats,
>> + sizeof(uint64_t));
>> if (set->sync_method == SYNC_BARRIER)
>> set->data[n].barrier = &set->barrier;
>> }
More information about the igt-dev mailing list