[PATCH v2 i-g-t 2/3] tests/intel/xe_sriov_scheduling: Compute throughput from completion timestamps

Bernatowicz, Marcin marcin.bernatowicz at linux.intel.com
Mon Aug 25 07:31:30 UTC 2025



On 8/22/2025 3:44 PM, Laguna, Lukasz wrote:
> 
> On 8/19/2025 18:50, Marcin Bernatowicz wrote:
> >> Make throughput comparisons robust under overlap/prefill and CPU jitter
> >> by basing the window on actual completion times rather than thread timing.
>>
>> - Record per-sample complete_ts[] and per-slot submit_ts[].
>> - Build the common window from completions: [max(first), min(last)].
>> - Compute throughput as count/window within that window.
>> - Push durations as submit->completion (complete_ts − submit_ts) and
>> print "mean submit->signal latency".
>>
>> Signed-off-by: Marcin Bernatowicz <marcin.bernatowicz at linux.intel.com>
>> Cc: Adam Miszczak <adam.miszczak at linux.intel.com>
>> Cc: Jakub Kolakowski <jakub1.kolakowski at intel.com>
>> Cc: Kamil Konieczny <kamil.konieczny at linux.intel.com>
>> Cc: Lukasz Laguna <lukasz.laguna at intel.com>
>> Cc: Satyanarayana K V P <satyanarayana.k.v.p at intel.com>
>> ---
>>   tests/intel/xe_sriov_scheduling.c | 65 ++++++++++++++++++-------------
>>   1 file changed, 37 insertions(+), 28 deletions(-)
>>
>> diff --git a/tests/intel/xe_sriov_scheduling.c b/tests/intel/ 
>> xe_sriov_scheduling.c
>> index 28a2b6e26..97263cbd2 100644
>> --- a/tests/intel/xe_sriov_scheduling.c
>> +++ b/tests/intel/xe_sriov_scheduling.c
>> @@ -39,6 +39,7 @@ struct subm_stats {
>>       igt_stats_t samples;
>>       uint64_t start_timestamp;
>>       uint64_t end_timestamp;
>> +    uint64_t *complete_ts; /* absolute completion timestamps (ns) */
>>       unsigned int num_early_finish;
>>       unsigned int concurrent_execs;
>>       double concurrent_rate;
>> @@ -54,13 +55,14 @@ struct subm {
>>       uint32_t vm;
>>       struct drm_xe_engine_class_instance hwe;
>>       uint32_t exec_queue_id;
>> -    /* K slots (K BOs / addresses / mapped spinners / done fences) */
>> +    /* K slots (K BOs / addresses / mapped spinners / done fences / 
>> submit timestamps) */
>>       unsigned int slots;
>>       uint64_t *addr;
>>       uint32_t *bo;
>>       size_t bo_size;
>>       struct xe_spin **spin;
>>       uint32_t *done_fence;
>> +    uint64_t *submit_ts;
>>       struct drm_xe_sync sync[1];
>>       struct drm_xe_exec exec;
>>   };
>> @@ -101,8 +103,9 @@ static void subm_init(struct subm *s, int fd, int 
>> vf_num, uint64_t addr,
>>       s->bo = calloc(s->slots, sizeof(*s->bo));
>>       s->spin = calloc(s->slots, sizeof(*s->spin));
>>       s->done_fence = calloc(s->slots, sizeof(*s->done_fence));
>> +    s->submit_ts = calloc(s->slots, sizeof(*s->submit_ts));
>> -    igt_assert(s->addr && s->bo && s->spin && s->done_fence);
>> +    igt_assert(s->addr && s->bo && s->spin && s->done_fence && s- 
>> >submit_ts);
>>       base = addr ? addr : 0x1a0000;
>>       stride = ALIGN(s->bo_size, 0x10000);
>> @@ -137,6 +140,7 @@ static void subm_fini(struct subm *s)
>>       free(s->bo);
>>       free(s->spin);
>>       free(s->done_fence);
>> +    free(s->submit_ts);
>>   }
>>   static void subm_workload_init(struct subm *s, struct subm_work_desc 
>> *work)
>> @@ -158,6 +162,7 @@ static void subm_wait_slot(struct subm *s, 
>> unsigned int slot, uint64_t abs_timeo
>>   static void subm_exec_slot(struct subm *s, unsigned int slot)
>>   {
>> +    struct timespec tv;
>>       int nsync = 0;
>>       syncobj_reset(s->fd, &s->done_fence[slot], 1);
>> @@ -169,6 +174,8 @@ static void subm_exec_slot(struct subm *s, 
>> unsigned int slot)
>>       s->exec.num_syncs = nsync;
>>       s->exec.syncs = to_user_pointer(&s->sync[0]);
>>       s->exec.address = s->addr[slot];
>> +    igt_gettime(&tv);
>> +    s->submit_ts[slot] = (uint64_t)tv.tv_sec * (uint64_t)NSEC_PER_SEC 
>> + (uint64_t)tv.tv_nsec;
>>       xe_exec(s->fd, &s->exec);
>>   }
>> @@ -214,9 +221,11 @@ static void subm_exec_loop(struct subm *s, struct 
>> subm_stats *stats,
>>       for (i = 0; i < s->work.repeats; ++i) {
>>           unsigned int slot = i % inflight;
>> -        igt_gettime(&tv);
>>           subm_wait_slot(s, slot, INT64_MAX);
>> -        igt_stats_push(&stats->samples, igt_nsec_elapsed(&tv));
>> +        igt_gettime(&tv);
>> +        stats->complete_ts[i] = (uint64_t)tv.tv_sec * 
>> (uint64_t)NSEC_PER_SEC +
>> +                    (uint64_t)tv.tv_nsec;
>> +        igt_stats_push(&stats->samples, stats->complete_ts[i] - s- 
>> >submit_ts[slot]);
>>           if (!subm_is_work_complete(s, slot)) {
>>               stats->num_early_finish++;
>> @@ -386,16 +395,22 @@ static void 
>> compute_common_time_frame_stats(struct subm_set *set)
>>       struct subm_stats *stats;
>>       uint64_t common_start = 0;
>>       uint64_t common_end = UINT64_MAX;
>> +    uint64_t first_ts, last_ts;
>> -    /* Find the common time frame */
>> +    /* Find common window from completion timestamps */
>>       for (i = 0; i < ndata; i++) {
>>           stats = &data[i].stats;
>> -        if (stats->start_timestamp > common_start)
>> -            common_start = stats->start_timestamp;
>> +        if (!stats->samples.n_values)
>> +            continue;
>> +
>> +        first_ts = stats->complete_ts[0];
>> +        last_ts = stats->complete_ts[stats->samples.n_values - 1];
>> -        if (stats->end_timestamp < common_end)
>> -            common_end = stats->end_timestamp;
>> +        if (first_ts > common_start)
>> +            common_start = first_ts;
>> +        if (last_ts < common_end)
>> +            common_end = last_ts;
>>       }
>>       igt_info("common time frame: [%" PRIu64 ";%" PRIu64 "] %.2fms\n",
>> @@ -406,8 +421,7 @@ static void compute_common_time_frame_stats(struct 
>> subm_set *set)
>>       /* Compute concurrent_rate for each sample set within the common 
>> time frame */
>>       for (i = 0; i < ndata; i++) {
>> -        uint64_t total_samples_duration = 0;
>> -        uint64_t samples_duration_in_common_frame = 0;
>> +        const double window_s = (common_end - common_start) * 1e-9;
>>           stats = &data[i].stats;
>>           stats->concurrent_execs = 0;
>> @@ -415,29 +429,20 @@ static void 
>> compute_common_time_frame_stats(struct subm_set *set)
>>           stats->concurrent_mean = 0.0;
>>           for (j = 0; j < stats->samples.n_values; j++) {
>> -            uint64_t sample_start = stats->start_timestamp + 
>> total_samples_duration;
>> -            uint64_t sample_end = sample_start + stats- 
>> >samples.values_u64[j];
>> +            uint64_t cts = stats->complete_ts[j];
>> -            if (sample_start >= common_start &&
>> -                sample_end <= common_end) {
>> +            if (cts >= common_start && cts <= common_end) {
>>                   stats->concurrent_execs++;
>> -                samples_duration_in_common_frame +=
>> -                    stats->samples.values_u64[j];
>> +                stats->concurrent_mean += stats->samples.values_u64[j];
>>               }
>> -
>> -            total_samples_duration += stats->samples.values_u64[j];
>>           }
>> -        stats->concurrent_rate = samples_duration_in_common_frame ?
>> -                     (double)stats->concurrent_execs /
>> -                         (samples_duration_in_common_frame *
>> -                          1e-9) :
>> -                     0.0;
>> +        stats->concurrent_rate = (window_s > 0.0) ?
>> +                     ((double)stats->concurrent_execs / window_s) : 0.0;
>>           stats->concurrent_mean = stats->concurrent_execs ?
>> -                      (double)samples_duration_in_common_frame /
>> -                          stats->concurrent_execs :
>> -                      0.0;
>> -        igt_info("[%s] Throughput = %.4f execs/s mean duration=%.4fms 
>> nsamples=%d\n",
>> +                     (double)stats->concurrent_mean /
>> +                     stats->concurrent_execs : 0.0;
>> +        igt_info("[%s] Throughput = %.4f execs/s mean submit->signal 
>> latency=%.4fms nsamples=%d\n",
>>                data[i].subm.id, stats->concurrent_rate, stats- 
>> >concurrent_mean * 1e-6,
>>                stats->concurrent_execs);
>>       }
>> @@ -667,6 +672,8 @@ static void throughput_ratio(int pf_fd, int 
>> num_vfs, const struct subm_opts *opt
>>                       .repeats = job_sched_params.num_repeats });
>>           igt_stats_init_with_size(&set->data[n].stats.samples,
>>                        set->data[n].subm.work.repeats);
>> +        set->data[n].stats.complete_ts = calloc(set- 
>> >data[n].subm.work.repeats,
>> +                            sizeof(uint64_t));
> 
> Missing free() of complete_ts in subm_set_free_data()?

That was missed during the patch split, my bad. Thanks for catching it.

> 
>>           if (set->sync_method == SYNC_BARRIER)
>>               set->data[n].barrier = &set->barrier;
>>       }
>> @@ -762,6 +769,8 @@ static void nonpreempt_engine_resets(int pf_fd, 
>> int num_vfs,
>>                       .repeats = MIN_NUM_REPEATS });
>>           igt_stats_init_with_size(&set->data[n].stats.samples,
>>                        set->data[n].subm.work.repeats);
>> +        set->data[n].stats.complete_ts = calloc(set- 
>> >data[n].subm.work.repeats,
>> +                            sizeof(uint64_t));
>>           if (set->sync_method == SYNC_BARRIER)
>>               set->data[n].barrier = &set->barrier;
>>       }



More information about the igt-dev mailing list