[Intel-gfx] [igt-dev] [PATCH i-g-t v2] tests/perf_pmu: Improve accuracy by waiting on spinner to start

Thu Mar 15 16:01:47 UTC 2018

Quoting Tvrtko Ursulin (2018-03-15 15:46:00)
>  static void
> -__submit_spin_batch(int gem_fd,
> -                   struct drm_i915_gem_exec_object2 *obj,
> +__submit_spin_batch(int gem_fd, igt_spin_t *spin,
>                     const struct intel_execution_engine2 *e)
>  {
> -       struct drm_i915_gem_execbuffer2 eb = {
> -               .buffer_count = 1,
> -               .buffers_ptr = to_user_pointer(obj),
> -               .flags = e2ring(gem_fd, e),
> -       };
> +       struct drm_i915_gem_execbuffer2 eb = spin->execbuf;
> +
> +       eb.flags &= ~(0x3f | I915_EXEC_BSD_MASK);
> +       eb.flags |= e2ring(gem_fd, e);

I'm dubious about keeping spin->execbuf for precisely this reason.
Almost all the time I want to specify a different execution, and not use
the same context, random fences, etc.

However, it does give a convenient way to get buffers_count and
buffers_ptr, but that is all that is valid (imo) or at least should be
judiciously copied from rather than wholesale.

eb = {
	.buffers_ptr = spin->execbuf.buffers_ptr,
	.buffer_count = spin->execbuf.buffer_count,
	.flags = e2ring(gem_fd, e) | I915_EXEC_NORELOC,
};

> @@ -1545,24 +1575,30 @@ accuracy(int gem_fd, const struct intel_execution_engine2 *e,
>  
>                         igt_nsec_elapsed(&test_start);
>                         do {
> -                               unsigned int target_idle_us, t_busy;
> +                               unsigned int target_idle_us;
> +                               struct timespec start = { };
> +                               unsigned long prep_delay_ns;
>  
>                                 /* Restart the spinbatch. */
> +                               igt_nsec_elapsed(&start);
>                                 __rearm_spin_batch(spin);
> -                               __submit_spin_batch(gem_fd, &obj, e);
> +                               __submit_spin_batch(gem_fd, spin, e);
>  
> -                               /*
> -                                * Note that the submission may be delayed to a
> -                                * tasklet (ksoftirqd) which cannot run until we
> -                                * sleep as we hog the cpu (we are RT).
> -                                */
> +                                /* Wait for batch to start executing. */
> +                               __spin_wait(gem_fd, spin);
> +                               prep_delay_ns = igt_nsec_elapsed(&start);
>  
> -                               t_busy = measured_usleep(busy_us);
> +                               /* PWM busy sleep. */
> +                               memset(&start, 0, sizeof(start));
> +                               igt_nsec_elapsed(&start);

Can just keep using start, it's already has the current time from
calculating prep_delay_ns.

> +                               measured_usleep(busy_us);
>                                 igt_spin_batch_end(spin);
> -                               gem_sync(gem_fd, obj.handle);
> +                               gem_sync(gem_fd, spin->handle);
>  
> -                               total_busy_ns += t_busy;
> +                               total_busy_ns += igt_nsec_elapsed(&start);
> +                               total_idle_ns += prep_delay_ns;

Ok.

Reviewed-by: Chris Wilson <chris at chris-wilson.co.uk>
-Chris