[Intel-gfx] [PATCH i-g-t] i915/perf_pmu: Emit a semaphore to measure
Ramalingam C
ramalingam.c at intel.com
Fri Aug 21 09:27:40 UTC 2020
On 2020-08-10 at 13:44:15 +0100, Chris Wilson wrote:
> Don't assume the kernel will emit a semaphore to synchronise between two
> engines, and emit the semaphore ourselves for the basis of our
> measurements. The purpose of the test is to try and ascertain the
> accuracy of the two sampling methods: semaphore busyness uses register
> polling, whereas the engine busyness may use ktime_t of the CS events.
Looks good to me.
Reviewed-by: Ramalingam C <ramalingam.c at intel.com>
Tested on the platform too.
>
> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
> Cc: Ramalingam C <ramalingam.c at intel.com>
> ---
> tests/i915/perf_pmu.c | 94 +++++++++++++++++++++++++++++--------------
> 1 file changed, 64 insertions(+), 30 deletions(-)
>
> diff --git a/tests/i915/perf_pmu.c b/tests/i915/perf_pmu.c
> index 13e1bd93e..ecd4afbd6 100644
> --- a/tests/i915/perf_pmu.c
> +++ b/tests/i915/perf_pmu.c
> @@ -650,6 +650,7 @@ no_sema(int gem_fd, const struct intel_execution_engine2 *e, unsigned int flags)
> #define MI_SEMAPHORE_WAIT MI_INSTR(0x1c, 2) /* GEN8+ */
> #define MI_SEMAPHORE_POLL (1<<15)
> #define MI_SEMAPHORE_SAD_GTE_SDD (1<<12)
> +#define MI_SEMAPHORE_SAD_NEQ_SDD (5 << 12)
>
> static void
> sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
> @@ -751,10 +752,39 @@ sema_wait(int gem_fd, const struct intel_execution_engine2 *e,
> assert_within_epsilon(val[1] - val[0], slept, tolerance);
> }
>
> +static uint32_t
> +create_sema(int gem_fd, struct drm_i915_gem_relocation_entry *reloc)
> +{
> + uint32_t cs[] = {
> + /* Reset our semaphore wait */
> + MI_STORE_DWORD_IMM,
> + 0,
> + 0,
> + 1,
> +
> + /* Wait until the semaphore value is set to 0 [by caller] */
> + MI_SEMAPHORE_WAIT | MI_SEMAPHORE_POLL | MI_SEMAPHORE_SAD_NEQ_SDD,
> + 1,
> + 0,
> + 0,
> +
> + MI_BATCH_BUFFER_END
> + };
> + uint32_t handle = gem_create(gem_fd, 4096);
> +
> + memset(reloc, 0, 2 * sizeof(*reloc));
> + reloc[0].target_handle = handle;
> + reloc[0].offset = 64 + 1 * sizeof(uint32_t);
> + reloc[1].target_handle = handle;
> + reloc[1].offset = 64 + 6 * sizeof(uint32_t);
> +
> + gem_write(gem_fd, handle, 64, cs, sizeof(cs));
> + return handle;
> +}
> +
> static void
> __sema_busy(int gem_fd, int pmu,
> const struct intel_execution_engine2 *e,
> - const struct intel_execution_engine2 *signal,
> int sema_pct,
> int busy_pct)
> {
> @@ -764,39 +794,54 @@ __sema_busy(int gem_fd, int pmu,
> };
> uint64_t total, sema, busy;
> uint64_t start[2], val[2];
> - igt_spin_t *spin[2];
> + struct drm_i915_gem_relocation_entry reloc[2];
> + struct drm_i915_gem_exec_object2 obj = {
> + .handle = create_sema(gem_fd, reloc),
> + .relocation_count = 2,
> + .relocs_ptr = to_user_pointer(reloc),
> + };
> + struct drm_i915_gem_execbuffer2 eb = {
> + .batch_start_offset = 64,
> + .buffer_count = 1,
> + .buffers_ptr = to_user_pointer(&obj),
> + .flags = e->flags,
> + };
> + igt_spin_t *spin;
> + uint32_t *map;
>
> /* Time spent being busy includes time waiting on semaphores */
> igt_assert(busy_pct >= sema_pct);
>
> gem_quiescent_gpu(gem_fd);
>
> - spin[0] = igt_spin_new(gem_fd,
> - .engine = signal->flags,
> - .flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_POLL_RUN);
> - spin[1] = igt_spin_new(gem_fd,
> - .engine = e->flags,
> - .fence = spin[0]->out_fence,
> - .flags = IGT_SPIN_FENCE_IN);
> + map = gem_mmap__wc(gem_fd, obj.handle, 0, 4096, PROT_WRITE);
> + gem_execbuf(gem_fd, &eb);
> + spin = igt_spin_new(gem_fd, .engine = e->flags);
>
> - igt_spin_busywait_until_started(spin[0]);
> + /* Wait until the batch is executed and the semaphore is busy-waiting */
> + while (!READ_ONCE(*map) && gem_bo_busy(gem_fd, obj.handle))
> + ;
> + igt_assert(gem_bo_busy(gem_fd, obj.handle));
> + gem_close(gem_fd, obj.handle);
>
> total = pmu_read_multi(pmu, 2, start);
>
> sema = measured_usleep(batch_duration_ns * sema_pct / 100 / 1000);
> - igt_spin_end(spin[0]);
> + *map = 0; __sync_synchronize();
> busy = measured_usleep(batch_duration_ns * (busy_pct - sema_pct) / 100 / 1000);
> - igt_spin_end(spin[1]);
> + igt_spin_end(spin);
> measured_usleep(batch_duration_ns * (100 - busy_pct) / 100 / 1000);
>
> total = pmu_read_multi(pmu, 2, val) - total;
> + igt_spin_free(gem_fd, spin);
> + munmap(map, 4096);
>
> busy += sema;
> val[SEMA] -= start[SEMA];
> val[BUSY] -= start[BUSY];
>
> - igt_info("%s<-%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n",
> - e->name, signal->name,
> + igt_info("%s, target: {%.1f%% [%d], %.1f%% [%d]}, measured: {%.1f%%, %.1f%%}\n",
> + e->name,
> sema * 100. / total, sema_pct,
> busy * 100. / total, busy_pct,
> val[SEMA] * 100. / total,
> @@ -809,8 +854,6 @@ __sema_busy(int gem_fd, int pmu,
> val[SEMA] * 1e-3, val[SEMA] * 100. / total,
> val[BUSY] * 1e-3, val[BUSY] * 100. / total);
>
> - igt_spin_free(gem_fd, spin[1]);
> - igt_spin_free(gem_fd, spin[0]);
> }
>
> static void
> @@ -818,25 +861,16 @@ sema_busy(int gem_fd,
> const struct intel_execution_engine2 *e,
> unsigned int flags)
> {
> - const struct intel_execution_engine2 *signal;
> int fd;
>
> - igt_require(gem_scheduler_has_semaphores(gem_fd));
> - igt_require(gem_scheduler_has_preemption(gem_fd));
> + igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
>
> - fd = open_group(gem_fd,
> - I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> + fd = open_group(gem_fd, I915_PMU_ENGINE_SEMA(e->class, e->instance), -1);
> open_group(gem_fd, I915_PMU_ENGINE_BUSY(e->class, e->instance), fd);
>
> - __for_each_physical_engine(gem_fd, signal) {
> - if (e->class == signal->class &&
> - e->instance == signal->instance)
> - continue;
> -
> - __sema_busy(gem_fd, fd, e, signal, 50, 100);
> - __sema_busy(gem_fd, fd, e, signal, 25, 50);
> - __sema_busy(gem_fd, fd, e, signal, 75, 75);
> - }
> + __sema_busy(gem_fd, fd, e, 50, 100);
> + __sema_busy(gem_fd, fd, e, 25, 50);
> + __sema_busy(gem_fd, fd, e, 75, 75);
>
> close(fd);
> }
> --
> 2.28.0
>
More information about the Intel-gfx
mailing list