[Intel-gfx] [PATCH i-g-t 01/24] i915/gem_exec_latency: Measure the latency of context switching
Chris Wilson
chris at chris-wilson.co.uk
Fri Mar 22 09:21:32 UTC 2019
Measure the baseline latency of switching between contexts so that it can
be compared directly with the additional cost of preemption.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
tests/i915/gem_exec_latency.c | 145 ++++++++++++++++++++++++++++++++++
1 file changed, 145 insertions(+)
diff --git a/tests/i915/gem_exec_latency.c b/tests/i915/gem_exec_latency.c
index f308e0851..04ec7d9d2 100644
--- a/tests/i915/gem_exec_latency.c
+++ b/tests/i915/gem_exec_latency.c
@@ -629,6 +629,142 @@ rthog_latency_on_ring(int fd, unsigned int engine, const char *name, unsigned in
munmap(results, MMAP_SZ);
}
+static void context_switch(int i915, /* Measure the latency of switching between two contexts using GPU-written timestamps. */
+ unsigned int engine, const char *name, /* engine: value placed in eb.flags; name: label printed in the report */
+ unsigned int flags) /* flags: PREEMPT assigns the two contexts different priorities */
+{
+ struct drm_i915_gem_exec_object2 obj[2]; /* obj[0]: results page, obj[1]: batch buffer */
+ struct drm_i915_gem_relocation_entry reloc[5];
+ struct drm_i915_gem_execbuffer2 eb;
+ uint32_t *cs, *bbe, *results, v;
+ unsigned int mmio_base;
+ struct igt_mean mean;
+ uint32_t ctx[2];
+ /* Pick the register base that TIMESTAMP (+0x358) and GPR0 (+0x600) are addressed from. */
+ /* XXX i915_query()! */
+ switch (engine) {
+ case I915_EXEC_DEFAULT:
+ case I915_EXEC_RENDER:
+ mmio_base = 0x2000;
+ break;
+#if 0
+ case I915_EXEC_BSD:
+ mmio_base = 0x12000;
+ break;
+#endif
+ case I915_EXEC_BLT:
+ mmio_base = 0x22000;
+ break;
+
+ case I915_EXEC_VEBOX:
+ if (intel_gen(intel_get_drm_devid(i915)) >= 11)
+ mmio_base = 0x1d8000;
+ else
+ mmio_base = 0x1a000;
+ break;
+
+ default: /* any other engine value: skip the test */
+ igt_skip("mmio base not known\n");
+ }
+ /* One context per batch: ctx[0] runs the looping batch, ctx[1] the sampling batch. */
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ ctx[i] = gem_context_create(i915);
+
+ if (flags & PREEMPT) {
+ gem_context_set_priority(i915, ctx[0], -1023);
+ gem_context_set_priority(i915, ctx[1], +1023);
+ }
+ /* obj[0]: 4KiB results page that the CPU reads through a read-only mapping. */
+ memset(obj, 0, sizeof(obj));
+ obj[0].handle = gem_create(i915, 4096);
+ gem_set_caching(i915, obj[0].handle, 1);
+ results = gem_mmap__cpu(i915, obj[0].handle, 0, 4096, PROT_READ);
+ gem_set_domain(i915, obj[0].handle, I915_GEM_DOMAIN_CPU, 0);
+ /* obj[1]: 4KiB batch buffer holding both command sequences and all five relocations. */
+ obj[1].handle = gem_create(i915, 4096);
+ memset(reloc,0, sizeof(reloc));
+ obj[1].relocation_count = ARRAY_SIZE(reloc);
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+ bbe = gem_mmap__wc(i915, obj[1].handle, 0, 4096, PROT_WRITE);
+ gem_set_domain(i915, obj[1].handle,
+ I915_GEM_DOMAIN_WC, I915_GEM_DOMAIN_WC);
+ /* First batch (offset 0): store TIMESTAMP at obj[0]+0, then jump back to obj[1]+0, i.e. loop. */
+ cs = bbe;
+ *cs++ = 0x5 << 23; /* NOTE(review): presumably MI_ARB_CHECK - confirm; this dword is toggled in the loop below */
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = mmio_base + 0x358; /* TIMESTAMP */
+ reloc[0].target_handle = obj[0].handle;
+ reloc[0].offset = (cs - bbe) * sizeof(*cs); /* byte offset of the address dwords written next */
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = MI_BATCH_BUFFER_START | 1;
+ reloc[1].target_handle = obj[1].handle; /* delta stays 0 from the memset: target is the start of this batch */
+ reloc[1].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = 0;
+ *cs++ = 0;
+ /* Second batch, placed at dword 64 to match the batch_start_offset used for ctx[1]. */
+ cs = bbe + 64;
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = mmio_base + 0x358; /* TIMESTAMP */
+ reloc[2].target_handle = obj[0].handle;
+ reloc[2].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = reloc[2].delta = 4; /* byte 4 of obj[0], read back as results[1] */
+ *cs++ = 0;
+ *cs++ = 0x29 << 23 | 2; /* LRM */
+ *cs++ = mmio_base + 0x600; /* GPR0 */
+ reloc[3].target_handle = obj[0].handle; /* delta 0: load the value the first batch stored at obj[0]+0 */
+ reloc[3].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = 0;
+ *cs++ = 0;
+ *cs++ = 0x24 << 23 | 2; /* SRM */
+ *cs++ = mmio_base + 0x600; /* GPR0 */
+ reloc[4].target_handle = obj[0].handle;
+ reloc[4].offset = (cs - bbe) * sizeof(*cs);
+ *cs++ = reloc[4].delta = 8; /* byte 8 of obj[0], read back as results[2] */
+ *cs++ = 0;
+ *cs++ = 0xa << 23; /* NOTE(review): presumably MI_BATCH_BUFFER_END - confirm */
+ /* Both objects are submitted on every execbuf; only the context and start offset change. */
+ memset(&eb, 0, sizeof(eb));
+ eb.buffers_ptr = to_user_pointer(obj);
+ eb.buffer_count = ARRAY_SIZE(obj);
+ eb.flags = engine;
+ eb.flags |= LOCAL_I915_EXEC_NO_RELOC;
+ /* v holds the results[0] value seen at the end of the previous iteration. */
+ v = 0;
+ igt_mean_init(&mean);
+ igt_until_timeout(5) {
+ eb.rsvd1 = ctx[0];
+ eb.batch_start_offset = 0;
+ gem_execbuf(i915, &eb);
+ /* Wait until results[0] changes from v, asserting the batch object is still busy. */
+ while (results[0] == v)
+ igt_assert(gem_bo_busy(i915, obj[1].handle));
+ /* Submit the second batch on the other context. */
+ eb.rsvd1 = ctx[1];
+ eb.batch_start_offset = 64 * sizeof(*cs);
+ gem_execbuf(i915, &eb);
+ /* Rewrite the first dword of the looping batch, wait for obj[1], then restore the dword for the next pass. */
+ *bbe = 0xa << 23;
+ gem_sync(i915, obj[1].handle);
+ *bbe = 0x5 << 23;
+ /* results[1]: second batch's TIMESTAMP; results[2]: its GPR0 copy of what the first batch stored. */
+ v = results[0];
+ igt_mean_add(&mean, (results[1] - results[2]) * rcs_clock); /* NOTE(review): rcs_clock is defined outside this hunk; presumably ns per tick - confirm */
+ }
+ igt_info("%s context switch latency%s: %.2f±%.2fus\n",
+ name, flags & PREEMPT ? " (preempt)" : "",
+ 1e-3 * igt_mean_get(&mean),
+ 1e-3 * sqrt(igt_mean_get_variance(&mean)));
+ munmap(results, 4096);
+ munmap(bbe, 4096);
+ /* Release the two buffer objects and the two contexts created above. */
+ for (int i = 0; i < ARRAY_SIZE(obj); i++)
+ gem_close(i915, obj[i].handle);
+
+ for (int i = 0; i < ARRAY_SIZE(ctx); i++)
+ gem_context_destroy(i915, ctx[i]);
+}
+
static double clockrate(int i915, int reg)
{
volatile uint32_t *mmio;
@@ -754,12 +890,21 @@ igt_main
e->exec_id | e->flags,
e->name, CORK);
+ igt_subtest_f("%s-cs", e->name)
+ context_switch(device,
+ e->exec_id | e->flags,
+ e->name, 0);
igt_subtest_group {
igt_fixture {
gem_require_contexts(device);
igt_require(gem_scheduler_has_preemption(device));
}
+ igt_subtest_f("%s-cs-preempt", e->name)
+ context_switch(device,
+ e->exec_id | e->flags,
+ e->name, PREEMPT);
+
igt_subtest_f("%s-preemption", e->name)
latency_from_ring(device,
e->exec_id | e->flags,
--
2.20.1
More information about the Intel-gfx
mailing list