[igt-dev] [PATCH i-g-t 1/6] tests/gem_exec_latency: Move to benchmarks

Abdiel Janulgue abdiel.janulgue at linux.intel.com
Mon Feb 5 09:35:20 UTC 2018


Signed-off-by: Abdiel Janulgue <abdiel.janulgue at linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
---
 benchmarks/Makefile.sources   |   1 +
 benchmarks/gem_exec_latency.c | 504 ++++++++++++++++++++++++++++++++++++++++++
 benchmarks/meson.build        |   1 +
 tests/Makefile.sources        |   1 -
 tests/gem_exec_latency.c      | 504 ------------------------------------------
 tests/meson.build             |   1 -
 6 files changed, 506 insertions(+), 506 deletions(-)
 create mode 100644 benchmarks/gem_exec_latency.c
 delete mode 100644 tests/gem_exec_latency.c

diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
index d150035..4562b28 100644
--- a/benchmarks/Makefile.sources
+++ b/benchmarks/Makefile.sources
@@ -15,6 +15,7 @@ benchmarks_prog_list =			\
 	gem_set_domain			\
 	gem_syslatency			\
 	gem_wsim			\
+	gem_exec_latency		\
 	kms_vblank			\
 	prime_lookup			\
 	vgem_mmap			\
diff --git a/benchmarks/gem_exec_latency.c b/benchmarks/gem_exec_latency.c
new file mode 100644
index 0000000..850404b
--- /dev/null
+++ b/benchmarks/gem_exec_latency.c
@@ -0,0 +1,504 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <sys/signal.h>
+#include <time.h>
+
+#include "drm.h"
+
+#include "igt_sysfs.h"
+#include "igt_vgem.h"
+
+#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
+#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
+
+#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
+#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
+
+#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
+
+#define CORK 0x1
+#define PREEMPT 0x2
+
+static unsigned int ring_size;
+
+struct cork { /* State shared between one plug() call and the matching unplug() */
+	int device; /* fd returned by drm_open_driver(DRIVER_VGEM) in plug() */
+	uint32_t handle; /* handle on the caller's fd, produced by prime_fd_to_handle() in plug() */
+	uint32_t fence; /* value returned by vgem_fence_attach(); passed to vgem_fence_signal() in unplug() */
+};
+
+static void plug(int fd, struct cork *c) /* Creates a vgem bo with a write fence attached, imports it into fd and fills in *c */
+{
+	struct vgem_bo bo; /* only width, height and bpp are assigned here before vgem_create() */
+	int dmabuf;
+
+	c->device = drm_open_driver(DRIVER_VGEM); /* a fresh vgem fd per plug(); closed again in unplug() */
+
+	bo.width = bo.height = 1;
+	bo.bpp = 4;
+	vgem_create(c->device, &bo);
+	c->fence = vgem_fence_attach(c->device, &bo, VGEM_FENCE_WRITE);
+
+	dmabuf = prime_handle_to_fd(c->device, bo.handle); /* export the bo from the vgem device ... */
+	c->handle = prime_fd_to_handle(fd, dmabuf); /* ... and import it into fd */
+	close(dmabuf); /* the dma-buf fd is not used after the import */
+}
+
+static void unplug(struct cork *c) /* Signals the fence attached by plug() and closes the vgem device fd */
+{
+	vgem_fence_signal(c->device, c->fence);
+	close(c->device); /* NOTE(review): c->handle is not closed here (this function has no fd for it) - confirm whether that handle is meant to live until the i915 fd is closed */
+}
+
+static void alarm_handler(int sig) /* Empty signal handler; installed for SIGALRM by set_timeout() */
+{
+}
+
+static void set_timeout(int seconds) /* Non-zero: installs alarm_handler for SIGALRM and arms alarm(seconds); zero: only calls alarm(0) */
+{
+	struct sigaction sa = { .sa_handler = alarm_handler }; /* remaining fields are zero-initialised, so sa_flags has no SA_RESTART */
+
+	sigaction(SIGALRM, seconds ? &sa : NULL, NULL); /* with a NULL action the existing disposition is left as it is */
+	alarm(seconds); /* alarm(0) cancels a pending alarm */
+}
+
+static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf) /* Raw EXECBUFFER2 ioctl: returns ioctl()'s result to the caller instead of asserting */
+{
+	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+}
+
+static unsigned int measure_ring_size(int fd) /* Returns how many execbufs of a plugged batch succeed before the first failing ioctl */
+{
+	struct drm_i915_gem_exec_object2 obj[2]; /* [0] = cork handle from plug(), [1] = batch */
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const uint32_t bbe = MI_BATCH_BUFFER_END;
+	unsigned int count;
+	struct cork c;
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096);
+	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe)); /* the batch is a single MI_BATCH_BUFFER_END dword */
+
+	plug(fd, &c);
+	obj[0].handle = c.handle; /* every submission also lists the plugged object */
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(obj);
+	execbuf.buffer_count = 2;
+
+	count = 0;
+	set_timeout(1); /* SIGALRM after one second; presumably what interrupts a blocked ioctl - TODO confirm */
+	while (__execbuf(fd, &execbuf) == 0) /* stops at the first ioctl that does not return 0 */
+		count++;
+	set_timeout(0); /* cancel any alarm still pending */
+
+	unplug(&c);
+	gem_close(fd, obj[1].handle);
+
+	return count;
+}
+
+#define RCS_TIMESTAMP (0x2000 + 0x358) /* register offset: sampled through igt_global_mmio and written into every batch below */
+static void latency_on_ring(int fd, /* Submits ring_size register-storing batches on `ring` and reports the measured latencies through igt_info() */
+			    unsigned ring, const char *name, /* ring: OR-ed into execbuf.flags; name: label printed in the log line */
+			    unsigned flags) /* flags: only the CORK bit is tested in this function */
+{
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const int has_64bit_reloc = gen >= 8; /* gen >= 8 batches carry an extra upper-address dword */
+	struct drm_i915_gem_exec_object2 obj[3]; /* [0] = cork (only with CORK), [1] = results buffer, [2] = batch buffer */
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct cork c;
+	volatile uint32_t *reg; /* CPU view of RCS_TIMESTAMP inside igt_global_mmio */
+	unsigned repeats = ring_size; /* one batch per repeat */
+	uint32_t start, end, *map, *results;
+	uint64_t offset;
+	double gpu_latency;
+	int i, j; /* NOTE(review): j is signed but compared against the unsigned repeats below */
+
+	reg = (volatile uint32_t *)((volatile char *)igt_global_mmio + RCS_TIMESTAMP);
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj[1]); /* object list starts at obj[1] until the CORK branch below rewinds it to obj[0] */
+	execbuf.buffer_count = 2;
+	execbuf.flags = ring;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096); /* 4096 bytes = 1024 uint32_t result slots */
+	obj[1].flags = EXEC_OBJECT_WRITE;
+	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
+	obj[2].handle = gem_create(fd, 64*1024); /* 64KiB = 1024 batches at the 64-byte stride used below */
+	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+	map[0] = MI_BATCH_BUFFER_END;
+	gem_execbuf(fd, &execbuf); /* first submission of an empty batch; presumably populates obj[1].offset and obj[2].offset, which are read below - TODO confirm */
+
+	memset(&reloc,0, sizeof(reloc));
+	obj[2].relocation_count = 1;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+
+	reloc.target_handle = flags & CORK ? 1 : 0; /* HANDLE_LUT is set, so this is obj[1]'s index in the submitted list: 1 when the list starts at obj[0], else 0 */
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.presumed_offset = obj[1].offset;
+
+	for (j = 0; j < repeats; j++) { /* pre-write one batch per repeat; nothing is submitted in this loop */
+		execbuf.batch_start_offset = 64 * j; /* batches sit 64 bytes (16 dwords) apart */
+		reloc.offset =
+			execbuf.batch_start_offset + sizeof(uint32_t); /* NOTE(review): this is dword 1 of the batch, but the address is written at dword 2 below - confirm */
+		reloc.delta = sizeof(uint32_t) * j; /* batch j targets results[j] */
+
+		offset = reloc.presumed_offset;
+		offset += reloc.delta;
+
+		i = 16 * j;
+		/* MI_STORE_REG_MEM: command dword, register, address (one or two dwords) */
+		map[i++] = 0x24 << 23 | 1;
+		if (has_64bit_reloc)
+			map[i-1]++; /* command dword is incremented when an upper address dword follows */
+		map[i++] = RCS_TIMESTAMP; /* ring local! */
+		map[i++] = offset;
+		if (has_64bit_reloc)
+			map[i++] = offset >> 32;
+		map[i++] = MI_BATCH_BUFFER_END; /* lands at dword 4 (64-bit) or 3; replaced by the chaining loop further down */
+	}
+
+	if (flags & CORK) {
+		plug(fd, &c);
+		obj[0].handle = c.handle;
+		execbuf.buffers_ptr = to_user_pointer(&obj[0]); /* include the cork as the first object */
+		execbuf.buffer_count = 3;
+	}
+
+	start = *reg; /* register sample taken before the submission loop */
+	for (j = 0; j < repeats; j++) {
+		uint64_t presumed_offset = reloc.presumed_offset;
+
+		execbuf.batch_start_offset = 64 * j;
+		reloc.offset =
+			execbuf.batch_start_offset + sizeof(uint32_t);
+		reloc.delta = sizeof(uint32_t) * j;
+
+		gem_execbuf(fd, &execbuf);
+		igt_assert(reloc.presumed_offset == presumed_offset); /* the pre-written addresses are only valid if this did not change */
+	}
+	end = *reg; /* register sample taken after the last submission returned */
+	igt_assert(reloc.presumed_offset == obj[1].offset);
+
+	if (flags & CORK)
+		unplug(&c);
+
+	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0); /* presumably waits for the stores before the CPU reads results - TODO confirm */
+	gpu_latency = (results[repeats-1] - results[0]) / (double)(repeats-1); /* last minus first stored value, per interval; NOTE(review): repeats == 1 evaluates 0/0.0 */
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+
+	execbuf.batch_start_offset = 0;
+	for (j = 0; j < repeats - 1; j++) { /* chain batch j to batch j + 1; the final batch keeps its MI_BATCH_BUFFER_END */
+		offset = obj[2].offset;
+		offset += 64 * (j + 1);
+
+		i = 16 * j + (has_64bit_reloc ? 4 : 3); /* index of the MI_BATCH_BUFFER_END written by the first loop */
+		map[i] = MI_BATCH_BUFFER_START;
+		if (gen >= 8) {
+			map[i] |= 1 << 8 | 1;
+			map[i + 1] = offset;
+			map[i + 2] = offset >> 32;
+		} else if (gen >= 6) {
+			map[i] |= 1 << 8;
+			map[i + 1] = offset;
+		} else {
+			map[i] |= 2 << 6;
+			map[i + 1] = offset;
+			if (gen < 4)
+				map[i] |= 1;
+		}
+	}
+	offset = obj[2].offset;
+	gem_execbuf(fd, &execbuf); /* one submission from batch offset 0 that runs the whole chain */
+	igt_assert(offset == obj[2].offset); /* the chained addresses assumed the batch buffer did not move */
+
+	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
+	igt_info("%s: dispatch latency: %.2f, execution latency: %.2f (target %.2f)\n",
+		 name,
+		 (end - start) / (double)repeats, /* register delta across the submission loop, per batch */
+		 gpu_latency, (results[repeats - 1] - results[0]) / (double)(repeats - 1)); /* gpu_latency: separate submissions; last value: the chained run */
+
+	munmap(map, 64*1024);
+	munmap(results, 4096);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[2].handle);
+}
+
+static void latency_from_ring(int fd, /* For every available engine, alternates register-storing batches between `ring` and that engine and logs the resulting delay */
+			      unsigned ring, const char *name, /* ring: engine flags for the first batch of each pair; name: label printed in the log line */
+			      unsigned flags) /* flags: CORK and PREEMPT bits are tested */
+{
+	const struct intel_execution_engine *e;
+	const int gen = intel_gen(intel_get_drm_devid(fd));
+	const int has_64bit_reloc = gen >= 8; /* gen >= 8 batches carry an extra upper-address dword */
+	struct drm_i915_gem_exec_object2 obj[3]; /* [0] = cork (only with CORK), [1] = results buffer, [2] = batch buffer */
+	struct drm_i915_gem_relocation_entry reloc;
+	struct drm_i915_gem_execbuffer2 execbuf;
+	const unsigned int repeats = ring_size / 2; /* two batches per repeat; NOTE(review): ring_size == 1 gives repeats == 0, see the results[] index in the log line below */
+	uint32_t *map, *results;
+	uint32_t ctx[2] = {}; /* both stay 0 unless PREEMPT is set */
+	int i, j;
+
+	if (flags & PREEMPT) {
+		ctx[0] = gem_context_create(fd);
+		gem_context_set_priority(fd, ctx[0], -1023); /* ctx[0] is used for the spinner below */
+
+		ctx[1] = gem_context_create(fd);
+		gem_context_set_priority(fd, ctx[1], 1023); /* ctx[1] is used for the measured batches */
+	}
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = to_user_pointer(&obj[1]); /* object list starts at obj[1] until the CORK branch below rewinds it to obj[0] */
+	execbuf.buffer_count = 2;
+	execbuf.flags = ring;
+	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
+	execbuf.rsvd1 = ctx[1];
+
+	memset(obj, 0, sizeof(obj));
+	obj[1].handle = gem_create(fd, 4096); /* 4096 bytes = 1024 uint32_t result slots */
+	obj[1].flags = EXEC_OBJECT_WRITE;
+	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
+
+	obj[2].handle = gem_create(fd, 64*1024); /* 64KiB = 1024 batches at the 64-byte stride used below */
+	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+	map[0] = MI_BATCH_BUFFER_END;
+	gem_execbuf(fd, &execbuf); /* first submission of an empty batch; presumably populates obj[1].offset, which is read below - TODO confirm */
+
+	memset(&reloc,0, sizeof(reloc));
+	obj[2].relocation_count = 1;
+	obj[2].relocs_ptr = to_user_pointer(&reloc);
+
+	gem_set_domain(fd, obj[2].handle,
+		       I915_GEM_DOMAIN_GTT,
+		       I915_GEM_DOMAIN_GTT);
+
+	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
+	reloc.presumed_offset = obj[1].offset;
+	reloc.target_handle = flags & CORK ? 1 : 0; /* HANDLE_LUT is set, so this is obj[1]'s index in the submitted list: 1 when the list starts at obj[0], else 0 */
+
+	for (e = intel_execution_engines; e->name; e++) { /* one measurement and one log line per engine */
+		igt_spin_t *spin = NULL; /* stays NULL unless PREEMPT is set */
+		struct cork c;
+
+		if (e->exec_id == 0)
+			continue;
+
+		if (!gem_has_ring(fd, e->exec_id | e->flags))
+			continue;
+
+		gem_set_domain(fd, obj[2].handle,
+			       I915_GEM_DOMAIN_GTT,
+			       I915_GEM_DOMAIN_GTT);
+
+		if (flags & PREEMPT)
+			spin = igt_spin_batch_new(fd, ctx[0], ring, 0); /* spinner on `ring` in the priority -1023 context */
+
+		if (flags & CORK) {
+			plug(fd, &c); /* NOTE(review): a new cork handle is imported for every engine and none is closed in this function - confirm */
+			obj[0].handle = c.handle;
+			execbuf.buffers_ptr = to_user_pointer(&obj[0]); /* include the cork as the first object */
+			execbuf.buffer_count = 3;
+		}
+
+		for (j = 0; j < repeats; j++) { /* each pass submits batch j on `ring`, then batch j + repeats on engine e */
+			uint64_t offset;
+
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= ring;
+
+			execbuf.batch_start_offset = 64 * j; /* batches sit 64 bytes (16 dwords) apart */
+			reloc.offset =
+				execbuf.batch_start_offset + sizeof(uint32_t); /* NOTE(review): this is dword 1 of the batch, but the address is written at dword 2 below - confirm */
+			reloc.delta = sizeof(uint32_t) * j; /* this batch targets results[j] */
+
+			reloc.presumed_offset = obj[1].offset;
+			offset = reloc.presumed_offset;
+			offset += reloc.delta;
+
+			i = 16 * j;
+			/* MI_STORE_REG_MEM: command dword, register, address (one or two dwords) */
+			map[i++] = 0x24 << 23 | 1;
+			if (has_64bit_reloc)
+				map[i-1]++; /* command dword is incremented when an upper address dword follows */
+			map[i++] = RCS_TIMESTAMP; /* ring local! */
+			map[i++] = offset;
+			if (has_64bit_reloc)
+				map[i++] = offset >> 32;
+			map[i++] = MI_BATCH_BUFFER_END;
+
+			gem_execbuf(fd, &execbuf);
+
+			execbuf.flags &= ~ENGINE_FLAGS;
+			execbuf.flags |= e->exec_id | e->flags; /* switch to the engine under test for the second batch */
+
+			execbuf.batch_start_offset = 64 * (j + repeats);
+			reloc.offset =
+				execbuf.batch_start_offset + sizeof(uint32_t);
+			reloc.delta = sizeof(uint32_t) * (j + repeats); /* this batch targets results[j + repeats] */
+
+			reloc.presumed_offset = obj[1].offset;
+			offset = reloc.presumed_offset;
+			offset += reloc.delta;
+
+			i = 16 * (j + repeats);
+			/* MI_STORE_REG_MEM: same layout as the first batch of the pair */
+			map[i++] = 0x24 << 23 | 1;
+			if (has_64bit_reloc)
+				map[i-1]++;
+			map[i++] = RCS_TIMESTAMP; /* ring local! */
+			map[i++] = offset;
+			if (has_64bit_reloc)
+				map[i++] = offset >> 32;
+			map[i++] = MI_BATCH_BUFFER_END;
+
+			gem_execbuf(fd, &execbuf);
+		}
+
+		if (flags & CORK)
+			unplug(&c);
+		gem_set_domain(fd, obj[1].handle,
+			       I915_GEM_DOMAIN_GTT,
+			       I915_GEM_DOMAIN_GTT); /* presumably waits for the stores before the CPU reads results - TODO confirm */
+		igt_spin_batch_free(fd, spin); /* called with spin == NULL when PREEMPT is not set */
+
+		igt_info("%s-%s delay: %.2f\n",
+			 name, e->name, (results[2*repeats-1] - results[0]) / (double)repeats); /* NOTE(review): with repeats == 0 the unsigned index 2*repeats-1 wraps and reads far outside the 4096-byte mapping */
+	}
+
+	munmap(map, 64*1024);
+	munmap(results, 4096);
+	gem_close(fd, obj[1].handle);
+	gem_close(fd, obj[2].handle);
+
+	if (flags & PREEMPT) {
+		gem_context_destroy(fd, ctx[1]);
+		gem_context_destroy(fd, ctx[0]);
+	}
+}
+
+igt_main /* Entry point: measures the ring size once, then registers the per-engine latency subtests */
+{
+	const struct intel_execution_engine *e;
+	int device = -1;
+
+	igt_fixture {
+		device = drm_open_driver(DRIVER_INTEL);
+		igt_require_gem(device);
+		gem_require_mmap_wc(device); /* the latency functions map their buffers with gem_mmap__wc() */
+
+		gem_submission_print_method(device);
+
+		ring_size = measure_ring_size(device);
+		igt_info("Ring size: %d batches\n", ring_size); /* NOTE(review): ring_size is unsigned int but is printed with %d */
+		igt_require(ring_size > 8);
+		ring_size -= 8; /* leave some spare */
+		if (ring_size > 1024)
+			ring_size = 1024; /* 1024 matches the 4096-byte results buffer (uint32_t slots) and the 64KiB batch buffer (64-byte batches) in the latency functions */
+
+		intel_register_access_init(intel_get_pci_device(), false, device); /* presumably sets up igt_global_mmio, which latency_on_ring() reads - TODO confirm */
+	}
+
+	igt_subtest_group {
+		igt_fixture
+			igt_require(intel_gen(intel_get_drm_devid(device)) >= 7); /* every subtest in this group requires gen >= 7 */
+
+		for (e = intel_execution_engines; e->name; e++) {
+			if (e->exec_id == 0)
+				continue;
+
+			igt_subtest_group {
+				igt_fixture {
+					gem_require_ring(device, e->exec_id | e->flags);
+				}
+
+				igt_subtest_f("%s-dispatch", e->name)
+					latency_on_ring(device,
+							e->exec_id | e->flags,
+							e->name, 0);
+
+				igt_subtest_f("%s-dispatch-queued", e->name) /* same measurement with CORK set */
+					latency_on_ring(device,
+							e->exec_id | e->flags,
+							e->name, CORK);
+
+				igt_subtest_f("%s-synchronisation", e->name)
+					latency_from_ring(device,
+							  e->exec_id | e->flags,
+							  e->name, 0);
+
+				igt_subtest_f("%s-synchronisation-queued", e->name) /* same measurement with CORK set */
+					latency_from_ring(device,
+							  e->exec_id | e->flags,
+							  e->name, CORK);
+
+				igt_subtest_group {
+					igt_fixture {
+						igt_require(gem_scheduler_has_preemption(device)); /* gates only the preemption subtest below */
+					}
+
+					igt_subtest_f("%s-preemption", e->name)
+						latency_from_ring(device,
+								  e->exec_id | e->flags,
+								  e->name, PREEMPT);
+				}
+			}
+		}
+	}
+
+	igt_fixture {
+		close(device);
+	}
+}
diff --git a/benchmarks/meson.build b/benchmarks/meson.build
index 27836c1..c89bb76 100644
--- a/benchmarks/meson.build
+++ b/benchmarks/meson.build
@@ -12,6 +12,7 @@ benchmark_progs = [
 	'gem_prw',
 	'gem_set_domain',
 	'gem_syslatency',
+	'gem_exec_latency',
 	'kms_vblank',
 	'prime_lookup',
 	'vgem_mmap',
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 870c909..2d8ecb6 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -77,7 +77,6 @@ TESTS_progs = \
 	gem_exec_fence \
 	gem_exec_flush \
 	gem_exec_gttfill \
-	gem_exec_latency \
 	gem_exec_lut_handle \
 	gem_exec_nop \
 	gem_exec_parallel \
diff --git a/tests/gem_exec_latency.c b/tests/gem_exec_latency.c
deleted file mode 100644
index 850404b..0000000
--- a/tests/gem_exec_latency.c
+++ /dev/null
@@ -1,504 +0,0 @@
-/*
- * Copyright © 2016 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- *
- */
-
-#include "igt.h"
-#include <unistd.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <string.h>
-#include <fcntl.h>
-#include <inttypes.h>
-#include <errno.h>
-#include <sys/stat.h>
-#include <sys/ioctl.h>
-#include <sys/time.h>
-#include <sys/signal.h>
-#include <time.h>
-
-#include "drm.h"
-
-#include "igt_sysfs.h"
-#include "igt_vgem.h"
-
-#define LOCAL_I915_EXEC_NO_RELOC (1<<11)
-#define LOCAL_I915_EXEC_HANDLE_LUT (1<<12)
-
-#define LOCAL_I915_EXEC_BSD_SHIFT      (13)
-#define LOCAL_I915_EXEC_BSD_MASK       (3 << LOCAL_I915_EXEC_BSD_SHIFT)
-
-#define ENGINE_FLAGS  (I915_EXEC_RING_MASK | LOCAL_I915_EXEC_BSD_MASK)
-
-#define CORK 0x1
-#define PREEMPT 0x2
-
-static unsigned int ring_size;
-
-struct cork {
-	int device;
-	uint32_t handle;
-	uint32_t fence;
-};
-
-static void plug(int fd, struct cork *c)
-{
-	struct vgem_bo bo;
-	int dmabuf;
-
-	c->device = drm_open_driver(DRIVER_VGEM);
-
-	bo.width = bo.height = 1;
-	bo.bpp = 4;
-	vgem_create(c->device, &bo);
-	c->fence = vgem_fence_attach(c->device, &bo, VGEM_FENCE_WRITE);
-
-	dmabuf = prime_handle_to_fd(c->device, bo.handle);
-	c->handle = prime_fd_to_handle(fd, dmabuf);
-	close(dmabuf);
-}
-
-static void unplug(struct cork *c)
-{
-	vgem_fence_signal(c->device, c->fence);
-	close(c->device);
-}
-
-static void alarm_handler(int sig)
-{
-}
-
-static void set_timeout(int seconds)
-{
-	struct sigaction sa = { .sa_handler = alarm_handler };
-
-	sigaction(SIGALRM, seconds ? &sa : NULL, NULL);
-	alarm(seconds);
-}
-
-static int __execbuf(int fd, struct drm_i915_gem_execbuffer2 *execbuf)
-{
-	return ioctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
-}
-
-static unsigned int measure_ring_size(int fd)
-{
-	struct drm_i915_gem_exec_object2 obj[2];
-	struct drm_i915_gem_execbuffer2 execbuf;
-	const uint32_t bbe = MI_BATCH_BUFFER_END;
-	unsigned int count;
-	struct cork c;
-
-	memset(obj, 0, sizeof(obj));
-	obj[1].handle = gem_create(fd, 4096);
-	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
-
-	plug(fd, &c);
-	obj[0].handle = c.handle;
-
-	memset(&execbuf, 0, sizeof(execbuf));
-	execbuf.buffers_ptr = to_user_pointer(obj);
-	execbuf.buffer_count = 2;
-
-	count = 0;
-	set_timeout(1);
-	while (__execbuf(fd, &execbuf) == 0)
-		count++;
-	set_timeout(0);
-
-	unplug(&c);
-	gem_close(fd, obj[1].handle);
-
-	return count;
-}
-
-#define RCS_TIMESTAMP (0x2000 + 0x358)
-static void latency_on_ring(int fd,
-			    unsigned ring, const char *name,
-			    unsigned flags)
-{
-	const int gen = intel_gen(intel_get_drm_devid(fd));
-	const int has_64bit_reloc = gen >= 8;
-	struct drm_i915_gem_exec_object2 obj[3];
-	struct drm_i915_gem_relocation_entry reloc;
-	struct drm_i915_gem_execbuffer2 execbuf;
-	struct cork c;
-	volatile uint32_t *reg;
-	unsigned repeats = ring_size;
-	uint32_t start, end, *map, *results;
-	uint64_t offset;
-	double gpu_latency;
-	int i, j;
-
-	reg = (volatile uint32_t *)((volatile char *)igt_global_mmio + RCS_TIMESTAMP);
-
-	memset(&execbuf, 0, sizeof(execbuf));
-	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
-	execbuf.buffer_count = 2;
-	execbuf.flags = ring;
-	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
-
-	memset(obj, 0, sizeof(obj));
-	obj[1].handle = gem_create(fd, 4096);
-	obj[1].flags = EXEC_OBJECT_WRITE;
-	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
-
-	obj[2].handle = gem_create(fd, 64*1024);
-	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
-	gem_set_domain(fd, obj[2].handle,
-		       I915_GEM_DOMAIN_GTT,
-		       I915_GEM_DOMAIN_GTT);
-	map[0] = MI_BATCH_BUFFER_END;
-	gem_execbuf(fd, &execbuf);
-
-	memset(&reloc,0, sizeof(reloc));
-	obj[2].relocation_count = 1;
-	obj[2].relocs_ptr = to_user_pointer(&reloc);
-
-	gem_set_domain(fd, obj[2].handle,
-		       I915_GEM_DOMAIN_GTT,
-		       I915_GEM_DOMAIN_GTT);
-
-	reloc.target_handle = flags & CORK ? 1 : 0;
-	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc.presumed_offset = obj[1].offset;
-
-	for (j = 0; j < repeats; j++) {
-		execbuf.batch_start_offset = 64 * j;
-		reloc.offset =
-			execbuf.batch_start_offset + sizeof(uint32_t);
-		reloc.delta = sizeof(uint32_t) * j;
-
-		offset = reloc.presumed_offset;
-		offset += reloc.delta;
-
-		i = 16 * j;
-		/* MI_STORE_REG_MEM */
-		map[i++] = 0x24 << 23 | 1;
-		if (has_64bit_reloc)
-			map[i-1]++;
-		map[i++] = RCS_TIMESTAMP; /* ring local! */
-		map[i++] = offset;
-		if (has_64bit_reloc)
-			map[i++] = offset >> 32;
-		map[i++] = MI_BATCH_BUFFER_END;
-	}
-
-	if (flags & CORK) {
-		plug(fd, &c);
-		obj[0].handle = c.handle;
-		execbuf.buffers_ptr = to_user_pointer(&obj[0]);
-		execbuf.buffer_count = 3;
-	}
-
-	start = *reg;
-	for (j = 0; j < repeats; j++) {
-		uint64_t presumed_offset = reloc.presumed_offset;
-
-		execbuf.batch_start_offset = 64 * j;
-		reloc.offset =
-			execbuf.batch_start_offset + sizeof(uint32_t);
-		reloc.delta = sizeof(uint32_t) * j;
-
-		gem_execbuf(fd, &execbuf);
-		igt_assert(reloc.presumed_offset == presumed_offset);
-	}
-	end = *reg;
-	igt_assert(reloc.presumed_offset == obj[1].offset);
-
-	if (flags & CORK)
-		unplug(&c);
-
-	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
-	gpu_latency = (results[repeats-1] - results[0]) / (double)(repeats-1);
-
-	gem_set_domain(fd, obj[2].handle,
-		       I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
-
-	execbuf.batch_start_offset = 0;
-	for (j = 0; j < repeats - 1; j++) {
-		offset = obj[2].offset;
-		offset += 64 * (j + 1);
-
-		i = 16 * j + (has_64bit_reloc ? 4 : 3);
-		map[i] = MI_BATCH_BUFFER_START;
-		if (gen >= 8) {
-			map[i] |= 1 << 8 | 1;
-			map[i + 1] = offset;
-			map[i + 2] = offset >> 32;
-		} else if (gen >= 6) {
-			map[i] |= 1 << 8;
-			map[i + 1] = offset;
-		} else {
-			map[i] |= 2 << 6;
-			map[i + 1] = offset;
-			if (gen < 4)
-				map[i] |= 1;
-		}
-	}
-	offset = obj[2].offset;
-	gem_execbuf(fd, &execbuf);
-	igt_assert(offset == obj[2].offset);
-
-	gem_set_domain(fd, obj[1].handle, I915_GEM_DOMAIN_GTT, 0);
-	igt_info("%s: dispatch latency: %.2f, execution latency: %.2f (target %.2f)\n",
-		 name,
-		 (end - start) / (double)repeats,
-		 gpu_latency, (results[repeats - 1] - results[0]) / (double)(repeats - 1));
-
-	munmap(map, 64*1024);
-	munmap(results, 4096);
-	gem_close(fd, obj[1].handle);
-	gem_close(fd, obj[2].handle);
-}
-
-static void latency_from_ring(int fd,
-			      unsigned ring, const char *name,
-			      unsigned flags)
-{
-	const struct intel_execution_engine *e;
-	const int gen = intel_gen(intel_get_drm_devid(fd));
-	const int has_64bit_reloc = gen >= 8;
-	struct drm_i915_gem_exec_object2 obj[3];
-	struct drm_i915_gem_relocation_entry reloc;
-	struct drm_i915_gem_execbuffer2 execbuf;
-	const unsigned int repeats = ring_size / 2;
-	uint32_t *map, *results;
-	uint32_t ctx[2] = {};
-	int i, j;
-
-	if (flags & PREEMPT) {
-		ctx[0] = gem_context_create(fd);
-		gem_context_set_priority(fd, ctx[0], -1023);
-
-		ctx[1] = gem_context_create(fd);
-		gem_context_set_priority(fd, ctx[1], 1023);
-	}
-
-	memset(&execbuf, 0, sizeof(execbuf));
-	execbuf.buffers_ptr = to_user_pointer(&obj[1]);
-	execbuf.buffer_count = 2;
-	execbuf.flags = ring;
-	execbuf.flags |= LOCAL_I915_EXEC_NO_RELOC | LOCAL_I915_EXEC_HANDLE_LUT;
-	execbuf.rsvd1 = ctx[1];
-
-	memset(obj, 0, sizeof(obj));
-	obj[1].handle = gem_create(fd, 4096);
-	obj[1].flags = EXEC_OBJECT_WRITE;
-	results = gem_mmap__wc(fd, obj[1].handle, 0, 4096, PROT_READ);
-
-	obj[2].handle = gem_create(fd, 64*1024);
-	map = gem_mmap__wc(fd, obj[2].handle, 0, 64*1024, PROT_WRITE);
-	gem_set_domain(fd, obj[2].handle,
-		       I915_GEM_DOMAIN_GTT,
-		       I915_GEM_DOMAIN_GTT);
-	map[0] = MI_BATCH_BUFFER_END;
-	gem_execbuf(fd, &execbuf);
-
-	memset(&reloc,0, sizeof(reloc));
-	obj[2].relocation_count = 1;
-	obj[2].relocs_ptr = to_user_pointer(&reloc);
-
-	gem_set_domain(fd, obj[2].handle,
-		       I915_GEM_DOMAIN_GTT,
-		       I915_GEM_DOMAIN_GTT);
-
-	reloc.read_domains = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc.write_domain = I915_GEM_DOMAIN_INSTRUCTION;
-	reloc.presumed_offset = obj[1].offset;
-	reloc.target_handle = flags & CORK ? 1 : 0;
-
-	for (e = intel_execution_engines; e->name; e++) {
-		igt_spin_t *spin = NULL;
-		struct cork c;
-
-		if (e->exec_id == 0)
-			continue;
-
-		if (!gem_has_ring(fd, e->exec_id | e->flags))
-			continue;
-
-		gem_set_domain(fd, obj[2].handle,
-			       I915_GEM_DOMAIN_GTT,
-			       I915_GEM_DOMAIN_GTT);
-
-		if (flags & PREEMPT)
-			spin = igt_spin_batch_new(fd, ctx[0], ring, 0);
-
-		if (flags & CORK) {
-			plug(fd, &c);
-			obj[0].handle = c.handle;
-			execbuf.buffers_ptr = to_user_pointer(&obj[0]);
-			execbuf.buffer_count = 3;
-		}
-
-		for (j = 0; j < repeats; j++) {
-			uint64_t offset;
-
-			execbuf.flags &= ~ENGINE_FLAGS;
-			execbuf.flags |= ring;
-
-			execbuf.batch_start_offset = 64 * j;
-			reloc.offset =
-				execbuf.batch_start_offset + sizeof(uint32_t);
-			reloc.delta = sizeof(uint32_t) * j;
-
-			reloc.presumed_offset = obj[1].offset;
-			offset = reloc.presumed_offset;
-			offset += reloc.delta;
-
-			i = 16 * j;
-			/* MI_STORE_REG_MEM */
-			map[i++] = 0x24 << 23 | 1;
-			if (has_64bit_reloc)
-				map[i-1]++;
-			map[i++] = RCS_TIMESTAMP; /* ring local! */
-			map[i++] = offset;
-			if (has_64bit_reloc)
-				map[i++] = offset >> 32;
-			map[i++] = MI_BATCH_BUFFER_END;
-
-			gem_execbuf(fd, &execbuf);
-
-			execbuf.flags &= ~ENGINE_FLAGS;
-			execbuf.flags |= e->exec_id | e->flags;
-
-			execbuf.batch_start_offset = 64 * (j + repeats);
-			reloc.offset =
-				execbuf.batch_start_offset + sizeof(uint32_t);
-			reloc.delta = sizeof(uint32_t) * (j + repeats);
-
-			reloc.presumed_offset = obj[1].offset;
-			offset = reloc.presumed_offset;
-			offset += reloc.delta;
-
-			i = 16 * (j + repeats);
-			/* MI_STORE_REG_MEM */
-			map[i++] = 0x24 << 23 | 1;
-			if (has_64bit_reloc)
-				map[i-1]++;
-			map[i++] = RCS_TIMESTAMP; /* ring local! */
-			map[i++] = offset;
-			if (has_64bit_reloc)
-				map[i++] = offset >> 32;
-			map[i++] = MI_BATCH_BUFFER_END;
-
-			gem_execbuf(fd, &execbuf);
-		}
-
-		if (flags & CORK)
-			unplug(&c);
-		gem_set_domain(fd, obj[1].handle,
-			       I915_GEM_DOMAIN_GTT,
-			       I915_GEM_DOMAIN_GTT);
-		igt_spin_batch_free(fd, spin);
-
-		igt_info("%s-%s delay: %.2f\n",
-			 name, e->name, (results[2*repeats-1] - results[0]) / (double)repeats);
-	}
-
-	munmap(map, 64*1024);
-	munmap(results, 4096);
-	gem_close(fd, obj[1].handle);
-	gem_close(fd, obj[2].handle);
-
-	if (flags & PREEMPT) {
-		gem_context_destroy(fd, ctx[1]);
-		gem_context_destroy(fd, ctx[0]);
-	}
-}
-
-igt_main
-{
-	const struct intel_execution_engine *e;
-	int device = -1;
-
-	igt_fixture {
-		device = drm_open_driver(DRIVER_INTEL);
-		igt_require_gem(device);
-		gem_require_mmap_wc(device);
-
-		gem_submission_print_method(device);
-
-		ring_size = measure_ring_size(device);
-		igt_info("Ring size: %d batches\n", ring_size);
-		igt_require(ring_size > 8);
-		ring_size -= 8; /* leave some spare */
-		if (ring_size > 1024)
-			ring_size = 1024;
-
-		intel_register_access_init(intel_get_pci_device(), false, device);
-	}
-
-	igt_subtest_group {
-		igt_fixture
-			igt_require(intel_gen(intel_get_drm_devid(device)) >= 7);
-
-		for (e = intel_execution_engines; e->name; e++) {
-			if (e->exec_id == 0)
-				continue;
-
-			igt_subtest_group {
-				igt_fixture {
-					gem_require_ring(device, e->exec_id | e->flags);
-				}
-
-				igt_subtest_f("%s-dispatch", e->name)
-					latency_on_ring(device,
-							e->exec_id | e->flags,
-							e->name, 0);
-
-				igt_subtest_f("%s-dispatch-queued", e->name)
-					latency_on_ring(device,
-							e->exec_id | e->flags,
-							e->name, CORK);
-
-				igt_subtest_f("%s-synchronisation", e->name)
-					latency_from_ring(device,
-							  e->exec_id | e->flags,
-							  e->name, 0);
-
-				igt_subtest_f("%s-synchronisation-queued", e->name)
-					latency_from_ring(device,
-							  e->exec_id | e->flags,
-							  e->name, CORK);
-
-				igt_subtest_group {
-					igt_fixture {
-						igt_require(gem_scheduler_has_preemption(device));
-					}
-
-					igt_subtest_f("%s-preemption", e->name)
-						latency_from_ring(device,
-								  e->exec_id | e->flags,
-								  e->name, PREEMPT);
-				}
-			}
-		}
-	}
-
-	igt_fixture {
-		close(device);
-	}
-}
diff --git a/tests/meson.build b/tests/meson.build
index 521a4c4..e77a221 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -55,7 +55,6 @@ test_progs = [
 	'gem_exec_fence',
 	'gem_exec_flush',
 	'gem_exec_gttfill',
-	'gem_exec_latency',
 	'gem_exec_lut_handle',
 	'gem_exec_nop',
 	'gem_exec_parallel',
-- 
2.7.4



More information about the igt-dev mailing list