[Intel-gfx] [PATCH i-g-t v3 3/6] tests/gem_scheduler: Add gem_scheduler test

Thu Mar 10 11:03:31 UTC 2016

This is intended to test the scheduler behaviour is correct.
The subtests are
<ring>-basic
Tests that batch buffers of the same priority submitted to a ring
execute in the order they are submitted.
<ring>-read
Submits a batch buffer with a read dependency to a buffer object to
a ring which is held in the scheduler queue by a long running batch
buffer. Submit batch buffers to other rings that have a read dependency
to the same buffer object. Ensure they execute before the batch buffer
being held up behind the long running batch buffer.
<ring>-write
Submits a batch buffer with a write dependency to a buffer object to
a ring which is held in the scheduler queue by a long running batch
buffer. Submit batch buffers to other rings that have a write dependency
to the same buffer object. Submit batch buffers with no interdependencies
to all rings. Ensure the batch buffers that have write dependencies are
executed in submission order but the batch buffers without interdependencies
do not get held up.

v2: Addressed review comments from Daniele Ceraolo Spurio

v3: Added logic to use generic BSD ring if there is 1 and BSD1 / BSD2 if there
    are 2.

Signed-off-by: Derek Morton <derek.j.morton at intel.com>
---
 tests/Makefile.sources |   1 +
 tests/gem_scheduler.c  | 459 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 460 insertions(+)
 create mode 100644 tests/gem_scheduler.c

diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index f8b18b0..c88e045 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -66,6 +66,7 @@ TESTS_progs_M = \
 	gem_request_retire \
 	gem_reset_stats \
 	gem_ringfill \
+	gem_scheduler \
 	gem_set_tiling_vs_blt \
 	gem_softpin \
 	gem_stolen \
diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c
new file mode 100644
index 0000000..436440a
--- /dev/null
+++ b/tests/gem_scheduler.c
@@ -0,0 +1,459 @@
+/*
+ * Copyright © 2016 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Derek Morton <derek.j.morton at intel.com>
+ *
+ */
+
+#include "igt.h"
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <inttypes.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+
+IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure independant "
+                     "batch buffers of the same priority are executed in "
+                     "submission order. Read-read tests ensure "
+                     "batch buffers with a read dependency to the same buffer "
+                     "object do not block each other. Write-write dependency "
+                     "tests ensure batch buffers with a write dependency to a "
+                     "buffer object will be executed in submission order but "
+                     "will not block execution of other independant batch "
+                     "buffers.");
+
+#define SEC_TO_NSEC (1000 * 1000 * 1000)
+
+static struct ring {
+	const char *name;
+	int id;
+	bool exists;
+} rings[] = {
+	{ "render", I915_EXEC_RENDER, false },
+	{ "bsd",    I915_EXEC_BSD , false },
+	{ "bsd2",    I915_EXEC_BSD | 2<<13, false },
+	{ "blt",    I915_EXEC_BLT, false },
+	{ "vebox",  I915_EXEC_VEBOX, false },
+};
+
+#define NBR_RINGS (sizeof(rings)/sizeof(struct ring))
+
+static void check_rings(int fd) {
+	int loop;
+	for(loop=0; loop < NBR_RINGS; loop++) {
+		if(gem_has_ring(fd, rings[loop].id)) {
+			if(rings[loop].id == (I915_EXEC_BSD | 2<<13)) {
+				rings[loop].exists = gem_has_bsd2(fd);
+			}
+			else {
+				rings[loop].exists = true;
+				if(rings[loop].id == I915_EXEC_BSD)
+					/* If there is BSD2, need to make BSD1
+					 * explicit.
+					 */
+					if(gem_has_bsd2(fd))
+						rings[loop].id |= (1<<13);
+			}
+		}
+	}
+}
+
+static drm_intel_bo *create_and_check_bo(drm_intel_bufmgr *bufmgr, const char *desc)
+{
+	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, desc, BATCH_SZ, BATCH_SZ);
+	igt_assert_f(bo, "Failed allocating %s\n", desc);
+	return bo;
+}
+
+static struct intel_batchbuffer *create_delay_bb(int fd, drm_intel_bufmgr *bufmgr,
+                                                 int ringid, uint32_t loops,
+                                                 drm_intel_bo *dest)
+{
+	struct intel_batchbuffer *buffer;
+	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(buffer);
+	igt_create_delay_bb(fd, buffer, ringid, loops, dest);
+	return buffer;
+}
+
+static struct intel_batchbuffer *create_timestamp_bb(int fd, drm_intel_bufmgr *bufmgr,
+                                                     int ringid, drm_intel_bo *dest,
+                                                     drm_intel_bo *load, bool write)
+{
+	struct intel_batchbuffer *buffer;
+	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(buffer);
+	igt_create_timestamp_bb(fd, buffer, ringid, dest, load, write);
+	return buffer;
+}
+
+static struct intel_batchbuffer *create_noop_bb(int fd, drm_intel_bufmgr *bufmgr,
+                                                int noops)
+{
+	struct intel_batchbuffer *buffer;
+	buffer = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
+	igt_assert(buffer);
+	igt_create_noop_bb(buffer, noops);
+	return buffer;
+}
+
+static void init_context(int *fd, drm_intel_bufmgr **bufmgr, int ringid)
+{
+	struct intel_batchbuffer *noop_bb;
+	*fd = drm_open_driver(DRIVER_INTEL);
+	igt_assert(*fd >= 0);
+	*bufmgr = drm_intel_bufmgr_gem_init(*fd, BATCH_SZ);
+	igt_assert(*bufmgr);
+	drm_intel_bufmgr_gem_enable_reuse(*bufmgr);
+	/* Send a noop batch buffer to force any deferred initialisation */
+	noop_bb = create_noop_bb(*fd, *bufmgr, 5);
+	intel_batchbuffer_flush_on_ring(noop_bb, ringid);
+	intel_batchbuffer_free(noop_bb);
+}
+
+/* Basic test. Check batch buffers of the same priority and with no dependencies
+ * are executed in the order they are submitted.
+ */
+#define NBR_BASIC_FDs (3)
+static void run_test_basic(int in_flight, int ringid)
+{
+	int fd[NBR_BASIC_FDs];
+	int loop;
+	drm_intel_bufmgr *bufmgr[NBR_BASIC_FDs];
+	uint32_t *delay_buf, *ts1_buf, *ts2_buf;
+	struct intel_batchbuffer *ts1_bb, *ts2_bb;
+	struct intel_batchbuffer **in_flight_bbs;
+	uint32_t calibrated_1s;
+	drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo;
+
+	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
+	igt_assert(in_flight_bbs);
+
+	/* Need multiple i915 fd's. Scheduler will not change execution order of
+	 * batch buffers from the same context.
+	 */
+	for(loop=0; loop < NBR_BASIC_FDs; loop++)
+		init_context(&(fd[loop]), &(bufmgr[loop]), ringid);
+
+
+	/* Create buffer objects */
+	delay_bo = create_and_check_bo(bufmgr[0], "delay bo");
+	ts1_bo = create_and_check_bo(bufmgr[1], "ts1 bo");
+	ts2_bo = create_and_check_bo(bufmgr[2], "ts2 bo");
+
+	/* Put some non zero values in the delay bo */
+	{
+		uint32_t data=0xffffffff;
+		drm_intel_bo_subdata(delay_bo, 0, 4, &data);
+	}
+
+	calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid);
+
+	/* Batch buffers to fill the in flight queue */
+	in_flight_bbs[0] = create_delay_bb(fd[0], bufmgr[0], ringid, calibrated_1s, delay_bo);
+	for(loop = 1; loop < in_flight; loop++)
+		in_flight_bbs[loop] = create_noop_bb(fd[0], bufmgr[0], 5);
+
+	/* Extra batch buffers in the scheduler queue */
+	ts1_bb = create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo, NULL, false);
+	ts2_bb = create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo, NULL, false);
+
+	/* Flush batchbuffers */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid);
+	intel_batchbuffer_flush_on_ring(ts1_bb, ringid);
+	intel_batchbuffer_flush_on_ring(ts2_bb, ringid);
+
+	/* This will not return until the bo has finished executing */
+	drm_intel_bo_map(delay_bo, 0);
+	drm_intel_bo_map(ts1_bo, 0);
+	drm_intel_bo_map(ts2_bo, 0);
+
+	delay_buf = delay_bo->virtual;
+	ts1_buf = ts1_bo->virtual;
+	ts2_buf = ts2_bo->virtual;
+
+	igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]);
+	igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]);
+	igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]);
+
+	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
+	igt_assert_f(delay_buf[0] == 0,
+	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", delay_buf[0]);
+
+	igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]),
+	             "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n",
+	             delay_buf[2], ts1_buf[0]);
+	igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]),
+	             "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 ")\n",
+	             ts1_buf[0], ts2_buf[0]);
+
+	/* Cleanup */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_free(in_flight_bbs[loop]);
+	intel_batchbuffer_free(ts1_bb);
+	intel_batchbuffer_free(ts2_bb);
+
+	drm_intel_bo_unreference(delay_bo);
+	drm_intel_bo_unreference(ts1_bo);
+	drm_intel_bo_unreference(ts2_bo);
+	for(loop = 0; loop < NBR_BASIC_FDs; loop++) {
+		drm_intel_bufmgr_destroy(bufmgr[loop]);
+		close(fd[loop]);
+	}
+	free(in_flight_bbs);
+}
+
+/* Dependency test.
+ * write=0, Submit batch buffers with read dependencies to all rings. Delay one
+ * with a long executing batch buffer. Check the others are not held up.
+ * write=1, Submit batch buffers with write dependencies to all rings. Delay one
+ * with a long executing batch buffer. Also submit batch buffers with no
+ * dependencies to all rings. Batch buffers with write dependencies should be
+ * executed in submission order. The batch buffers with no dependencies should
+ * not be held up.
+ */
+static void run_test_dependency(int in_flight, int ring, bool write)
+{
+	int fd[NBR_RINGS], fd2[NBR_RINGS];
+	int loop;
+	int prime_fd;
+	uint32_t *delay_buf, *ts_buf[NBR_RINGS], *ts2_buf[NBR_RINGS], *shared_buf;
+	uint32_t calibrated_1s;
+	drm_intel_bufmgr *bufmgr[NBR_RINGS], *bufmgr2[NBR_RINGS];
+	struct intel_batchbuffer *ts_bb[NBR_RINGS], *ts2_bb[NBR_RINGS], **in_flight_bbs;
+	drm_intel_bo *delay_bo, *ts_bo[NBR_RINGS], *ts2_bo[NBR_RINGS], *shared_bo[NBR_RINGS];
+
+	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
+	igt_assert(in_flight_bbs);
+
+	/* Need multiple i915 fd's. Scheduler will not change execution order of
+	 * batch buffers from the same context.
+	 */
+	for(loop=0; loop < NBR_RINGS; loop++) {
+		init_context(&(fd[loop]), &(bufmgr[loop]), rings[loop].id);
+		if(write)
+			init_context(&(fd2[loop]), &(bufmgr2[loop]), rings[loop].id);
+	}
+
+	/* Create buffer objects */
+	delay_bo = create_and_check_bo(bufmgr[ring], "delay bo");
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		ts_bo[loop] = create_and_check_bo(bufmgr[loop], "ts bo");
+		if(write)
+			ts2_bo[loop] = create_and_check_bo(bufmgr2[loop], "ts2 bo");
+	}
+
+	/* Create shared buffer object */
+	shared_bo[0] = create_and_check_bo(bufmgr[0], "shared bo");
+
+	drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd);
+	for(loop = 1; loop < NBR_RINGS; loop++) {
+		shared_bo[loop] = drm_intel_bo_gem_create_from_prime(bufmgr[loop],
+		                                                     prime_fd, BATCH_SZ);
+		igt_assert(shared_bo[loop]);
+	}
+	close(prime_fd);
+
+	/* Put some non zero values in the delay and shared bo */
+	drm_intel_bo_map(delay_bo, 1);
+	delay_buf = delay_bo->virtual;
+	delay_buf[0] = 0xff;
+	drm_intel_bo_unmap(delay_bo);
+	drm_intel_bo_map(shared_bo[0], 1);
+	shared_buf = shared_bo[0]->virtual;
+	shared_buf[0] = 0xff00ff00;
+	drm_intel_bo_unmap(shared_bo[0]);
+
+	calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], rings[ring].id);
+
+	/* Batch buffers to fill the in flight queue */
+	in_flight_bbs[0] = create_delay_bb(fd[ring], bufmgr[ring], rings[ring].id,
+	                                   calibrated_1s, delay_bo);
+	for(loop = 1; loop < in_flight; loop++)
+		in_flight_bbs[loop] = create_noop_bb(fd[ring], bufmgr[ring], 5);
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		ts_bb[loop] = create_timestamp_bb(fd[loop], bufmgr[loop],
+		                                  rings[loop].id, ts_bo[loop],
+		                                  shared_bo[loop], write);
+		if(write)
+			ts2_bb[loop] = create_timestamp_bb(fd2[loop], bufmgr2[loop],
+			                                   rings[loop].id, ts2_bo[loop],
+			                                   NULL, false);
+	}
+
+	/* Flush batchbuffers */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], rings[ring].id);
+
+	intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id);
+	for(loop = 0; loop < NBR_RINGS; loop++)
+		if((loop != ring) && rings[loop].exists)
+			intel_batchbuffer_flush_on_ring(ts_bb[loop], rings[loop].id);
+
+	if(write) {
+		intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id);
+		for(loop = 0; loop < NBR_RINGS; loop++)
+			if((loop != ring) && rings[loop].exists)
+				intel_batchbuffer_flush_on_ring(ts2_bb[loop], rings[loop].id);
+	}
+
+	/* This will not return until the bo has finished executing */
+	drm_intel_bo_map(delay_bo, 0);
+	delay_buf = delay_bo->virtual;
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		drm_intel_bo_map(ts_bo[loop], 0);
+		ts_buf[loop] = ts_bo[loop]->virtual;
+		if(write) {
+			drm_intel_bo_map(ts2_bo[loop], 0);
+			ts2_buf[loop] = ts2_bo[loop]->virtual;
+		}
+	}
+
+	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
+	igt_assert_f(delay_buf[0] == 0,
+	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n",
+	             delay_buf[0]);
+
+	igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n",
+	          rings[ring].name, delay_buf[2]);
+	for(loop = 0; loop < NBR_RINGS; loop++)
+		igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
+		          rings[loop].name, ts_buf[loop][0]);
+
+	if(write) {
+		igt_debug("Independant batch buffers\n");
+		for(loop = 0; loop < NBR_RINGS; loop++)
+			igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
+			          rings[loop].name, ts2_buf[loop][0]);
+	}
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		if((loop != ring) && rings[loop].exists) {
+			if(write) {
+				/* Write dependency, delayed ring should run first */
+				igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]),
+				             "%s ran before %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+				             rings[loop].name, rings[ring].name,
+				             ts_buf[loop][0], ts_buf[ring][0]);
+				/* Second bb without dependency should run first */
+				igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], ts_buf[loop][0]),
+				             "(%s) independant bb was held up - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+				             rings[loop].name, ts_buf[loop][0], ts2_buf[loop][0]);
+			}
+			else
+				/* Read dependency, delayed ring should run last */
+				igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]),
+				             "%s ran after %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
+				             rings[loop].name, rings[ring].name,
+				             ts_buf[loop][0], ts_buf[ring][0]);
+		}
+	}
+
+	/* Cleanup */
+	for(loop = 0; loop < in_flight; loop++)
+		intel_batchbuffer_free(in_flight_bbs[loop]);
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		intel_batchbuffer_free(ts_bb[loop]);
+		drm_intel_bo_unreference(ts_bo[loop]);
+		drm_intel_bo_unreference(shared_bo[loop]);
+		if(write) {
+			intel_batchbuffer_free(ts2_bb[loop]);
+			drm_intel_bo_unreference(ts2_bo[loop]);
+		}
+	}
+
+	drm_intel_bo_unreference(delay_bo);
+
+	for(loop = 0; loop < NBR_RINGS; loop++) {
+		drm_intel_bufmgr_destroy(bufmgr[loop]);
+		close(fd[loop]);
+		if(write) {
+			drm_intel_bufmgr_destroy(bufmgr2[loop]);
+			close(fd2[loop]);
+		}
+	}
+
+	free(in_flight_bbs);
+}
+
+igt_main
+{
+	int loop;
+	int in_flight;
+	int fd;
+
+	igt_fixture {
+		int debug_fd;
+		int l;
+		char buf[6];
+		/* Get nbr of batch buffers that the scheduler will queue in the
+		 * HW. If this debugfs file does not exist there is no scheduler
+		 * so skip the test.
+		 */
+		debug_fd = igt_debugfs_open("i915_scheduler_min_flying", O_RDONLY);
+		igt_skip_on(debug_fd == -1);
+		l = read(debug_fd, buf, sizeof(buf)-1);
+		igt_assert(l > 0);
+		igt_assert(l < sizeof(buf));
+		buf[l] = '\0';
+		/* May be a decimal or hex number depending on scheduler version */
+		if(sscanf(buf, "0x%2x", &in_flight) != 1)
+			igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1,
+			             "Error reading from i915_scheduler_min_flying\n");
+		close(debug_fd);
+		igt_debug("in flight = %d\n", in_flight);
+		fd = drm_open_driver(DRIVER_INTEL);
+		igt_assert(fd >= 0);
+		check_rings(fd);
+	}
+
+	for (loop=0; loop < NBR_RINGS; loop++)
+		igt_subtest_f("%s-basic", rings[loop].name) {
+			gem_require_ring(fd, rings[loop].id);
+			run_test_basic(in_flight, rings[loop].id);
+		}
+
+	for (loop=0; loop < NBR_RINGS; loop++)
+		igt_subtest_f("%s-read", rings[loop].name) {
+			gem_require_ring(fd, rings[loop].id);
+			run_test_dependency(in_flight, loop, false);
+		}
+
+	for (loop=0; loop < NBR_RINGS; loop++)
+		igt_subtest_f("%s-write", rings[loop].name) {
+			gem_require_ring(fd, rings[loop].id);
+			run_test_dependency(in_flight, loop, true);
+		}
+
+	igt_fixture {
+		close(fd);
+	}
+}
-- 
1.9.1