[Intel-gfx] [PATCH i-g-t 2/4] tests/gem_scheduler: Add gem_scheduler test

Wed Feb 17 12:37:07 UTC 2016

Hi,

First round of comments inline. A few things will probably have to
change based on the comments on the previous patch in the series, so I'll
have a better look at the related logic once the new series is up.

Regards,
Daniele

On 12/02/16 09:38, Derek Morton wrote:
> This is intended to test the scheduler behaviour is correct.
> The subtests are
> <ring>-basic
> Tests that batch buffers of the same priority submitted to a ring
> execute in the order they are submitted.
> <ring>-read
> Submits a batch buffer with a read dependency to a buffer object to
> a ring which is held in the scheduler queue by a long running batch
> buffer. Submit batch buffers to other rings that have a read dependency
> to the same buffer object. Ensure they execute before the batch buffer
> being held up behind the long running batch buffer.
> <ring>-write
> Submits a batch buffer with a write dependency to a buffer object to
> a ring which is held in the scheduler queue by a long running batch
> buffer. Submit batch buffers to other rings that have a write dependency
> to the same buffer object. Submit batch buffers with no interdependencies
> to all rings. Ensure the batch buffers that have write dependencies are
> executed in submission order but the batch buffers without interdependencies
> do not get held up.
>
> Signed-off-by: Derek Morton <derek.j.morton at intel.com>
> ---
>   tests/Makefile.sources |   1 +
>   tests/gem_scheduler.c  | 409 +++++++++++++++++++++++++++++++++++++++++++++++++
>   2 files changed, 410 insertions(+)
>   create mode 100644 tests/gem_scheduler.c
>
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index df92586..439f62c 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -66,6 +66,7 @@ TESTS_progs_M = \
>   	gem_request_retire \
>   	gem_reset_stats \
>   	gem_ringfill \
> +	gem_scheduler \
>   	gem_set_tiling_vs_blt \
>   	gem_softpin \
>   	gem_stolen \
> diff --git a/tests/gem_scheduler.c b/tests/gem_scheduler.c
> new file mode 100644
> index 0000000..4824c13
> --- /dev/null
> +++ b/tests/gem_scheduler.c
> @@ -0,0 +1,409 @@
> +/*
> + * Copyright © 2016 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Derek Morton <derek.j.morton at intel.com>
> + *
> + */
> +
> +#include "igt.h"
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <stdint.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <inttypes.h>
> +#include <time.h>
> +#include <sys/stat.h>
> +#include <sys/ioctl.h>
> +#include <fcntl.h>
> +
> +IGT_TEST_DESCRIPTION("Check scheduler behaviour. Basic tests ensure independant "
> +                     "batch buffers of the same priority are executed in "
> +                     "submission order. Read-read tests ensure "
> +                     "batch buffers with a read dependency to the same buffer "
> +                     "object do not block each other. Write-write dependency "
> +                     "tests ensure batch buffers with a write dependency to a "
> +                     "buffer object will be executed in submission order but "
> +                     "will not block execution of other independant batch "
> +                     "buffers.");
> +
> +#define SEC_TO_NSEC (1000 * 1000 * 1000)
> +
> +struct ring {
> +	const char *name;
> +	int id;
> +} rings[] = {
> +	{ "render", I915_EXEC_RENDER },
> +	{ "bsd",    I915_EXEC_BSD },

Is the BSD1/BSD2 difference of any importance for the tests?

> +	{ "blt",    I915_EXEC_BLT },
> +	{ "vebox",  I915_EXEC_VEBOX },
> +};

This is a slight duplication of intel_execution_engines. However, I'm
not sure that intel_execution_engines is suitable here, considering that
you don't seem to want to check both the vanilla I915_EXEC_BSD case
and the BSD1/BSD2 case.

> +
> +#define NBR_RINGS (sizeof(rings)/sizeof(struct ring))
> +
> +/* Basic test. Check batch buffers of the same priority and with no dependencies
> + * are executed in the order they are submitted.
> + */
> +#define NBR_BASIC_FDs (3)
> +static void run_test_basic(int in_flight, int ringid)
> +{
> +	int fd[NBR_BASIC_FDs];
> +	int loop;
> +	drm_intel_bufmgr *bufmgr[NBR_BASIC_FDs];
> +	uint32_t *delay_buf, *ts1_buf, *ts2_buf;
> +	struct intel_batchbuffer *ts1_bb, *ts2_bb;
> +	struct intel_batchbuffer **in_flight_bbs;
> +	uint32_t calibrated_1s;
> +	drm_intel_bo *delay_bo, *ts1_bo, *ts2_bo;
> +
> +	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
> +	igt_assert(in_flight_bbs);
> +
> +	/* Need multiple i915 fd's. Scheduler will not change execution order of
> +	 * batch buffers from the same context.
> +	 */
> +	for(loop=0; loop < NBR_BASIC_FDs; loop++) {
> +		struct intel_batchbuffer *noop_bb;
> +		fd[loop] = drm_open_driver(DRIVER_INTEL);
> +		igt_assert(fd[loop] >= 0);
> +		bufmgr[loop] = drm_intel_bufmgr_gem_init(fd[loop], BATCH_SZ);
> +		igt_assert(bufmgr[loop]);
> +		drm_intel_bufmgr_gem_enable_reuse(bufmgr[loop]);
> +		/* Send a noop batch buffer to force any deferred initialisation */
> +		noop_bb = igt_create_noop_bb(fd[loop], bufmgr[loop], ringid, 5);
> +		intel_batchbuffer_flush_on_ring(noop_bb, ringid);
> +		intel_batchbuffer_free(noop_bb);
> +	}
> +
> +	/* Create buffer objects */
> +	delay_bo = drm_intel_bo_alloc(bufmgr[0], "delay bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(delay_bo);
> +	ts1_bo = drm_intel_bo_alloc(bufmgr[1], "ts1 bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(ts1_bo);
> +	ts2_bo = drm_intel_bo_alloc(bufmgr[2], "ts2 bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(ts2_bo);
> +
> +	/* Put some non zero values in the delay bo */
> +	drm_intel_bo_map(delay_bo, 1);

As I already mentioned on the other patch, in some cases you could use a
subdata/get_subdata call instead of the map/unmap dance.

> +	delay_buf = delay_bo->virtual;
> +	delay_buf[0] = 0xff;
> +	drm_intel_bo_unmap(delay_bo);
> +
> +	calibrated_1s = igt_calibrate_delay_bb(fd[0], bufmgr[0], ringid);
> +
> +	/* Batch buffers to fill the ring */

"fill the ring" feels a bit unclear to me, because you're not filling
the ringbuffer but the in-flight queue of the scheduler. Maybe use
something like "reach the maximum number of in-flight batches"?

> +	in_flight_bbs[0] = igt_create_delay_bb(fd[0], bufmgr[0], ringid, calibrated_1s, delay_bo);
> +	for(loop = 1; loop < in_flight; loop++)
> +		in_flight_bbs[loop] = igt_create_noop_bb(fd[0], bufmgr[0], ringid, 5);
> +
> +	/* Extra batch buffers in the scheduler queue */
> +	ts1_bb = igt_create_timestamp_bb(fd[1], bufmgr[1], ringid, ts1_bo, NULL, false);
> +	ts2_bb = igt_create_timestamp_bb(fd[2], bufmgr[2], ringid, ts2_bo, NULL, false);
> +
> +	/* Flush batchbuffers */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], ringid);
> +	intel_batchbuffer_flush_on_ring(ts1_bb, ringid);
> +	intel_batchbuffer_flush_on_ring(ts2_bb, ringid);
> +
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_map(delay_bo, 0);
> +	drm_intel_bo_map(ts1_bo, 0);
> +	drm_intel_bo_map(ts2_bo, 0);
> +
> +	delay_buf = delay_bo->virtual;
> +	ts1_buf = ts1_bo->virtual;
> +	ts2_buf = ts2_bo->virtual;
> +
> +	igt_debug("Delay Timestamp = 0x%08" PRIx32 "\n", delay_buf[2]);
> +	igt_debug("TS1 Timestamp = 0x%08" PRIx32 "\n", ts1_buf[0]);
> +	igt_debug("TS2 Timestamp = 0x%08" PRIx32 "\n", ts2_buf[0]);
> +
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(delay_buf[0] == 0,
> +	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n", delay_buf[0]);
> +
> +	igt_assert_f(igt_compare_timestamps(delay_buf[2], ts1_buf[0]),
> +	             "Delay ts (0x%08" PRIx32 ") > TS1 ts (0x%08" PRIx32 ")\n",
> +	             delay_buf[2], ts1_buf[0]);
> +	igt_assert_f(igt_compare_timestamps(ts1_buf[0], ts2_buf[0]),
> +	             "TS1 ts (0x%08" PRIx32 ") > TS2 ts (0x%08" PRIx32 ")\n",
> +	             ts1_buf[0], ts2_buf[0]);
> +
> +	/* Cleanup */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_free(in_flight_bbs[loop]);
> +	intel_batchbuffer_free(ts1_bb);
> +	intel_batchbuffer_free(ts2_bb);
> +
> +	drm_intel_bo_unreference(delay_bo);
> +	drm_intel_bo_unreference(ts1_bo);
> +	drm_intel_bo_unreference(ts2_bo);
> +	for(loop = 0; loop < 3; loop++) {

3  ->  NBR_BASIC_FDs

> +		drm_intel_bufmgr_destroy(bufmgr[loop]);
> +		close(fd[loop]);
> +	}
> +	free(in_flight_bbs);
> +}
> +
> +/* Dependency test.
> + * write=0, Submit batch buffers with read dependencies to all rings. Delay one
> + * with a long executing batch buffer. Check the others are not held up.
> + * write=1, Submit batch buffers with write dependencies to all rings. Delay one
> + * with a long executing batch buffer. Also submit batch buffers with no
> + * dependencies to all rings. Batch buffers with write dependencies should be
> + * executed in submission order. The batch buffers with no dependencies should
> + * not be held up.
> + */
> +static void run_test_dependency(int in_flight, int ring, bool write)

This function contains several setup/cleanup loops that make the actual
test logic itself a bit difficult to isolate. Maybe those could be moved
to helper functions to make the distinction clearer.

> +{
> +	int fd[NBR_RINGS], fd2[NBR_RINGS];
> +	int loop;
> +	int prime_fd;
> +	uint32_t *delay_buf, *ts_buf[NBR_RINGS], *ts2_buf[NBR_RINGS], *shared_buf;
> +	uint32_t calibrated_1s;
> +	drm_intel_bufmgr *bufmgr[NBR_RINGS], *bufmgr2[NBR_RINGS];
> +	struct intel_batchbuffer *ts_bb[NBR_RINGS], *ts2_bb[NBR_RINGS], **in_flight_bbs;
> +	drm_intel_bo *delay_bo, *ts_bo[NBR_RINGS], *ts2_bo[NBR_RINGS], *shared_bo[NBR_RINGS];
> +
> +	in_flight_bbs = malloc(in_flight * sizeof(struct intel_batchbuffer *));
> +	igt_assert(in_flight_bbs);
> +
> +	/* Need multiple i915 fd's. Scheduler will not change execution order of
> +	 * batch buffers from the same context.
> +	 */
> +	for(loop=0; loop < NBR_RINGS; loop++) {
> +		struct intel_batchbuffer *noop_bb;
> +		fd[loop] = drm_open_driver(DRIVER_INTEL);
> +		igt_assert(fd[loop] >= 0);
> +		bufmgr[loop] = drm_intel_bufmgr_gem_init(fd[loop], BATCH_SZ);
> +		igt_assert(bufmgr[loop]);
> +		drm_intel_bufmgr_gem_enable_reuse(bufmgr[loop]);
> +		/* Send a noop batch buffer to force any deferred initialisation */
> +		noop_bb = igt_create_noop_bb(fd[loop], bufmgr[loop], rings[loop].id, 5);
> +		intel_batchbuffer_flush_on_ring(noop_bb, rings[loop].id);
> +		intel_batchbuffer_free(noop_bb);

The fd opening code is duplicated several times in this file. You could
use a static function to reduce the duplication.

> +		if(write) {
> +			struct intel_batchbuffer *noop_bb2;
> +			fd2[loop] = drm_open_driver(DRIVER_INTEL);
> +			igt_assert(fd2[loop] >= 0);
> +			bufmgr2[loop] = drm_intel_bufmgr_gem_init(fd2[loop], BATCH_SZ);
> +			igt_assert(bufmgr2[loop]);
> +			drm_intel_bufmgr_gem_enable_reuse(bufmgr2[loop]);
> +			/* Send a noop batch buffer to force any deferred initialisation */
> +			noop_bb2 = igt_create_noop_bb(fd2[loop], bufmgr2[loop], rings[loop].id, 5);
> +			intel_batchbuffer_flush_on_ring(noop_bb2, rings[loop].id);
> +			intel_batchbuffer_free(noop_bb2);
> +		}
> +	}
> +
> +	/* Create buffer objects */
> +	delay_bo = drm_intel_bo_alloc(bufmgr[ring], "delay bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(delay_bo);
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		ts_bo[loop] = drm_intel_bo_alloc(bufmgr[loop], "ts bo", BATCH_SZ, BATCH_SZ);
> +		igt_assert(ts_bo[loop]);
> +		if(write) {
> +			ts2_bo[loop] = drm_intel_bo_alloc(bufmgr2[loop], "ts bo", BATCH_SZ, BATCH_SZ);
> +			igt_assert(ts2_bo[loop]);
> +		}
> +	}
> +
> +	/* Create shared buffer object */
> +	shared_bo[0] = drm_intel_bo_alloc(bufmgr[0], "shared bo", BATCH_SZ, BATCH_SZ);
> +	igt_assert(shared_bo[0]);
> +
> +	drm_intel_bo_gem_export_to_prime(shared_bo[0], &prime_fd);
> +	for(loop = 1; loop < NBR_RINGS; loop++) {
> +		shared_bo[loop] = drm_intel_bo_gem_create_from_prime(bufmgr[loop],
> +		                                                     prime_fd, BATCH_SZ);
> +		igt_assert(shared_bo[loop]);
> +	}
> +	close(prime_fd);
> +
> +	/* Put some non zero values in the delay and shared bo */
> +	drm_intel_bo_map(delay_bo, 1);
> +	delay_buf = delay_bo->virtual;
> +	delay_buf[0] = 0xff;
> +	drm_intel_bo_unmap(delay_bo);
> +	drm_intel_bo_map(shared_bo[0], 1);
> +	shared_buf = shared_bo[0]->virtual;
> +	shared_buf[0] = 0xff00ff00;
> +	drm_intel_bo_unmap(shared_bo[0]);
> +
> +	calibrated_1s = igt_calibrate_delay_bb(fd[ring], bufmgr[ring], rings[ring].id);
> +
> +	/* Batch buffers to fill the ring */
> +	in_flight_bbs[0] = igt_create_delay_bb(fd[ring], bufmgr[ring],
> +	                                       rings[ring].id, calibrated_1s, delay_bo);
> +	for(loop = 1; loop < in_flight; loop++)
> +		in_flight_bbs[loop] = igt_create_noop_bb(fd[ring], bufmgr[ring],
> +		                                         rings[ring].id, 5);
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		ts_bb[loop] = igt_create_timestamp_bb(fd[loop], bufmgr[loop],
> +		                  rings[loop].id, ts_bo[loop], shared_bo[loop], write);
> +		if(write)
> +			ts2_bb[loop] = igt_create_timestamp_bb(fd2[loop], bufmgr2[loop],
> +			                   rings[loop].id, ts2_bo[loop], NULL, false);
> +	}
> +
> +	/* Flush batchbuffers */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_flush_on_ring(in_flight_bbs[loop], rings[ring].id);
> +
> +	intel_batchbuffer_flush_on_ring(ts_bb[ring], rings[ring].id);
> +	for(loop = 0; loop < NBR_RINGS; loop++)
> +		if(loop != ring)
> +			intel_batchbuffer_flush_on_ring(ts_bb[loop], rings[loop].id);
> +
> +	if(write) {
> +		intel_batchbuffer_flush_on_ring(ts2_bb[ring], rings[ring].id);
> +		for(loop = 0; loop < NBR_RINGS; loop++)
> +			if(loop != ring)
> +				intel_batchbuffer_flush_on_ring(ts2_bb[loop], rings[loop].id);
> +	}
> +
> +	/* This will not return until the bo has finished executing */
> +	drm_intel_bo_map(delay_bo, 0);
> +	delay_buf = delay_bo->virtual;
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		drm_intel_bo_map(ts_bo[loop], 0);
> +		ts_buf[loop] = ts_bo[loop]->virtual;
> +		if(write) {
> +			drm_intel_bo_map(ts2_bo[loop], 0);
> +			ts2_buf[loop] = ts2_bo[loop]->virtual;
> +		}
> +	}
> +
> +	/* buf[0] in the target buffer should be 0 if the batch buffer completed */
> +	igt_assert_f(delay_buf[0] == 0,
> +	             "delay_buf[0] expected 0x0, got 0x%" PRIx32 "\n",
> +	             delay_buf[0]);
> +
> +	igt_debug("%6s delay timestamp = 0x%08" PRIx32 "\n",
> +	          rings[ring].name, delay_buf[2]);
> +	for(loop = 0; loop < NBR_RINGS; loop++)
> +		igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
> +		          rings[loop].name, ts_buf[loop][0]);
> +
> +	if(write) {
> +		igt_debug("Independant batch buffers\n");
> +		for(loop = 0; loop < NBR_RINGS; loop++)
> +			igt_debug("%6s batch timestamp = 0x%08" PRIx32 "\n",
> +			          rings[loop].name, ts2_buf[loop][0]);
> +	}
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		if(loop != ring) {
> +			if(write) {
> +				/* Write dependency, delayed ring should run first */
> +				igt_assert_f(igt_compare_timestamps(ts_buf[ring][0], ts_buf[loop][0]),
> +				             "%s ran before %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
> +				             rings[loop].name, rings[ring].name,
> +				             ts_buf[loop][0], ts_buf[ring][0]);
> +				/* Second bb without dependency should run first */
> +				igt_assert_f(igt_compare_timestamps(ts2_buf[loop][0], ts_buf[loop][0]),
> +				             "(%s) independant bb was held up - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
> +				             rings[loop].name, ts_buf[loop][0], ts2_buf[loop][0]);
> +			}
> +			else
> +				/* Read dependency, delayed ring should run last */
> +				igt_assert_f(igt_compare_timestamps(ts_buf[loop][0], ts_buf[ring][0]),
> +				             "%s ran after %s - 0x%08" PRIx32 " vs 0x%08" PRIx32 "\n",
> +				             rings[loop].name, rings[ring].name,
> +				             ts_buf[loop][0], ts_buf[ring][0]);
> +		}
> +	}
> +
> +	/* Cleanup */
> +	for(loop = 0; loop < in_flight; loop++)
> +		intel_batchbuffer_free(in_flight_bbs[loop]);
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		intel_batchbuffer_free(ts_bb[loop]);
> +		drm_intel_bo_unreference(ts_bo[loop]);
> +		drm_intel_bo_unreference(shared_bo[loop]);
> +		if(write) {
> +			intel_batchbuffer_free(ts2_bb[loop]);
> +			drm_intel_bo_unreference(ts2_bo[loop]);
> +		}
> +	}
> +
> +	drm_intel_bo_unreference(delay_bo);
> +
> +	for(loop = 0; loop < NBR_RINGS; loop++) {
> +		drm_intel_bufmgr_destroy(bufmgr[loop]);
> +		close(fd[loop]);
> +		if(write) {
> +			drm_intel_bufmgr_destroy(bufmgr2[loop]);
> +			close(fd2[loop]);
> +		}
> +	}
> +
> +	free(in_flight_bbs);
> +}
> +
> +igt_main
> +{
> +	int loop;
> +	int in_flight;
> +
> +	igt_fixture {
> +		int debug_fd;
> +		int l;
> +		char buf[6];
> +		/* Get nbr of batch buffers that the scheduler will queue in the
> +		 * HW. If this debugfs file does not exist there is no scheduler
> +		 * so skip the test.
> +		 */
> +		debug_fd = igt_debugfs_open("i915_scheduler_min_flying", O_RDONLY);
> +		igt_skip_on(debug_fd == -1);
> +		l = read(debug_fd, buf, sizeof(buf)-1);
> +		igt_assert(l > 0);
> +		igt_assert(l < sizeof(buf));
> +		buf[l] = '\0';
> +		/* May be a decimal or hex number depending on sheduler version */
> +		if(sscanf(buf, "0x%2x", &in_flight) != 1)
> +			igt_assert_f(sscanf(buf, "%2d", &in_flight) == 1,
> +			             "Error reading from i915_scheduler_min_flying\n");

This will probably stabilize before the scheduler is merged, so the
final version should probably have only one sscanf call.

> +		close(debug_fd);
> +		igt_debug("in flight = %d\n", in_flight);
> +	}
> +
> +	for (loop=0; loop < NBR_RINGS; loop++)
> +		igt_subtest_f("%s-basic", rings[loop].name) {
> +			run_test_basic(in_flight, rings[loop].id);
> +		}
> +
> +	for (loop=0; loop < NBR_RINGS; loop++)
> +		igt_subtest_f("%s-read", rings[loop].name) {
> +			run_test_dependency(in_flight, loop, false);
> +		}
> +
> +	for (loop=0; loop < NBR_RINGS; loop++)
> +		igt_subtest_f("%s-write", rings[loop].name) {
> +			run_test_dependency(in_flight, loop, true);
> +		}
> +
> +}