[Intel-gfx] [RFC] igt/gem_exec_fence: New test for sync/fence interface

John.C.Harrison at Intel.com John.C.Harrison at Intel.com
Tue Jan 19 08:04:51 PST 2016


From: John Harrison <John.C.Harrison at Intel.com>

Note, this is a work in progress. It is being posted now as there is
work going on to change the debugging interface used by this test. So
it would be useful to get some comments on whether the proposed
changes will cause a problem for this test or whether the test itself
should be done differently.

Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Cc: Gustavo Padovan <gustavo.padovan at collabora.co.uk>
---
 lib/intel_batchbuffer.c |   36 ++
 lib/intel_batchbuffer.h |    1 +
 tests/Makefile.sources  |    1 +
 tests/gem_exec_fence.c  | 1470 +++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 1508 insertions(+)
 create mode 100644 tests/gem_exec_fence.c

diff --git a/lib/intel_batchbuffer.c b/lib/intel_batchbuffer.c
index 692521f..55c7f9f 100644
--- a/lib/intel_batchbuffer.c
+++ b/lib/intel_batchbuffer.c
@@ -186,6 +186,27 @@ intel_batchbuffer_flush_on_ring(struct intel_batchbuffer *batch, int ring)
 	intel_batchbuffer_reset(batch);
 }
 
+static void /* fence-aware counterpart of intel_batchbuffer_flush_on_ring() */
+intel_batchbuffer_flush_on_ring_fence(struct intel_batchbuffer *batch, int ring,
+				      int fence_in, int *fence_out)
+{
+	unsigned int used = flush_on_ring_common(batch, ring); /* size that is uploaded and executed below */
+	drm_intel_context *ctx;
+
+	if (used == 0) /* nothing to submit */
+		return;
+
+	do_or_die(drm_intel_bo_subdata(batch->bo, 0, used, batch->buffer)); /* copy the CPU-side buffer into the bo */
+
+	batch->ptr = NULL;
+
+	ctx = batch->ctx; /* execute in whatever context the batch currently carries */
+	do_or_die(drm_intel_gem_bo_context_fence_exec(batch->bo, ctx, used,
+						      ring, fence_in, fence_out)); /* both fence arguments are forwarded untouched */
+
+	intel_batchbuffer_reset(batch);
+}
+
 void
 intel_batchbuffer_set_context(struct intel_batchbuffer *batch,
 				     drm_intel_context *context)
@@ -239,6 +260,21 @@ intel_batchbuffer_flush(struct intel_batchbuffer *batch)
 	intel_batchbuffer_flush_on_ring(batch, ring);
 }
 
+/**
+ * intel_batchbuffer_flush_fence:
+ * @batch: batchbuffer object
+ *
+ * Submits the batch on the blitter ring where the platform has one (else ring
+ * 0), forwarding @fence_in and @fence_out to the fence-aware execbuf call.
+ */
+void
+intel_batchbuffer_flush_fence(struct intel_batchbuffer *batch, int fence_in, int *fence_out)
+{
+	int ring = 0;
+	if (HAS_BLT_RING(batch->devid))
+		ring = I915_EXEC_BLT;
+	intel_batchbuffer_flush_on_ring_fence(batch, ring, fence_in, fence_out);
+}
 
 /**
  * intel_batchbuffer_emit_reloc:
diff --git a/lib/intel_batchbuffer.h b/lib/intel_batchbuffer.h
index 869747d..5dece2a 100644
--- a/lib/intel_batchbuffer.h
+++ b/lib/intel_batchbuffer.h
@@ -35,6 +35,7 @@ void intel_batchbuffer_free(struct intel_batchbuffer *batch);
 
 
 void intel_batchbuffer_flush(struct intel_batchbuffer *batch);
+void intel_batchbuffer_flush_fence(struct intel_batchbuffer *batch, int fence_in, int *fence_out);
 void intel_batchbuffer_flush_on_ring(struct intel_batchbuffer *batch, int ring);
 void intel_batchbuffer_flush_with_context(struct intel_batchbuffer *batch,
 					  drm_intel_context *context);
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 8fb2de8..1000324 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -26,6 +26,7 @@ TESTS_progs_M = \
 	gem_exec_alignment \
 	gem_exec_bad_domains \
 	gem_exec_faulting_reloc \
+	gem_exec_fence \
 	gem_exec_nop \
 	gem_exec_params \
 	gem_exec_parse \
diff --git a/tests/gem_exec_fence.c b/tests/gem_exec_fence.c
new file mode 100644
index 0000000..ab6cc84
--- /dev/null
+++ b/tests/gem_exec_fence.c
@@ -0,0 +1,1470 @@
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Tvrtko Ursulin <tvrtko.ursulin at intel.com>
+ *    John Harrison <john.c.harrison at intel.com>
+ *    Geoff Miller <geoff.miller at intel.com>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+#include <signal.h>
+#include <time.h>
+#include <pthread.h>
+#include "drm.h"
+#include "ioctl_wrappers.h"
+#include "drmtest.h"
+#include "intel_io.h"
+#include "intel_chipset.h"
+#include "igt_aux.h"
+#include "sync/sw_sync.h"
+#include "sync/sync.h"
+
+#define I915_DRIVER_NAME		"i915" /* driver_name expected in the sync point info of an i915 fence */
+#define FENCE_ACTIVE (0) /* fence info status: not signalled yet */
+#define FENCE_SIGNALED (1) /* fence info status: signalled */
+
+/* Structure to encapsulate a sw_sync device and our view of its timeline */
+struct sw_sync_obj
+{
+	int fd; /* Descriptor returned by opening /dev/sw_sync, -1 if that failed */
+	int tick; /* Can't read this directly, so we track it here */
+};
+
+static struct sw_sync_obj user_sync_obj;
+
+/*
+ * Open a new sw sync object and reset its locally tracked tick to zero
+ * @obj: Pointer to a struct sw_sync_obj
+ * @return: Zero if open worked, 1 if it failed (obj->fd is then -1).
+ */
+static int init_sw_sync(struct sw_sync_obj *obj)
+{
+	obj->fd = open("/dev/sw_sync", O_RDWR);
+	obj->tick = 0;
+	return (obj->fd == -1);
+}
+
+/*
+ * Closes sw_sync device; obj->fd keeps its stale value afterwards
+ * @obj: pointer to a struct sw_sync_obj
+ */
+static void close_sw_sync(struct sw_sync_obj *obj)
+{
+	close(obj->fd); /* result ignored; NOTE(review): a repeat call closes a stale fd number */
+}
+
+/*
+ * Creates a user fence at a given place on the timeline
+ * Assumes that we are using a struct sw_sync_obj called user_sync_obj in
+ * global scope
+ * @fence_out: receives the new fence fd; left untouched if the ioctl fails
+ * @value: the position to place the fence on the timeline
+ * @return: return value from ioctl
+ */
+static int user_create_fence(int *fence_out, uint32_t value)
+{
+	int ret;
+	struct sw_sync_create_fence_data data = { .value = value }; /* all other fields zeroed, no stack garbage to the kernel */
+	ret = ioctl(user_sync_obj.fd, SW_SYNC_IOC_CREATE_FENCE, &data);
+	if (ret == 0) /* data.fence is only meaningful on success */
+		*fence_out = data.fence;
+	return ret;
+}
+
+/*
+ * Increments timeline by a given count
+ * Assumes that we are using a struct sw_sync_obj called user_sync_obj in
+ * global scope. Note that we also increment the local counter here, but
+ * only if the ioctl succeeded, to avoid going out of sync.
+ * @step: Number of steps to increment the timeline
+ * @return: return value from ioctl
+ */
+static int user_inc_timeline(uint32_t step)
+{
+	int ret;
+	uint32_t localstep = step; /* the ioctl is handed a pointer, so work on a local copy */
+	ret = ioctl(user_sync_obj.fd, SW_SYNC_IOC_INC, &localstep);
+	if (ret == 0)
+	{
+		user_sync_obj.tick += localstep; /* mirror the kernel timeline in our shadow counter */
+	}
+	return ret;
+}
+
+/*
+ * Wait on a given fence for a timeout
+ * This is a basic wrapper around the SYNC_IOC_WAIT ioctl, see sync/sync.h
+ * for behavioural details.
+ * @fence: fd for fence to wait on
+ * @timeout: pointer to timeout value in milliseconds
+ * @return: return value of ioctl (callers here expect < 0 with errno ETIME on timeout)
+ */
+static int wait_fence(int fence, int *timeout)
+{
+	return ioctl(fence, SYNC_IOC_WAIT, timeout);
+}
+
+/*
+ * Merge two fences into a new fence
+ * @fence_out: pointer to fd for new fence; only written when the ioctl succeeds
+ * @fence_a: first input fence (the ioctl is issued on this fd)
+ * @fence_b: second input fence
+ * @return: return value of ioctl
+ */
+static int merge_fence(int *fence_out, int fence_a, int fence_b)
+{
+	int ret;
+	/* Zero every field but fd2 so no stack garbage is handed to the kernel */
+	struct sync_merge_data data = { .fd2 = fence_b };
+	ret = ioctl(fence_a, SYNC_IOC_MERGE, &data);
+	if (ret == 0)
+	{
+		*fence_out = data.fence;
+	}
+	return ret;
+}
+
+/*
+ * Writes fence info into sync_fence_info structure. Note that this can be
+ * variable length, so set data->len to the buffer size first - see sync/sync.h
+ * @fence: fence to get information on
+ * @data: pointer to struct sync_fence_info_data
+ * @return: return value from ioctl
+ */
+static int get_fence_info(int fence, struct sync_fence_info_data *data)
+{
+	return ioctl(fence, SYNC_IOC_FENCE_INFO, data);
+}
+
+static int fd; /* DRM fd used for the execbuffer ioctl and gem_sync() */
+static drm_intel_bufmgr *bufmgr; /* used for buffer object and context creation */
+static struct intel_batchbuffer *batch; /* shared batch the blit helpers emit into and flush */
+static uint32_t devid; /* not referenced by the code visible in this part of the file */
+
+static uint32_t nop_batch[2] = {MI_BATCH_BUFFER_END}; /* second dword is implicitly zero */
+static uint32_t handle; /* GEM handle submitted by nop_exec_with_ctx(); presumably holds nop_batch - confirm in setup */
+
+/*
+ * Directly submits a nop via the EXECBUFFER2 Ioctl
+ * The caller is expected to set the flags and fence arguments correctly.
+ * @ctx pointer to a drm_intel_context created by the user (use NULL to ignore)
+ * @flags control the engine selection, enable fence output
+ * @fence_in fence used by the submission
+ * @fence_out pointer to a fence optionally returned by the submission
+ * @return return value of the execbuffer ioctl; *fence_out is only written on 0
+ */
+static int nop_exec_with_ctx(drm_intel_context *ctx, unsigned int flags, int fence_in, int *fence_out)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 gem_exec[1];
+	int ret = 0;
+
+	gem_exec[0].handle = handle; /* the single object in the list is the file-scope batch */
+	gem_exec[0].relocation_count = 0;
+	gem_exec[0].relocs_ptr = 0;
+	gem_exec[0].alignment = 0;
+	gem_exec[0].offset = 0;
+	gem_exec[0].flags = 0;
+	gem_exec[0].rsvd1 = 0;
+	gem_exec[0].rsvd2 = 0;
+
+	execbuf.buffers_ptr = (uintptr_t)gem_exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_start_offset = 0;
+	execbuf.batch_len = 8; /* two dwords, the size of nop_batch */
+	execbuf.cliprects_ptr = 0;
+	execbuf.num_cliprects = 0;
+	execbuf.DR1 = 0;
+	execbuf.DR4 = 0;
+	execbuf.flags = flags | I915_EXEC_SECURE; /* secure dispatch is always requested on top of the caller's flags */
+	if (ctx != NULL)
+	{
+		i915_execbuffer2_set_context_id(execbuf, *(int*)ctx); /* NOTE(review): assumes the context id is the first int of drm_intel_context - confirm against libdrm */
+	}
+	else
+	{
+		i915_execbuffer2_set_context_id(execbuf, 0);
+	}
+	execbuf.rsvd2 = fence_in; /* input fence travels in rsvd2; NOTE(review): a negative int would sign-extend into the upper half */
+
+	ret = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, &execbuf);
+	if (ret == 0 && fence_out != NULL)
+		*fence_out = (int) (execbuf.rsvd2 >> 32); /* output fence is read back from the upper 32 bits */
+
+	return ret;
+}
+
+/*
+ * Wrapper function for nop_exec_with_ctx that passes a NULL context (id 0)
+ */
+static int nop_exec(unsigned int flags, int fence_in, int *fence_out)
+{
+	return nop_exec_with_ctx(NULL, flags, fence_in, fence_out);
+}
+
+/*
+ * Test to show that sending a batch buffer without requesting a fence
+ * doesn't return a fence.
+ * Assumptions: batch buffer was executed
+ */
+static void fence_not_requested_not_created(void)
+{
+	int fence;
+
+	igt_assert(nop_exec(I915_EXEC_RENDER, 0, &fence) == 0);
+	igt_assert(fence == 0); /* the upper half of rsvd2 came back as the zero we passed in */
+	gem_sync(fd, handle);
+}
+
+/*
+ * Test to show that we get a fence when one is requested.
+ * Assumptions: > 0 == valid fence (the local is not pre-initialised here)
+ */
+static void fence_create(void)
+{
+	int fence;
+
+	igt_assert(nop_exec(I915_EXEC_RENDER | I915_EXEC_CREATE_FENCE, 0,
+			    &fence) == 0);
+	igt_assert(fence > 0); /* Zero is a valid FD but we assume it will
+				  always be taken during IGT runs and like this
+				  we can catch more errors. */
+
+	close(fence);
+	gem_sync(fd, handle);
+}
+
+/*
+ * Test to show that a requested fence can be queried and comes from the
+ * correct driver.
+ * Assumptions: valid fence values are >= 0 (the local is not pre-initialised)
+ *              queried fence data is sensible
+ */
+static void fence_driver_data(void)
+{
+	int fence;
+	char buf[4096]; /* backing store for the variable-length fence info reply */
+	struct sync_fence_info_data *data =
+		(struct sync_fence_info_data *)buf;
+	struct sync_pt_info *pt = (struct sync_pt_info *)&data->pt_info; /* start of the sync point records */
+
+	igt_assert(nop_exec(I915_EXEC_RENDER | I915_EXEC_CREATE_FENCE, 0,
+			    &fence) == 0);
+	igt_assert(fence >= 0);
+
+	gem_sync(fd, handle);
+
+	/* Read the sync fence info and check it matches our driver */
+	data->len = sizeof(buf);
+	igt_assert(get_fence_info(fence, data) == 0);
+	igt_assert(strcmp(pt->driver_name, I915_DRIVER_NAME) == 0);
+
+	close(fence);
+}
+
+/*
+ * Helper function to get the status of a given fence
+ * Calls the SYNC_IOC_FENCE_INFO ioctl and asserts that it succeeds
+ * @fence Fence object to check
+ * @return Status of fence
+ */
+static int get_fence_status(int fence)
+{
+	char buf[4096]; /* backing store for the variable-length fence info reply */
+	struct sync_fence_info_data *data =
+		(struct sync_fence_info_data *)buf;
+
+	data->len = sizeof(buf);
+	igt_assert(get_fence_info(fence, data) == 0);
+
+	return data->status;
+}
+
+/*
+ * Tests that a requested fence becomes signalled.
+ * Assumptions: the fence was active at some point, fence values etc.
+ */
+static void fence_signaled(void)
+{
+	int fence, status;
+	unsigned int loop = 10; /* at most 10 further polls, 20ms apart */
+
+	igt_assert(nop_exec(I915_EXEC_RENDER | I915_EXEC_CREATE_FENCE, 0,
+			    &fence) == 0);
+	igt_assert(fence > 0);
+
+	/* Make sure status is completed after a while */
+	status = get_fence_status(fence);
+	while (status == FENCE_ACTIVE && loop--) {
+		usleep(20000);
+		status = get_fence_status(fence);
+	}
+	igt_assert(status == FENCE_SIGNALED);
+
+	close(fence);
+}
+
+/*
+ * Helper function to emit one blit copy into the batch buffer
+ * Emits into the 'batch' that is in scope; it does not flush it
+ * @dst Destination buffer object
+ * @src Source buffer object
+ * @width Blit width
+ * @height Blit height
+ * @dst_pitch Destination pitch
+ * @src_pitch Source pitch
+ * TODO determine whether these instructions are ok for:
+ * a) multiple flavours of Gen
+ * b) public consumption
+ */
+static void blit_copy(drm_intel_bo *dst, drm_intel_bo *src,
+		      unsigned int width, unsigned int height,
+		      unsigned int dst_pitch, unsigned int src_pitch)
+{
+	BLIT_COPY_BATCH_START(0);
+	OUT_BATCH((3 << 24) | /* 32 bits */
+		  (0xcc << 16) | /* copy ROP */
+		  dst_pitch);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH(height << 16 | width);
+	OUT_RELOC_FENCED(dst, I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 0);
+	OUT_BATCH(0 << 16 | 0);
+	OUT_BATCH(src_pitch);
+	OUT_RELOC_FENCED(src, I915_GEM_DOMAIN_RENDER, 0, 0);
+	ADVANCE_BATCH();
+
+	if (batch->gen >= 6) { /* gen6+ only: follow the copy with a clip setup command with zeroed operands */
+		BEGIN_BATCH(3, 0);
+		OUT_BATCH(XY_SETUP_CLIP_BLT_CMD);
+		OUT_BATCH(0);
+		OUT_BATCH(0);
+		ADVANCE_BATCH();
+	}
+}
+
+#define NSEC_PER_SEC 1000000000L /* type long, which may be only 32 bits wide */
+#define USEC_PER_SEC 1000000L /* microseconds per second */
+
+/*
+ * Helper function to generate and submit a (series of ?) large blit(s)
+ * @limit number of repeated blits in the same batch buffer
+ * @timeout if positive, wait on the destination bo for up to timeout seconds
+ * @fence_in pass in fence to use as sync point
+ * @fence_out pointer to pass back fence; only used when timeout is not positive
+ * @return zero or value of the bo wait call
+ */
+static int _emit_dummy_load__bcs(int limit, int timeout, int fence_in, int *fence_out)
+{
+	int i, ret = 0;
+	drm_intel_bo *src_bo, *dst_bo;
+
+	src_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
+	igt_assert(src_bo);
+
+	dst_bo = drm_intel_bo_alloc(bufmgr, "dummy_bo", 2048*2048*4, 4096);
+	igt_assert(dst_bo);
+
+	for (i = 0; i < limit; i++) {
+		blit_copy(dst_bo, src_bo,
+			  2048, 2048,
+			  2048*4, 2048*4);
+	}
+	intel_batchbuffer_flush_fence(batch, fence_in, timeout > 0 ? NULL : fence_out);
+
+	if (timeout > 0) /* widen first: timeout * 1e9 overflows a 32-bit long */
+		ret = drm_intel_gem_bo_wait(dst_bo, (int64_t)timeout * NSEC_PER_SEC);
+
+	drm_intel_bo_unreference(src_bo);
+	drm_intel_bo_unreference(dst_bo);
+
+	return ret;
+}
+
+/*
+ * Helper function to get current CLOCK_MONOTONIC time in usecs
+ * @return Current time in usecs (unsigned arithmetic: wraps, never overflows)
+ */
+static unsigned long gettime_us(void)
+{
+	struct timespec ts;
+
+	clock_gettime(CLOCK_MONOTONIC, &ts);
+
+	return (unsigned long)ts.tv_sec * USEC_PER_SEC + ts.tv_nsec / 1000;
+}
+
+/*
+ * Helper function finds the limit to generate a second's worth
+ * of submission activity on a given ring (engine)
+ * @ring_name String representing ring (engine) name, only used for the debug log
+ * @emit Pointer to function that generates a workload
+ * @return Operation count reached when the loop stopped, used as ops per second
+ */
+static int calibrate_dummy_load(const char *ring_name,
+				int (*emit)(int limit, int timeout, int fence_in, int *fence_out))
+{
+	unsigned long start;
+	int ops = 1;
+
+	start = gettime_us(); /* taken once: elapsed time accumulates over all rounds */
+
+	do {
+		unsigned long diff;
+		int ret;
+
+		ret = emit((ops+1)/2, 10, -1, NULL); /* half of ops, timeout argument 10, no fences */
+		diff = gettime_us() - start;
+
+		if (ret || diff / USEC_PER_SEC >= 1) /* stop on error or once a full second has elapsed */
+			break;
+
+		ops += ops; /* double the load for the next round */
+	} while (ops < 100000);
+
+	igt_debug("%s dummy load calibrated: %d operations / second\n",
+		  ring_name, ops);
+
+	return ops;
+}
+
+static int ops_per_sec; /* calibration result; 0 means not calibrated yet */
+
+/*
+ * Helper function to submit N seconds worth of blits and generate
+ * a fence to wait on. Calibrates ops_per_sec first if it is still zero.
+ * @seconds Number of seconds worth of operations to submit
+ * @fence_out Pointer to requested fence
+ */
+static void emit_dummy_load__bcs(int seconds, int *fence_out)
+{
+	if (ops_per_sec == 0)
+		ops_per_sec = calibrate_dummy_load("bcs",
+						   _emit_dummy_load__bcs);
+
+	_emit_dummy_load__bcs(seconds * ops_per_sec, 0, -1, fence_out); /* timeout 0: no bo wait, fence is passed back */
+}
+
+/*
+ * Tests that fences make a transition from active to signalled
+ * Assumptions: valid fence values
+ */
+static void fence_signal(void)
+{
+	int fence;
+	unsigned int loop = 1000;
+	int status;
+	unsigned long start, end;
+
+	start = gettime_us();
+
+	/* Submit roughly two seconds of blit load */
+	emit_dummy_load__bcs(2, &fence);
+	igt_assert(fence > 0);
+
+	/* Make sure status is active after a while */
+	usleep(20000);
+	status = get_fence_status(fence);
+	igt_assert(status == FENCE_ACTIVE);
+
+	/* Make sure status is completed after a while */
+	status = get_fence_status(fence);
+	while (status == FENCE_ACTIVE && loop--) {
+		usleep(20000);
+		status = get_fence_status(fence);
+	}
+	igt_assert(status == FENCE_SIGNALED);
+	end = gettime_us();
+
+	printf("Fence took %lu.%06lus\n", (end - start) / 1000000, (end - start) % 1000000); /* %lu: the arguments are unsigned long */
+
+	close(fence);
+}
+
+/*
+ * Empty handler the tests install for SIGALRM so alarm() can break a stuck wait
+ */
+static void signal_nop(int sig)
+{
+}
+
+/*
+ * Test that we can wait on an active fence for less than the time it
+ * takes to clear, and then wait for the fence to clear properly.
+ */
+static void fence_timeout(void)
+{
+	int fence;
+	int timeout = 500; /* in ms */
+	struct sigaction sigact, orig_sigact;
+
+	/* Submit roughly two seconds of blit load */
+	emit_dummy_load__bcs(2, &fence);
+	igt_assert(fence > 0);
+
+	/* Make sure status is active after a while */
+	usleep(20000);
+	igt_assert(get_fence_status(fence) == FENCE_ACTIVE);
+
+	/* Set up signal to break the wait if broken */
+	memset(&sigact, 0, sizeof(sigact));
+	sigact.sa_handler = signal_nop;
+	igt_assert(sigaction(SIGALRM, &sigact, &orig_sigact) == 0);
+	alarm(10); /* seconds */
+
+	/* Wait on fence: the 500ms timeout must expire before the load finishes */
+	igt_assert(wait_fence(fence, &timeout) < 0);
+	igt_assert(errno == ETIME);
+
+	/* Wait for batch completion */
+	timeout = 100000;
+	igt_assert(wait_fence(fence, &timeout) == 0);
+	igt_assert(get_fence_status(fence) == FENCE_SIGNALED);
+
+	/* Restore and cleanup */
+	alarm(0);
+	igt_assert(sigaction(SIGALRM, &orig_sigact, NULL) == 0);
+	close(fence);
+}
+
+/*
+ * Tests that a wait whose timeout outlasts the workload returns success
+ */
+static void fence_wait(void)
+{
+	int fence;
+	int timeout = 4000; /* in ms */
+	struct sigaction sigact, orig_sigact;
+	int ret;
+
+	/* Submit roughly one second of blit load */
+	emit_dummy_load__bcs(1, &fence);
+	igt_assert(fence > 0);
+
+	/* Make sure status is active after a while */
+	usleep(20000);
+	igt_assert(get_fence_status(fence) == FENCE_ACTIVE);
+
+	/* Set up signal to break the wait if broken */
+	memset(&sigact, 0, sizeof(sigact));
+	sigact.sa_handler = signal_nop;
+	igt_assert(sigaction(SIGALRM, &sigact, &orig_sigact) == 0);
+	alarm(10); /* seconds */
+
+	/* Wait for batch completion */
+	ret = wait_fence(fence, &timeout);
+	igt_assert(ret == 0);
+	igt_assert(get_fence_status(fence) == FENCE_SIGNALED);
+
+	/* Restore and cleanup */
+	alarm(0);
+	igt_assert(sigaction(SIGALRM, &orig_sigact, NULL) == 0);
+	close(fence);
+}
+
+/*
+ * Tests that a previously requested fence can be submitted with a batch
+ * buffer. Does not make any checks on the second fence.
+ */
+static void fence_wait_fence(void)
+{
+	int fence, fence_dup;
+	int timeout = 40000; /* in ms */
+	struct sigaction sigact, orig_sigact;
+	int ret;
+
+	/* Submit roughly two seconds of blit load */
+	emit_dummy_load__bcs(2, &fence);
+	igt_assert(fence > 0);
+
+	igt_assert(get_fence_status(fence) == FENCE_ACTIVE);
+
+	fence_dup = dup(fence); /* a duplicate is what gets submitted as the input fence */
+	_emit_dummy_load__bcs(1 * ops_per_sec, 0, fence_dup, NULL);
+
+	/* Make sure status is active after a while */
+	usleep(20000);
+	igt_assert(get_fence_status(fence) == FENCE_ACTIVE);
+
+	/* Set up signal to break the wait if broken */
+	memset(&sigact, 0, sizeof(sigact));
+	sigact.sa_handler = signal_nop;
+	igt_assert(sigaction(SIGALRM, &sigact, &orig_sigact) == 0);
+	alarm(10);
+
+	/* Wait for batch completion */
+	ret = wait_fence(fence, &timeout);
+	igt_assert(ret == 0);
+	igt_assert(get_fence_status(fence) == FENCE_SIGNALED);
+
+	/* Restore and cleanup */
+	alarm(0);
+	igt_assert(sigaction(SIGALRM, &orig_sigact, NULL) == 0);
+	close(fence);
+	close(fence_dup); /* NOTE(review): fence_user_fence_wait() never closes its submitted dup - confirm who owns an fd passed as fence_in */
+}
+
+/*
+ * Tests that a previously requested fence can be submitted with a batch
+ * buffer. Checks that a simultaneously requested fence still works as
+ * expected.
+ */
+static void fence_wait_fence2(void)
+{
+	int fence, fence_dup;
+	int lastfence;
+	int timeout = 40000; /* in ms */
+	struct sigaction sigact, orig_sigact;
+	int ret;
+
+	/* Submit roughly two seconds of blit load */
+	emit_dummy_load__bcs(2, &fence);
+	igt_assert(fence > 0);
+
+	igt_assert(get_fence_status(fence) == FENCE_ACTIVE);
+
+	fence_dup = dup(fence);
+	_emit_dummy_load__bcs(1 * ops_per_sec, 0, fence_dup, &lastfence);
+
+	/* Make sure status is active after a while */
+	usleep(20000);
+	igt_assert(get_fence_status(fence) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(lastfence) == FENCE_ACTIVE);
+
+	/* Set up signal to break the wait if broken */
+	memset(&sigact, 0, sizeof(sigact));
+	sigact.sa_handler = signal_nop;
+	igt_assert(sigaction(SIGALRM, &sigact, &orig_sigact) == 0);
+	alarm(10);
+	usleep(20000);
+
+	/* Wait for batch completion */
+	ret = wait_fence(fence, &timeout);
+	igt_assert(ret == 0);
+	igt_assert(get_fence_status(fence) == FENCE_SIGNALED);
+
+	/* Check the second task is still running */
+	igt_assert(get_fence_status(lastfence) == FENCE_ACTIVE);
+
+	usleep(20000);
+
+	/* Wait for batch completion */
+	ret = wait_fence(lastfence, &timeout);
+	igt_assert(ret == 0);
+	igt_assert(get_fence_status(lastfence) == FENCE_SIGNALED);
+
+	/* Restore and cleanup */
+	alarm(0);
+	igt_assert(sigaction(SIGALRM, &orig_sigact, NULL) == 0);
+	close(lastfence); /* was leaked: the second submission's fence is ours to close */
+	close(fence);
+	close(fence_dup);
+}
+
+/*
+ * Test that i915 can wait on a user-created fence
+ */
+static void fence_user_fence_wait(void)
+{
+	int myfence, checkfence;
+	int myfencedup;
+	int ret;
+	int timeout = 4000; /* in ms */
+
+	/* Create a fence with the user sync device, at timeline offset 10 */
+	igt_assert(user_create_fence(&myfence, 10) == 0);
+	igt_assert(get_fence_status(myfence) == FENCE_ACTIVE);
+
+	/* Create a copy to submit to the driver */
+	myfencedup = dup(myfence);
+
+	/* Submit fence with driver - requesting a fence back */
+	ops_per_sec = calibrate_dummy_load("bcs", _emit_dummy_load__bcs);
+	_emit_dummy_load__bcs(1 * ops_per_sec, 0, myfencedup, &checkfence);
+
+	/* Make sure our workload is stalled */
+	igt_assert(wait_fence(checkfence, &timeout) < 0);
+	igt_assert(errno == ETIME);
+
+	/* Increment the timeline until the user fence is signalled */
+	ret = get_fence_status(myfence);
+	while (ret != FENCE_SIGNALED)
+	{
+		igt_assert(get_fence_status(checkfence) == FENCE_ACTIVE);
+		igt_assert(user_inc_timeline(1) == 0);
+		ret = get_fence_status(myfence);
+	}
+
+	/* Check the workload is still active */
+	igt_assert(get_fence_status(checkfence) == FENCE_ACTIVE);
+	/* Check that our workload will now finish */
+	igt_assert(wait_fence(checkfence, &timeout) == 0);
+	igt_assert(get_fence_status(checkfence) == FENCE_SIGNALED);
+
+	/* Close the fences (checkfence used to be leaked) */
+	close(checkfence);
+	close(myfence);
+}
+
+/*
+ * Test that a workload still completes if its user fence is closed unsignalled
+ */
+static void fence_user_fence_leak(void)
+{
+	int myfence, checkfence;
+	int myfencedup;
+	int timeout = 500; /* in ms */
+
+	printf( "******* WARNING *** WARNING *** WARNING *******\n" );
+	printf( "Until kernel sync code is fixed, this test will\n" );
+	printf( "leak batch buffers that can never be completed!\n" );
+	printf( "******* WARNING *** WARNING *** WARNING *******\n" );
+	return; /* unconditional: everything below is currently unreachable */
+
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+
+	/* Create a fence with the user sync device, at timeline offset 10 */
+	igt_assert(user_create_fence(&myfence, 10) == 0);
+	igt_assert(get_fence_status(myfence) == FENCE_ACTIVE);
+
+	/* Create a copy to submit to the driver */
+	myfencedup = dup(myfence);
+
+	/* Submit fence with driver - requesting a fence back */
+	_emit_dummy_load__bcs(1, 0, myfencedup, &checkfence);
+
+	/* Make sure our workload is stalled */
+	igt_assert(wait_fence(checkfence, &timeout) < 0);
+	igt_assert(errno == ETIME);
+
+	/* Close the fence without signalling it */
+	close(myfence);
+
+	/* Close the timeline and leak the fence */
+	close_sw_sync(&user_sync_obj);
+
+	/* Check that our workload will now finish */
+	timeout = 1000; /* in ms */
+	igt_assert(wait_fence(checkfence, &timeout) == 0);
+	igt_assert(get_fence_status(checkfence) == FENCE_SIGNALED);
+}
+
+/*
+ * Test out-of-order fence signalling
+ * A series of batch buffers are created so that they are dependent on fences
+ * which are in a different order:
+ * - bb[0] is dependent on f_user[1]
+ * - bb[1] is dependent on f_user[0]
+ */
+static void fence_ooo_fence(void)
+{
+	int f_out[2];
+	int f_user[2];
+	int f_user_dups[2];
+	int timeout = 200; /* in ms */
+
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+
+	/* Create user fences */
+	igt_assert(user_create_fence(&f_user[0], 1) == 0);
+	igt_assert(user_create_fence(&f_user[1], 2) == 0);
+	/* Check they are still active */
+	igt_assert(get_fence_status(f_user[0]) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(f_user[1]) == FENCE_ACTIVE);
+	/* Create duplicates for submission */
+	f_user_dups[0] = dup(f_user[0]);
+	f_user_dups[1] = dup(f_user[1]);
+
+	/* Generate buffer chain */
+	igt_assert(nop_exec(
+		I915_EXEC_RENDER |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		f_user_dups[1], &f_out[0]) == 0);
+	igt_assert(nop_exec(
+		I915_EXEC_RENDER |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		f_user_dups[0], &f_out[1]) == 0);
+
+	/* Wait and check both are still active */
+	usleep(timeout * 1000);
+	igt_assert(get_fence_status(f_out[0]) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(f_out[1]) == FENCE_ACTIVE);
+
+	/* Signal f_user[0] */
+	igt_assert(user_inc_timeline(1) == 0);
+	igt_assert(get_fence_status(f_user[0]) == FENCE_SIGNALED);
+
+	/* Check f_out[0..1] remain active */
+	usleep(timeout * 1000);
+	igt_assert(get_fence_status(f_out[0]) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(f_out[1]) == FENCE_ACTIVE);
+
+	/* Signal f_user[1] */
+	igt_assert(user_inc_timeline(1) == 0);
+	igt_assert(get_fence_status(f_user[1]) == FENCE_SIGNALED);
+
+	/* Check f_out[0..1] signal as expected */
+	igt_assert(wait_fence(f_out[0], &timeout) == 0);
+	igt_assert(wait_fence(f_out[1], &timeout) == 0);
+	igt_assert(get_fence_status(f_out[0]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(f_out[1]) == FENCE_SIGNALED);
+
+	/* Close fences, including the output fences that used to be leaked */
+	close(f_out[0]);
+	close(f_out[1]);
+	close(f_user[0]);
+	close(f_user[1]);
+}
+
+/*
+ * Test to show that fences from drm can be merged and waited on as one
+ */
+static void fence_merge(void)
+{
+	int start_fence, start_fence_dup;
+	int fence_merged_even, fence_merged_odd;
+	int fence_out[4];
+	int fence_out_dup[4];
+	int fence_final;
+	int wait_timeout, i;
+
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+	igt_assert(user_create_fence(&start_fence, 1) == 0);
+	start_fence_dup = dup(start_fence);
+
+	/* Submit and request fences for a chain of workloads */
+	ops_per_sec = calibrate_dummy_load("bcs", _emit_dummy_load__bcs);
+	_emit_dummy_load__bcs(1 * ops_per_sec, 0,
+			      start_fence_dup, &fence_out[0]);
+	fence_out_dup[0] = dup(fence_out[0]);
+
+	for (i = 1; i < 4; i++) {
+		_emit_dummy_load__bcs(1 * ops_per_sec, 0, fence_out_dup[i - 1],
+				      &fence_out[i]);
+		fence_out_dup[i] = dup(fence_out[i]);
+	}
+
+	/* Merge alternate drm fences into even and odd fences */
+	igt_assert(merge_fence(&fence_merged_even,
+			       fence_out[0], fence_out[2]) == 0);
+	igt_assert(merge_fence(&fence_merged_odd,
+			       fence_out[1], fence_out[3]) == 0);
+
+	/* Create additional batch to wait on the new merged fences */
+	igt_assert(nop_exec(
+		I915_EXEC_RENDER |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		dup(fence_merged_odd), &fence_final) == 0);
+
+	/* Signal the user fence to begin the chain */
+	igt_assert(user_inc_timeline(1) == 0);
+	/* Wait on each drm fence and check merged fence statuses */
+	wait_timeout = 15000;
+	/* After batch 0, all should still be active */
+	igt_assert(wait_fence(fence_out[0], &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[0]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_merged_even) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(fence_merged_odd) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(fence_final) == FENCE_ACTIVE);
+
+	/* After batch 1, all should still be active */
+	igt_assert(wait_fence(fence_out[1], &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[1]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_merged_even) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(fence_merged_odd) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(fence_final) == FENCE_ACTIVE);
+
+	/* After batch 2, fence_merged_even should be complete */
+	igt_assert(wait_fence(fence_merged_even, &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[2]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_merged_even) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_merged_odd) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(fence_final) == FENCE_ACTIVE);
+
+	/* After batch 3, all fences should be complete */
+	igt_assert(wait_fence(fence_merged_odd, &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[3]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_merged_even) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_merged_odd) == FENCE_SIGNALED);
+	/* Nop is too short to see whether it was active after merged fence
+	   was signalled */
+	igt_assert(wait_fence(fence_final, &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_final) == FENCE_SIGNALED);
+
+	/* Close every fd this test still owns (most used to be leaked) */
+	for (i = 0; i < 4; i++)
+		close(fence_out[i]);
+	close(fence_out_dup[3]); /* the one dup that is never handed to a submission */
+	close(fence_merged_even);
+	close(fence_merged_odd);
+	close(fence_final);
+	close(start_fence);
+}
+
+/*
+ * Test for behaviour of multiple batches dependent on single fence
+ * Show that signalling the fence does not override other dependencies
+ * Scenario A: Same context, multiple batches complete in submission order
+ *             despite being triggered by same user fence
+ * Scenario B: Batches in different contexts reliant on same fence but waiting
+ *             on earlier work in same context
+ */
+static void fence_multidependency(void)
+{
+	int start_fence;
+	int start_fence_dup, start_fence_dup2;
+	int fence_out[2];
+	int wait_timeout;
+	drm_intel_context *ctx[2];
+
+	/* Scenario A */
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+	/* Create user fence to trigger */
+	igt_assert(user_create_fence(&start_fence, 1) == 0);
+	start_fence_dup = dup(start_fence);
+	start_fence_dup2 = dup(start_fence);
+
+	/* Create long workloads, dependent on same fence */
+	ops_per_sec = calibrate_dummy_load("bcs", _emit_dummy_load__bcs);
+	_emit_dummy_load__bcs(3 * ops_per_sec, 0,
+			      start_fence_dup, &fence_out[0]);
+	_emit_dummy_load__bcs(1 * ops_per_sec, 0,
+			      start_fence_dup2, &fence_out[1]);
+
+	/* Note that the first workload is much longer than the second, so the
+	 * second could only finish first if submission order were ignored */
+	/* Signal fence */
+	igt_assert(user_inc_timeline(1) == 0);
+	/* Check that workload first submitted completes first */
+	wait_timeout = 45000;
+	igt_assert(wait_fence(fence_out[0], &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[0]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_out[1]) == FENCE_ACTIVE);
+
+	igt_assert(wait_fence(fence_out[1], &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[1]) == FENCE_SIGNALED);
+
+	close(fence_out[0]); /* output fences used to be leaked here */
+	close(fence_out[1]);
+	close(start_fence);
+
+	if (batch->gen < 8) {
+		printf("Skipping LRC-related tests\n");
+		return;
+	}
+
+	/* Scenario B */
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+	/* Create user fence to trigger */
+	igt_assert(user_create_fence(&start_fence, 1) == 0);
+	start_fence_dup = dup(start_fence);
+	start_fence_dup2 = dup(start_fence);
+
+	/* Create contexts */
+	igt_assert((ctx[0] = drm_intel_gem_context_create(bufmgr)) != NULL);
+	igt_assert((ctx[1] = drm_intel_gem_context_create(bufmgr)) != NULL);
+	/* Create long workloads on different contexts */
+	/* They are dependent on the same fence */
+	ops_per_sec = calibrate_dummy_load("bcs", _emit_dummy_load__bcs);
+	intel_batchbuffer_set_context(batch, ctx[0]);
+	_emit_dummy_load__bcs(3 * ops_per_sec, 0,
+			      start_fence_dup, &fence_out[0]);
+	intel_batchbuffer_set_context(batch, ctx[1]);
+	_emit_dummy_load__bcs(1 * ops_per_sec, 0,
+			      start_fence_dup2, &fence_out[1]);
+	/* Signal fence */
+	igt_assert(user_inc_timeline(1) == 0);
+
+	igt_assert(wait_fence(fence_out[0], &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[0]) == FENCE_SIGNALED);
+	igt_assert(get_fence_status(fence_out[1]) == FENCE_ACTIVE);
+
+	igt_assert(wait_fence(fence_out[1], &wait_timeout) == 0);
+	igt_assert(get_fence_status(fence_out[1]) == FENCE_SIGNALED);
+
+	drm_intel_gem_context_destroy(ctx[0]);
+	drm_intel_gem_context_destroy(ctx[1]);
+
+	close(fence_out[0]); /* output fences used to be leaked here too */
+	close(fence_out[1]);
+	close(start_fence);
+}
+
+/*
+ * Quick and dirty attempt to break things: make a submission depend on a
+ * user fence and then never signal that fence.
+ * That's the theory anyway - it doesn't seem to cause too many problems
+ */
+static void fence_user_forget(void)
+{
+	int user_fence;
+	int wait_fd;
+	int done_fence;
+	int wait_ms = 10000; /* in ms */
+
+	/* The timeline can only be reset to zero by restarting the sync
+	 * device - a shortcoming of the mini-api at the top of this file */
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+
+	/* User fence at step 1 of the timeline; it must start out active */
+	igt_assert(user_create_fence(&user_fence, 1) == 0);
+	igt_assert(get_fence_status(user_fence) == FENCE_ACTIVE);
+
+	/* Submit a nop that waits on a duplicate of the user fence */
+	wait_fd = dup(user_fence);
+	igt_assert(nop_exec(
+		I915_EXEC_RENDER |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		wait_fd, &done_fence) == 0);
+
+	/* The wait on the output fence is expected to fail, and the fence
+	 * to remain active, because nothing ever signals the user fence */
+	igt_assert(wait_fence(done_fence, &wait_ms) != 0);
+	igt_assert(get_fence_status(done_fence) == FENCE_ACTIVE);
+
+	/* If we reached here, then we know that the driver is still waiting */
+	/* This will block everything on the render's default context */
+	/* To signal our user fence, advance the timeline by one: */
+	/* igt_assert(user_inc_timeline(1) == 0); */
+	/* Clean up the fence we made - not sure whether this is necessary */
+	close(user_fence);
+}
+
+/*
+ * Test to show that fences can be used across multiple engines:
+ * a user fence gates a render batch, whose fence gates a blitter batch,
+ * whose fence in turn gates a second render batch.
+ */
+static void fence_multiengine(void)
+{
+	int user_fence;
+	int wait_fd;
+	int render_fence[2];
+	int blt_fence;
+	int all_fence;
+	int delay_ms = 200; /* in ms */
+
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+
+	/* The user fence that starts the whole chain */
+	igt_assert(user_create_fence(&user_fence, 1) == 0);
+
+	/* Build the chain: user -> render -> blitter -> render */
+	wait_fd = dup(user_fence);
+	igt_assert(nop_exec(
+		I915_EXEC_RENDER |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		wait_fd, &render_fence[0]) == 0);
+
+	wait_fd = dup(render_fence[0]);
+	igt_assert(nop_exec(
+		I915_EXEC_BLT |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		wait_fd, &blt_fence) == 0);
+
+	wait_fd = dup(blt_fence);
+	igt_assert(nop_exec(
+		I915_EXEC_RENDER |
+		I915_EXEC_CREATE_FENCE |
+		I915_EXEC_WAIT_FENCE,
+		wait_fd, &render_fence[1]) == 0);
+
+	/* A merge of all three output fences shows that everything finished */
+	igt_assert(merge_fence(&all_fence,
+			       render_fence[0], render_fence[1]) == 0);
+	igt_assert(merge_fence(&all_fence, all_fence, blt_fence) == 0);
+
+	/* Nothing may progress until the user fence is signalled */
+	usleep(delay_ms * 1000);
+	igt_assert(get_fence_status(render_fence[0]) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(render_fence[1]) == FENCE_ACTIVE);
+	igt_assert(get_fence_status(blt_fence) == FENCE_ACTIVE);
+
+	/* Signal the user fence to release the chain */
+	igt_assert(user_inc_timeline(1) == 0);
+
+	/* First render batch */
+	igt_assert(wait_fence(render_fence[0], &delay_ms) == 0);
+	igt_assert(get_fence_status(render_fence[0]) == FENCE_SIGNALED);
+	/* Blitter batch */
+	igt_assert(wait_fence(blt_fence, &delay_ms) == 0);
+	igt_assert(get_fence_status(blt_fence) == FENCE_SIGNALED);
+	/* Second render batch */
+	igt_assert(wait_fence(render_fence[1], &delay_ms) == 0);
+	igt_assert(get_fence_status(render_fence[1]) == FENCE_SIGNALED);
+
+	/* The merged fence must have signalled as well */
+	igt_assert(get_fence_status(all_fence) == FENCE_SIGNALED);
+
+	close(user_fence);
+}
+
+/*
+ * Reads a thread's status value while holding the state mutex
+ * @mutex pointer to a mutex guarding the state array
+ * @state pointer to the int we are using as a status indicator
+ * Returns the value read from *state
+ */
+static int thread_get_status(pthread_mutex_t *mutex, int *state)
+{
+	int value;
+
+	/* Block on the mutex instead of polling trylock with a sleep: the
+	 * polling loop could never terminate if trylock kept failing for a
+	 * reason other than the mutex being held */
+	pthread_mutex_lock(mutex);
+	value = *state;
+	pthread_mutex_unlock(mutex);
+
+	return value;
+}
+
+/*
+ * Sets the status of a thread while holding the state mutex
+ * @mutex pointer to a mutex guarding the state array
+ * @state pointer to the int we are using as a status indicator
+ * @value value we would like the state set to
+ */
+static void thread_update_status(pthread_mutex_t *mutex,
+				 int *state, int value)
+{
+	/* Block on the mutex instead of polling trylock with a sleep: the
+	 * polling loop could never terminate if trylock kept failing for a
+	 * reason other than the mutex being held */
+	pthread_mutex_lock(mutex);
+	*state = value;
+	pthread_mutex_unlock(mutex);
+}
+
+/* Thread states, in the order a thrash thread passes through them */
+enum {
+	TSTATE_BEGUN		= 1, /* thread has begun */
+	TSTATE_BUSY		= 2, /* thread is busy */
+	TSTATE_FENCE_READY	= 3, /* thread has produced a fence */
+	TSTATE_SUBMITTED	= 4, /* thread has submitted all buffers */
+	TSTATE_COMPLETE		= 5, /* thread has completed */
+};
+
+/*
+ * Per-thread parameter block handed to thrash_thread
+ */
+struct thrash_data {
+	/* id of the thread for reference */
+	int id;
+	/* starting fence, created by user */
+	int start_fence;
+	/* mutex to control access to state */
+	pthread_mutex_t *state_mutex;
+	/* pointer to this thread's state integer */
+	int *state;
+	/* pointer to the public fence array */
+	int *fence_array;
+	/* number of nop submissions */
+	int num_submissions;
+	/* number of threads */
+	int num_threads;
+};
+
+/*
+ * Thread function to thrash the submission mechanism for a given context
+ * Each thread uses the same drm fd and engine
+ * Each thread is given the same user fence as a trigger
+ * Each thread contains a loop to generate many dependent submissions
+ * The returned fences are used as input for other threads
+ * and also merged into a superfence for that thread
+ * When each thread has finished submitting, it signals its readiness
+ * The main thread checks that all threads are ready, then triggers
+ *
+ * super_fence is only assigned inside the submission loop, so
+ * num_submissions must be at least 1.
+ * The state of the next thread is reached through pointer arithmetic on
+ * params->state, so every thread's state pointer must refer to its own
+ * element (index id) of one shared array.
+ *
+ * @data pointer to the thrash_data structure passed in to the thread
+ * Always returns NULL
+ */
+static void *thrash_thread(void *data)
+{
+	int i;
+	int fence_out, super_fence;
+	int next_thread;
+	struct thrash_data *params;
+	drm_intel_context *ctx;
+
+	/* Get the thread parameters */
+	params = (struct thrash_data *) data;
+	/* Threads form a ring: each one consumes the fence of its successor */
+	next_thread = (params->id + 1) % params->num_threads;
+
+	thread_update_status(params->state_mutex,
+			     params->state, TSTATE_BEGUN);
+
+	/* Create the context, and do not submit on it if that failed */
+	igt_assert((ctx = drm_intel_gem_context_create(bufmgr)) != NULL);
+
+	/* First nop will be dependent on the starting fence */
+	fence_out = params->start_fence;
+
+	/* Submit the nops */
+	for (i = 0; i < params->num_submissions; i++)
+	{
+		/* Show that we're busy */
+		thread_update_status(params->state_mutex,
+			     params->state, TSTATE_BUSY);
+
+		/* Wait on a duplicate of the current input fence; the fence
+		   created by this submission replaces fence_out */
+		igt_assert(nop_exec_with_ctx(ctx,
+			I915_EXEC_RENDER |
+			I915_EXEC_CREATE_FENCE |
+			I915_EXEC_WAIT_FENCE,
+			dup(fence_out), &fence_out) == 0);
+
+		/* Only need to do a merge from the second submission */
+		if (i > 0)
+			igt_assert(merge_fence(&super_fence,
+					       super_fence, fence_out) == 0);
+		else
+			super_fence = fence_out;
+
+		/* Update the public fence and make it available */
+		params->fence_array[params->id] = fence_out;
+
+		thread_update_status(params->state_mutex,
+			     params->state, TSTATE_FENCE_READY);
+
+		/* Wait for next thread to have an available fence */
+		/* (state - id) is the start of the shared state array */
+		while (thread_get_status(
+				params->state_mutex,
+				(params->state - params->id + next_thread)) <
+			TSTATE_FENCE_READY)
+			usleep(1000);
+
+		/* Get the next thread's fence */
+		fence_out = params->fence_array[next_thread];
+	}
+
+	printf("[%d] Finished submitting\n", params->id);
+	usleep(1000);
+
+	/* If we have a large enough queue limit in the scheduler, we
+	   will have submitted everything already, so the whole queue is
+	   waiting for the user to trigger the first fence. But if N_THREADS x
+	   num_submissions is greater than the limit, we could have executed
+	   this already during our usleep */
+	if (get_fence_status(super_fence) != FENCE_ACTIVE)
+	{
+		printf("[%d] super not active: %d\n", params->id,
+			get_fence_status(super_fence));
+	}
+
+	/*igt_assert(get_fence_status(params->start_fence) == FENCE_ACTIVE);*/
+
+	/* Update thread status */
+	thread_update_status(params->state_mutex,
+			     params->state, TSTATE_SUBMITTED);
+	printf("[%d] recorded state %d\n",
+	       params->id, *(params->state));
+
+	/* Wait for our super_fence to finish */
+	while (get_fence_status(super_fence) != FENCE_SIGNALED)
+		usleep(1000);
+
+	/* Update thread status */
+	thread_update_status(params->state_mutex,
+			     params->state, TSTATE_COMPLETE);
+	printf("[%d] recorded state %d\n",
+	       params->id, *(params->state));
+
+	/* Destroy the context */
+	drm_intel_gem_context_destroy(ctx);
+
+	return NULL;
+}
+
+/*
+ * Poll until every thread has reached at least the given status
+ * Gives up after 25 attempts, 50ms apart, and then only reports the
+ * failure with a message
+ * @p_mutex pointer to a mutex guarding the state array
+ * @num_threads The number of threads we are checking
+ * @statearr Pointer to the first integer in a num_threads-sized array
+ * @state The checkpoint we are expecting the threads to have reached
+ */
+static void check_thread_state(pthread_mutex_t *p_mutex, int num_threads,
+			       int *statearr, int state)
+{
+	int all_reached = 0;
+	int attempt;
+
+	/* The number of attempts is bounded, in case of deadlock */
+	for (attempt = 0; !all_reached && attempt < 25; attempt++) {
+		/* An attempt that cannot take the mutex still counts */
+		if (pthread_mutex_trylock(p_mutex) == 0) {
+			int idx = 0;
+
+			/* Stop at the first thread that is still behind */
+			while (idx < num_threads && statearr[idx] >= state)
+				idx++;
+			all_reached = (idx >= num_threads);
+
+			pthread_mutex_unlock(p_mutex);
+		}
+		usleep(50000);
+	}
+
+	if (!all_reached)
+		printf("Couldn't finish checking state %d\n", state);
+}
+
+/*
+ * Thrash fences across multiple threads, using a single fence to kick it off
+ * Starts N_THREADS instances of thrash_thread, waits for them all to report
+ * that they have submitted their work, signals the user fence, then waits
+ * for them all to report completion before joining them
+ */
+static void fence_multithread(void)
+{
+	/* Compile-time constants, so that the arrays below are not VLAs */
+	enum { N_THREADS = 8, N_SUBMISSIONS = 9 };
+	int i;
+	pthread_mutex_t state_mutex;
+	pthread_t thread_handles[N_THREADS];
+	struct thrash_data t_params[N_THREADS];
+	int statearr[N_THREADS];
+	int fence_array[N_THREADS];
+
+	int start_fence;
+
+	close_sw_sync(&user_sync_obj);
+	igt_assert(init_sw_sync(&user_sync_obj) == 0);
+
+	/* Create user fence */
+	igt_assert(user_create_fence(&start_fence, 1) == 0);
+
+	/* Populate thread data */
+	for (i = 0; i < N_THREADS; i++)
+	{
+		t_params[i].id = i;
+		t_params[i].start_fence = start_fence;
+		t_params[i].state_mutex = &state_mutex;
+		t_params[i].state = &(statearr[i]);
+		t_params[i].fence_array = fence_array;
+		t_params[i].num_submissions = N_SUBMISSIONS;
+		t_params[i].num_threads = N_THREADS;
+		statearr[i] = 0;
+		fence_array[i] = -1;
+	}
+
+	igt_assert(pthread_mutex_init(&state_mutex, NULL) == 0);
+
+	/* Launch threads */
+	/* A handle that was never filled in must not reach pthread_join */
+	for (i = 0; i < N_THREADS; i++)
+		igt_assert(pthread_create(&thread_handles[i], NULL,
+					  thrash_thread,
+					  (void *) (&t_params[i])) == 0);
+
+	/* Wait for submissions to complete */
+	check_thread_state(&state_mutex, N_THREADS, statearr, TSTATE_SUBMITTED);
+
+	printf("Finished checking threads for state %d\n", TSTATE_SUBMITTED);
+
+	/* The threads only complete once their fences signal, so fail here
+	 * rather than block in pthread_join if the timeline did not move */
+	igt_assert(user_inc_timeline(1) == 0);
+	printf("Incremented timeline\n");
+
+	check_thread_state(&state_mutex, N_THREADS, statearr, TSTATE_COMPLETE);
+	printf("Finished checking threads for state %d\n", TSTATE_COMPLETE);
+
+	/* Finish threads */
+	for (i = 0; i < N_THREADS; i++)
+		pthread_join(thread_handles[i], NULL);
+
+	pthread_mutex_destroy(&state_mutex);
+
+	close(start_fence);
+}
+
+/*
+ * Test entry point: the first fixture sets up the sw_sync object, the drm
+ * fd, a nop batch object and the batchbuffer shared by all subtests; the
+ * last fixture releases them again.
+ */
+igt_main
+{
+	igt_fixture {
+		igt_assert(init_sw_sync(&user_sync_obj) == 0);
+		fd = drm_open_driver_master(DRIVER_INTEL);
+
+		/* Buffer object holding the nop batch */
+		handle = gem_create(fd, 4096);
+		gem_write(fd, handle, 0, nop_batch, sizeof(nop_batch));
+
+		bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+		devid = intel_get_drm_devid(fd);
+		batch = intel_batchbuffer_alloc(bufmgr, devid);
+	}
+
+	igt_subtest("not-requested-not-created")
+		fence_not_requested_not_created();
+
+	igt_subtest("create")
+		fence_create();
+
+	igt_subtest("driver-data")
+		fence_driver_data();
+
+	igt_subtest("signaled")
+		fence_signaled();
+
+	igt_subtest("signal")
+		fence_signal();
+
+	igt_subtest("wait")
+		fence_wait();
+
+	igt_subtest("timeout")
+		fence_timeout();
+
+	igt_subtest("wait-fence")
+		fence_wait_fence();
+
+	igt_subtest("wait-fence2")
+		fence_wait_fence2();
+
+	igt_subtest("user-fence-wait")
+		fence_user_fence_wait();
+
+	igt_subtest("user-fence-ooo")
+		fence_ooo_fence();
+
+	igt_subtest("user-fence-leak")
+		fence_user_fence_leak();
+
+	igt_subtest("merge")
+		fence_merge();
+
+	/* The two subtests below are currently disabled */
+/*
+	igt_subtest("multidependency")
+		fence_multidependency();
+
+	igt_subtest("user-fence-forget")
+		fence_user_forget();
+*/
+	igt_subtest("multiengine")
+		fence_multiengine();
+
+	igt_subtest("multithread")
+		fence_multithread();
+
+	igt_fixture {
+		gem_close(fd, handle);
+		intel_batchbuffer_free(batch);
+		/* Release the buffer manager created in the first fixture
+		 * before the fd it was created on is closed */
+		drm_intel_bufmgr_destroy(bufmgr);
+		close(fd);
+		close_sw_sync(&user_sync_obj);
+	}
+}
-- 
1.9.1



More information about the Intel-gfx mailing list