[Intel-gfx] [PATCH i-g-t] tests/gem_eio: Never re-use contexts which were in the middle of GPU reset

Tvrtko Ursulin tursulin at ursulin.net
Tue Mar 27 16:40:56 UTC 2018


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Contexts executing when reset triggers are potentially corrupt so trying to
use them from a subsequent test (like the default context) can hang the
GPU or even the driver.

Work around that by always creating a dedicated context which will be
running when GPU reset happens.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 tests/gem_eio.c | 96 +++++++++++++++++++++++++++++++++++----------------------
 1 file changed, 60 insertions(+), 36 deletions(-)

diff --git a/tests/gem_eio.c b/tests/gem_eio.c
index b824d9d4c9c0..cefe26adf893 100644
--- a/tests/gem_eio.c
+++ b/tests/gem_eio.c
@@ -249,14 +249,34 @@ static int __check_wait(int fd, uint32_t bo, unsigned int wait)
 	return ret;
 }
 
+static uint32_t context_create_safe(int i915)
+{
+	struct drm_i915_gem_context_param param;
+
+	memset(&param, 0, sizeof(param));
+
+	param.ctx_id = gem_context_create(i915);
+	param.param = I915_CONTEXT_PARAM_BANNABLE;
+	gem_context_set_param(i915, &param);
+
+	param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
+	param.value = 1;
+	gem_context_set_param(i915, &param);
+
+	return param.ctx_id;
+}
+
 #define TEST_WEDGE (1)
 
 static void test_wait(int fd, unsigned int flags, unsigned int wait)
 {
 	igt_spin_t *hang;
+	uint32_t ctx;
 
 	igt_require_gem(fd);
 
+	ctx = context_create_safe(fd);
+
 	/*
 	 * If the request we wait on completes due to a hang (even for
 	 * that request), the user expects the return value to 0 (success).
@@ -267,7 +287,7 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 	else
 		igt_require(i915_reset_control(true));
 
-	hang = spin_sync(fd, 0, I915_EXEC_DEFAULT);
+	hang = spin_sync(fd, ctx, I915_EXEC_DEFAULT);
 
 	igt_assert_eq(__check_wait(fd, hang->handle, wait), 0);
 
@@ -276,6 +296,8 @@ static void test_wait(int fd, unsigned int flags, unsigned int wait)
 	igt_require(i915_reset_control(true));
 
 	trigger_reset(fd);
+
+	gem_context_destroy(fd, ctx);
 }
 
 static void test_suspend(int fd, int state)
@@ -309,6 +331,7 @@ static void test_inflight(int fd, unsigned int wait)
 
 	for_each_engine(fd, engine) {
 		struct drm_i915_gem_execbuffer2 execbuf;
+		uint32_t ctx = context_create_safe(fd);
 		igt_spin_t *hang;
 		int fence[64]; /* conservative estimate of ring size */
 
@@ -316,7 +339,7 @@ static void test_inflight(int fd, unsigned int wait)
 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
 		igt_require(i915_reset_control(false));
 
-		hang = spin_sync(fd, 0, engine);
+		hang = spin_sync(fd, ctx, engine);
 		obj[0].handle = hang->handle;
 
 		memset(&execbuf, 0, sizeof(execbuf));
@@ -340,6 +363,8 @@ static void test_inflight(int fd, unsigned int wait)
 		igt_spin_batch_free(fd, hang);
 		igt_assert(i915_reset_control(true));
 		trigger_reset(fd);
+
+		gem_context_destroy(fd, ctx);
 	}
 }
 
@@ -350,17 +375,20 @@ static void test_inflight_suspend(int fd)
 	uint32_t bbe = MI_BATCH_BUFFER_END;
 	int fence[64]; /* conservative estimate of ring size */
 	igt_spin_t *hang;
+	uint32_t ctx;
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 	igt_require(i915_reset_control(false));
 
+	ctx = context_create_safe(fd);
+
 	memset(obj, 0, sizeof(obj));
 	obj[0].flags = EXEC_OBJECT_WRITE;
 	obj[1].handle = gem_create(fd, 4096);
 	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
 
-	hang = spin_sync(fd, 0, 0);
+	hang = spin_sync(fd, ctx, 0);
 	obj[0].handle = hang->handle;
 
 	memset(&execbuf, 0, sizeof(execbuf));
@@ -387,23 +415,8 @@ static void test_inflight_suspend(int fd)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
-}
-
-static uint32_t context_create_safe(int i915)
-{
-	struct drm_i915_gem_context_param param;
-
-	memset(&param, 0, sizeof(param));
 
-	param.ctx_id = gem_context_create(i915);
-	param.param = I915_CONTEXT_PARAM_BANNABLE;
-	gem_context_set_param(i915, &param);
-
-	param.param = I915_CONTEXT_PARAM_NO_ERROR_CAPTURE;
-	param.value = 1;
-	gem_context_set_param(i915, &param);
-
-	return param.ctx_id;
+	gem_context_destroy(fd, ctx);
 }
 
 static void test_inflight_contexts(int fd, unsigned int wait)
@@ -411,40 +424,40 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 	struct drm_i915_gem_exec_object2 obj[2];
 	const uint32_t bbe = MI_BATCH_BUFFER_END;
 	unsigned int engine;
-	uint32_t ctx[64];
+	uint32_t ctx[65];
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 	gem_require_contexts(fd);
 
-	for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
-		ctx[n] = context_create_safe(fd);
-
 	memset(obj, 0, sizeof(obj));
 	obj[0].flags = EXEC_OBJECT_WRITE;
 	obj[1].handle = gem_create(fd, 4096);
 	gem_write(fd, obj[1].handle, 0, &bbe, sizeof(bbe));
 
 	for_each_engine(fd, engine) {
-		struct drm_i915_gem_execbuffer2 execbuf;
+		struct drm_i915_gem_execbuffer2 execbuf = { };
 		igt_spin_t *hang;
 		int fence[64];
 
-		gem_quiescent_gpu(fd);
-
 		igt_debug("Starting %s on engine '%s'\n", __func__, e__->name);
+
 		igt_require(i915_reset_control(false));
 
-		hang = spin_sync(fd, 0, engine);
+		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+			ctx[n] = context_create_safe(fd);
+
+		gem_quiescent_gpu(fd);
+
+		hang = spin_sync(fd, ctx[0], engine);
 		obj[0].handle = hang->handle;
 
-		memset(&execbuf, 0, sizeof(execbuf));
 		execbuf.buffers_ptr = to_user_pointer(obj);
 		execbuf.buffer_count = 2;
 		execbuf.flags = engine | I915_EXEC_FENCE_OUT;
 
-		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
-			execbuf.rsvd1 = ctx[n];
+		for (unsigned int n = 0; n < (ARRAY_SIZE(fence) - 1); n++) {
+			execbuf.rsvd1 = ctx[n + 1];
 			gem_execbuf_wr(fd, &execbuf);
 			fence[n] = execbuf.rsvd2 >> 32;
 			igt_assert(fence[n] != -1);
@@ -452,18 +465,19 @@ static void test_inflight_contexts(int fd, unsigned int wait)
 
 		igt_assert_eq(__check_wait(fd, obj[1].handle, wait), 0);
 
-		for (unsigned int n = 0; n < ARRAY_SIZE(fence); n++) {
+		for (unsigned int n = 0; n < (ARRAY_SIZE(fence) - 1); n++) {
 			igt_assert_eq(sync_fence_status(fence[n]), -EIO);
 			close(fence[n]);
 		}
 
 		igt_spin_batch_free(fd, hang);
+
+		for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
+			gem_context_destroy(fd, ctx[n]);
+
 		igt_assert(i915_reset_control(true));
 		trigger_reset(fd);
 	}
-
-	for (unsigned int n = 0; n < ARRAY_SIZE(ctx); n++)
-		gem_context_destroy(fd, ctx[n]);
 }
 
 static void test_inflight_external(int fd)
@@ -473,15 +487,18 @@ static void test_inflight_external(int fd)
 	struct drm_i915_gem_exec_object2 obj;
 	igt_spin_t *hang;
 	uint32_t fence;
+	uint32_t ctx;
 	IGT_CORK_FENCE(cork);
 
 	igt_require_sw_sync();
 	igt_require(gem_has_exec_fence(fd));
 
+	ctx = context_create_safe(fd);
+
 	fence = igt_cork_plug(&cork, fd);
 
 	igt_require(i915_reset_control(false));
-	hang = __spin_poll(fd, 0, 0);
+	hang = __spin_poll(fd, ctx, 0);
 
 	memset(&obj, 0, sizeof(obj));
 	obj.handle = gem_create(fd, 4096);
@@ -514,6 +531,8 @@ static void test_inflight_external(int fd)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+
+	gem_context_destroy(fd, ctx);
 }
 
 static void test_inflight_internal(int fd, unsigned int wait)
@@ -524,12 +543,15 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	unsigned engine, nfence = 0;
 	int fences[16];
 	igt_spin_t *hang;
+	uint32_t ctx;
 
 	igt_require_gem(fd);
 	igt_require(gem_has_exec_fence(fd));
 
+	ctx = context_create_safe(fd);
+
 	igt_require(i915_reset_control(false));
-	hang = spin_sync(fd, 0, 0);
+	hang = spin_sync(fd, ctx, 0);
 
 	memset(obj, 0, sizeof(obj));
 	obj[0].handle = hang->handle;
@@ -560,6 +582,8 @@ static void test_inflight_internal(int fd, unsigned int wait)
 	igt_spin_batch_free(fd, hang);
 	igt_assert(i915_reset_control(true));
 	trigger_reset(fd);
+
+	gem_context_destroy(fd, ctx);
 }
 
 static int fd = -1;
-- 
2.14.1



More information about the Intel-gfx mailing list