[Intel-gfx] [PATCH i-g-t 2/2] i915/gem_ctx_persistence: Give the CPU scheduler a kick on timeouts

Chris Wilson chris at chris-wilson.co.uk
Wed Apr 22 10:41:02 UTC 2020


We have allowed the CPU 2s to process the hang and clean up, but clearly
this is not always enough. Let's give the CPU scheduler one last kick
before declaring that we have an issue, so that we can be sure there
really is a bug to fix before worrying.

References: https://gitlab.freedesktop.org/drm/intel/issues/1528
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 tests/i915/gem_ctx_persistence.c | 43 ++++++++++++++++++++++----------
 1 file changed, 30 insertions(+), 13 deletions(-)

diff --git a/tests/i915/gem_ctx_persistence.c b/tests/i915/gem_ctx_persistence.c
index 3d52987d1..dea62fa38 100644
--- a/tests/i915/gem_ctx_persistence.c
+++ b/tests/i915/gem_ctx_persistence.c
@@ -55,11 +55,21 @@ static void cleanup(int i915)
 	igt_require_gem(i915);
 }
 
-static int wait_for_status(int fence, int timeout)
+static void kick_kthreads(int i915)
+{
+	/* Give the *CPU* scheduler a kick! */
+	igt_drop_caches_set(i915, DROP_TASKLETS);
+}
+
+static int wait_for_status(int i915, int fence, int timeout)
 {
 	int err;
 
 	err = sync_fence_wait(fence, timeout);
+	if (err == -ETIME) {
+		kick_kthreads(i915);
+		err = sync_fence_wait(fence, timeout);
+	}
 	if (err)
 		return err;
 
@@ -250,8 +260,8 @@ static void test_nonpersistent_mixed(int i915, unsigned int engine)
 	}
 
 	/* Outer pair of contexts were non-persistent and killed */
-	igt_assert_eq(wait_for_status(fence[0], reset_timeout_ms), -EIO);
-	igt_assert_eq(wait_for_status(fence[2], reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(i915, fence[0], reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(i915, fence[2], reset_timeout_ms), -EIO);
 
 	/* But the middle context is still running */
 	igt_assert_eq(sync_fence_wait(fence[1], 0), -ETIME);
@@ -440,7 +450,8 @@ static void test_nonpersistent_file(int i915)
 	close(i915);
 	flush_delayed_fput(debugfs);
 
-	igt_assert_eq(wait_for_status(spin->out_fence, reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(i915, spin->out_fence, reset_timeout_ms),
+		      -EIO);
 
 	spin->handle = 0;
 	igt_spin_free(-1, spin);
@@ -478,8 +489,10 @@ static void test_nonpersistent_queued(int i915, unsigned int engine)
 
 	gem_context_destroy(i915, ctx);
 
-	igt_assert_eq(wait_for_status(spin->out_fence, reset_timeout_ms), -EIO);
-	igt_assert_eq(wait_for_status(fence, reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(i915, spin->out_fence, reset_timeout_ms),
+		      -EIO);
+	igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms),
+		      -EIO);
 
 	igt_spin_free(i915, spin);
 }
@@ -554,7 +567,7 @@ static void test_process(int i915)
 	fence = recvfd(sv[1]);
 	close(sv[1]);
 
-	igt_assert_eq(wait_for_status(fence, reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms), -EIO);
 	close(fence);
 
 	/* We have to manually clean up the orphaned spinner */
@@ -607,7 +620,7 @@ static void test_process_mixed(int pfd, unsigned int engine)
 	close(sv[1]);
 
 	/* First fence is non-persistent, so should be reset */
-	igt_assert_eq(wait_for_status(fence[0], reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(pfd, fence[0], reset_timeout_ms), -EIO);
 	close(fence[0]);
 
 	/* Second fence is persistent, so should be still spinning */
@@ -677,11 +690,12 @@ test_saturated_hostile(int i915, const struct intel_execution_engine2 *engine)
 	gem_context_destroy(i915, ctx);
 
 	/* Hostile request requires a GPU reset to terminate */
-	igt_assert_eq(wait_for_status(spin->out_fence, reset_timeout_ms), -EIO);
+	igt_assert_eq(wait_for_status(i915, spin->out_fence, reset_timeout_ms),
+		      -EIO);
 
 	/* All other spinners should be left unharmed */
 	gem_quiescent_gpu(i915);
-	igt_assert_eq(wait_for_status(fence, reset_timeout_ms), 1);
+	igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms), 1);
 	close(fence);
 }
 
@@ -746,7 +760,7 @@ static void test_processes(int i915)
 
 		if (i == 0) {
 			/* First fence is non-persistent, so should be reset */
-			igt_assert_eq(wait_for_status(fence, reset_timeout_ms),
+			igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms),
 				      -EIO);
 		} else {
 			/* Second fence is persistent, so still spinning */
@@ -790,10 +804,12 @@ static void __smoker(int i915,
 
 	igt_spin_end(spin);
 
-	igt_assert_eq(wait_for_status(spin->out_fence, timeout), expected);
+	igt_assert_eq(wait_for_status(i915, spin->out_fence, timeout),
+		      expected);
 
 	if (fence != -1) {
-		igt_assert_eq(wait_for_status(fence, timeout), expected);
+		igt_assert_eq(wait_for_status(i915, fence, timeout),
+			      expected);
 		close(fence);
 	}
 
@@ -987,6 +1003,7 @@ static void close_replace_race(int i915)
 	close(out[1]);
 
 	if (sync_fence_wait(fence, MSEC_PER_SEC / 2)) {
+		kick_kthreads(i915);
 		igt_debugfs_dump(i915, "i915_engine_info");
 		igt_assert(sync_fence_wait(fence, MSEC_PER_SEC / 2) == 0);
 	}
-- 
2.26.2



More information about the Intel-gfx mailing list