[Intel-gfx] [PATCH i-g-t 2/2] i915/gem_ctx_persistence: Give the CPU scheduler a kick on timeouts
Chris Wilson
chris at chris-wilson.co.uk
Wed Apr 22 10:41:02 UTC 2020
We have allowed the CPU 2s to process the hang and clean up, but clearly
this is not always enough. Let's just give the CPU one last kick before
declaring that we have an issue, so that we can be sure there really is
a bug to fix before worrying.
References: https://gitlab.freedesktop.org/drm/intel/issues/1528
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
tests/i915/gem_ctx_persistence.c | 43 ++++++++++++++++++++++----------
1 file changed, 30 insertions(+), 13 deletions(-)
diff --git a/tests/i915/gem_ctx_persistence.c b/tests/i915/gem_ctx_persistence.c
index 3d52987d1..dea62fa38 100644
--- a/tests/i915/gem_ctx_persistence.c
+++ b/tests/i915/gem_ctx_persistence.c
@@ -55,11 +55,21 @@ static void cleanup(int i915)
igt_require_gem(i915);
}
-static int wait_for_status(int fence, int timeout)
+static void kick_kthreads(int i915)
+{
+ /* Give the *CPU* scheduler a kick! */
+ igt_drop_caches_set(i915, DROP_TASKLETS);
+}
+
+static int wait_for_status(int i915, int fence, int timeout)
{
int err;
err = sync_fence_wait(fence, timeout);
+ if (err == -ETIME) {
+ kick_kthreads(i915);
+ err = sync_fence_wait(fence, timeout);
+ }
if (err)
return err;
@@ -250,8 +260,8 @@ static void test_nonpersistent_mixed(int i915, unsigned int engine)
}
/* Outer pair of contexts were non-persistent and killed */
- igt_assert_eq(wait_for_status(fence[0], reset_timeout_ms), -EIO);
- igt_assert_eq(wait_for_status(fence[2], reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(i915, fence[0], reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(i915, fence[2], reset_timeout_ms), -EIO);
/* But the middle context is still running */
igt_assert_eq(sync_fence_wait(fence[1], 0), -ETIME);
@@ -440,7 +450,8 @@ static void test_nonpersistent_file(int i915)
close(i915);
flush_delayed_fput(debugfs);
- igt_assert_eq(wait_for_status(spin->out_fence, reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(i915, spin->out_fence, reset_timeout_ms),
+ -EIO);
spin->handle = 0;
igt_spin_free(-1, spin);
@@ -478,8 +489,10 @@ static void test_nonpersistent_queued(int i915, unsigned int engine)
gem_context_destroy(i915, ctx);
- igt_assert_eq(wait_for_status(spin->out_fence, reset_timeout_ms), -EIO);
- igt_assert_eq(wait_for_status(fence, reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(i915, spin->out_fence, reset_timeout_ms),
+ -EIO);
+ igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms),
+ -EIO);
igt_spin_free(i915, spin);
}
@@ -554,7 +567,7 @@ static void test_process(int i915)
fence = recvfd(sv[1]);
close(sv[1]);
- igt_assert_eq(wait_for_status(fence, reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms), -EIO);
close(fence);
/* We have to manually clean up the orphaned spinner */
@@ -607,7 +620,7 @@ static void test_process_mixed(int pfd, unsigned int engine)
close(sv[1]);
/* First fence is non-persistent, so should be reset */
- igt_assert_eq(wait_for_status(fence[0], reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(pfd, fence[0], reset_timeout_ms), -EIO);
close(fence[0]);
/* Second fence is persistent, so should be still spinning */
@@ -677,11 +690,12 @@ test_saturated_hostile(int i915, const struct intel_execution_engine2 *engine)
gem_context_destroy(i915, ctx);
/* Hostile request requires a GPU reset to terminate */
- igt_assert_eq(wait_for_status(spin->out_fence, reset_timeout_ms), -EIO);
+ igt_assert_eq(wait_for_status(i915, spin->out_fence, reset_timeout_ms),
+ -EIO);
/* All other spinners should be left unharmed */
gem_quiescent_gpu(i915);
- igt_assert_eq(wait_for_status(fence, reset_timeout_ms), 1);
+ igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms), 1);
close(fence);
}
@@ -746,7 +760,7 @@ static void test_processes(int i915)
if (i == 0) {
/* First fence is non-persistent, so should be reset */
- igt_assert_eq(wait_for_status(fence, reset_timeout_ms),
+ igt_assert_eq(wait_for_status(i915, fence, reset_timeout_ms),
-EIO);
} else {
/* Second fence is persistent, so still spinning */
@@ -790,10 +804,12 @@ static void __smoker(int i915,
igt_spin_end(spin);
- igt_assert_eq(wait_for_status(spin->out_fence, timeout), expected);
+ igt_assert_eq(wait_for_status(i915, spin->out_fence, timeout),
+ expected);
if (fence != -1) {
- igt_assert_eq(wait_for_status(fence, timeout), expected);
+ igt_assert_eq(wait_for_status(i915, fence, timeout),
+ expected);
close(fence);
}
@@ -987,6 +1003,7 @@ static void close_replace_race(int i915)
close(out[1]);
if (sync_fence_wait(fence, MSEC_PER_SEC / 2)) {
+ kick_kthreads(i915);
igt_debugfs_dump(i915, "i915_engine_info");
igt_assert(sync_fence_wait(fence, MSEC_PER_SEC / 2) == 0);
}
--
2.26.2
More information about the Intel-gfx
mailing list