[Intel-gfx] [RFC i-g-t] tests/drv_hangman: test for acthd increasing through invalid VM space
daniele.ceraolospurio at intel.com
daniele.ceraolospurio at intel.com
Thu Feb 18 17:34:50 UTC 2016
From: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
The hangcheck logic will not flag a hang if acthd keeps increasing.
However, if a malformed batch jumps to an invalid offset in the ppgtt it
can potentially continue executing through the whole address space
without triggering the hangcheck mechanism.
This patch adds a test to simulate the issue. I've kept the test running
for more than 10 minutes before killing it on a BDW and no hang occurred.
I've sampled i915_hangcheck_info a few times during the run and got the
following:
Hangcheck active, fires in 468ms
render ring:
seqno = fffff55e [current fffff55e]
ACTHD = 0x47df685ecc [current 0x4926b81d90]
max ACTHD = 0x47df685ecc
score = 0
action = 2
instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000
Hangcheck active, fires in 424ms
render ring:
seqno = fffff55e [current fffff55e]
ACTHD = 0x6c953d3a34 [current 0x6de5e76fa4]
max ACTHD = 0x6c953d3a34
score = 0
action = 2
instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000
Hangcheck active, fires in 1692ms
render ring:
seqno = fffff55e [current fffff55e]
ACTHD = 0x1f49b0366dc [current 0x1f4dcbd88ec]
max ACTHD = 0x1f49b0366dc
score = 0
action = 2
instdone read = 0xffd7ffff 0xffffffff 0xffffffff 0xffffffff
instdone accu = 0x00000000 0x00000000 0x00000000 0x00000000
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Arun Siluvery <arun.siluvery at linux.intel.com>
Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
---
tests/drv_hangman.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 52 insertions(+)
diff --git a/tests/drv_hangman.c b/tests/drv_hangman.c
index 8a465cf..353e7a8 100644
--- a/tests/drv_hangman.c
+++ b/tests/drv_hangman.c
@@ -288,6 +288,55 @@ static void test_error_state_capture(unsigned ring_id,
check_error_state(gen, cmd_parser, ring_name, offset);
}
+/* This test covers the case where we end up in an uninitialised area of the
+ * ppgtt at an offset greater than the one where the last buffer is mapped. This
+ * is particularly relevant if 48b ppgtt is enabled because the ppgtt is
+ * massively bigger compared to the 32b case and it takes a lot more time to
+ * wrap, so the acthd can potentially keep increasing for a long time.
+ */
+static void ppgtt_walking(void)
+{
+	int fd;
+	unsigned timeout = 100; /* remaining one-second polls */
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 gem_exec;
+	uint32_t handle, batch[4];
+
+	fd = drm_open_driver(DRIVER_INTEL);
+	igt_require(gem_gtt_type(fd) > 2); /* presumably 48b ppgtt; confirm */
+
+	/* the batch will be mapped to an offset < 4GB because the flag to allow
+	 * 48b offsets is not specified, so jump to address 0x00000001 00000000
+	 */
+	batch[0] = MI_BATCH_BUFFER_START | 1;
+	batch[1] = 0; /* jump target, low 32 bits */
+	batch[2] = 1; /* jump target, high 32 bits */
+	batch[3] = MI_BATCH_BUFFER_END;
+
+	handle = gem_create(fd, 4096);
+	gem_write(fd, handle, 0, batch, sizeof(batch));
+
+	memset(&gem_exec, 0, sizeof(gem_exec));
+	gem_exec.handle = handle;
+
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = (uintptr_t)&gem_exec;
+	execbuf.buffer_count = 1;
+	execbuf.batch_len = sizeof(batch); /* all 4 dwords, not just the first 2 */
+
+	gem_execbuf(fd, &execbuf);
+
+	while (gem_bo_busy(fd, handle) && timeout > 0) {
+		timeout--; /* keep the side effect out of the log call */
+		igt_debug("decreasing timeout to %u\n", timeout);
+		sleep(1);
+	}
+
+	gem_close(fd, handle);
+	close(fd);
+	igt_assert(timeout > 0); /* fails if still busy after every poll */
+}
+
igt_main
{
const struct intel_execution_engine *e;
@@ -314,4 +363,7 @@ igt_main
test_error_state_capture(e->exec_id | e->flags,
e->full_name);
}
+
+ igt_subtest("ppgtt-walking")
+ ppgtt_walking();
}
--
1.9.1
More information about the Intel-gfx
mailing list