[Intel-gfx] [PATCH i-g-t 1/3] igt/gem_pipe_control_store_loop: Add qword write tests
Michał Winiarski
michal.winiarski at intel.com
Thu Apr 7 18:39:47 UTC 2016
Test description suggested that all platforms were testing qword writes,
while in fact only gen4-gen5 did.
v2: Test dword/qword writes for all available platforms.
v3: Rewrite, drop libdrm/intel_batchbuffer dependencies,
drop brw_emit_post_sync_nonzero_flush WA for gen6/gen7,
drop WC_FLUSH/TC_FLUSH on gen4/gen5,
drop preuse tests, use gem_wait instead of set_domain.
v4: Back to preuse, do not use gem_write.
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
---
tests/gem_pipe_control_store_loop.c | 290 +++++++++++++++++++++---------------
1 file changed, 173 insertions(+), 117 deletions(-)
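
For reference, the dword/qword distinction is just the PIPE_CONTROL
length field plus the number of data dwords following the address.
Below is a minimal sketch of the gen8+ form as a hypothetical helper
(emit_pc_write is not part of the patch), assuming the
GFX_OP_PIPE_CONTROL, PIPE_CONTROL_WRITE_IMMEDIATE and
MI_BATCH_BUFFER_END defines visible in the test:

static int emit_pc_write(uint32_t *b, uint64_t offset,
                         uint32_t val, bool qword)
{
        int i = 0;

        /* base command is 4 dwords (length field 2); +1 for the upper
         * address half on gen8+, +1 for the upper data half of a qword */
        b[i++] = GFX_OP_PIPE_CONTROL + 1 + qword;
        b[i++] = PIPE_CONTROL_WRITE_IMMEDIATE;
        b[i++] = (uint32_t)offset;         /* address bits 31:0 (reloc) */
        b[i++] = (uint32_t)(offset >> 32); /* address bits 63:32 */
        b[i++] = val;                      /* data dword 0 */
        if (qword)
                b[i++] = ~val;             /* data dword 1 */
        b[i++] = MI_BATCH_BUFFER_END;

        return i;
}

On gen6/gen7 the address is a single dword; gen6 additionally has to
write through the global GTT, selected by bit 2 of the address dword,
which is why the test ORs PIPE_CONTROL_GLOBAL_GTT into the presumed
offset and sets EXEC_OBJECT_NEEDS_GTT there.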
diff --git a/tests/gem_pipe_control_store_loop.c b/tests/gem_pipe_control_store_loop.c
index a155ad1..3cf4b31 100644
--- a/tests/gem_pipe_control_store_loop.c
+++ b/tests/gem_pipe_control_store_loop.c
@@ -26,10 +26,10 @@
*/
/*
- * Testcase: (TLB-)Coherency of pipe_control QW writes
+ * Testcase: (TLB-)Coherency of pipe_control writes
*
- * Writes a counter-value into an always newly allocated target bo (by disabling
- * buffer reuse). Decently trashes on tlb inconsistencies, too.
+ * Writes a counter-value into a target bo.
+ * Decently trashes on tlb inconsistencies, too.
*/
#include "igt.h"
#include <stdlib.h>
@@ -43,11 +43,11 @@
#include "drm.h"
#include "intel_bufmgr.h"
-IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control QW writes.");
+IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control writes.");
-static drm_intel_bufmgr *bufmgr;
-struct intel_batchbuffer *batch;
uint32_t devid;
+int gen;
+int fd;
#define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
#define PIPE_CONTROL_WRITE_IMMEDIATE (1<<14)
@@ -60,134 +60,190 @@ uint32_t devid;
#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-/* Like the store dword test, but we create new command buffers each time */
+#define TEST_STORE_LOOP_BUFFER_REUSED (1 << 0)
+#define TEST_STORE_LOOP_QWORD_WRITE (1 << 1)
+#define TEST_STORE_LOOP_ALL_FLAGS (TEST_STORE_LOOP_BUFFER_REUSED | \
+ TEST_STORE_LOOP_QWORD_WRITE)
+
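+/*
+ * Pre-use the target bo by blitting to it, so the pipe control write
+ * lands in a buffer the gpu has already touched, and return the offset
+ * the kernel picked so it can serve as the presumed address later on.
+ */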
+static uint64_t
+preuse(uint32_t buf_handle)
+{
+ int i = 0;
+ uint32_t batch_handle;
+ uint32_t *batch;
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+
+ batch_handle = gem_create(fd, 4096);
+ batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+ batch[i++] = XY_COLOR_BLT_CMD_NOLEN |
+ COLOR_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+ if (gen >= 8)
+ batch[i - 1] |= 5;
+ else
+ batch[i - 1] |= 4;
+
+ batch[i++] = ((3 << 24) | (0xf0 << 16) | 64);
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (1 << 16 | 1); /* dst x2,y2 */
+ batch[i++] = 0; /* reloc */
+ if (gen >= 8)
+ batch[i++] = 0; /* reloc_high */
+ batch[i++] = 0xdeadbeef;
+ batch[i++] = MI_BATCH_BUFFER_END;
+
+ memset(&object, 0, sizeof(object));
+ memset(&reloc, 0, sizeof(reloc));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ reloc.target_handle = buf_handle;
+ reloc.delta = 0;
+ reloc.offset = 4 * sizeof(batch[0]);
+ reloc.presumed_offset = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
+ object[0].handle = buf_handle;
+ object[1].handle = batch_handle;
+ object[1].relocation_count = 1;
+ object[1].relocs_ptr = (uintptr_t)&reloc;
+
+ if (gen >= 6)
+ execbuf.flags = I915_EXEC_BLT;
+ execbuf.buffers_ptr = (uintptr_t)object;
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+
+ munmap(batch, 4096);
+ gem_close(fd, batch_handle);
+
+ return reloc.presumed_offset;
+}
+
static void
-store_pipe_control_loop(bool preuse_buffer)
+store_pipe_control_loop(uint32_t flags)
{
- int i, val = 0;
+ const bool preuse_buffer = flags & TEST_STORE_LOOP_BUFFER_REUSED;
+ const bool qword_write = flags & TEST_STORE_LOOP_QWORD_WRITE;
+
+ int val, i;
+ uint32_t reloc_offset;
+ uint64_t presumed_offset;
+ uint32_t batch_handle;
+ uint32_t *batch;
+ uint32_t buf_handle;
uint32_t *buf;
- drm_intel_bo *target_bo;
-
- for (i = 0; i < SLOW_QUICK(0x10000, 4); i++) {
- /* we want to check tlb consistency of the pipe_control target,
- * so get a new buffer every time around */
- target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
- igt_assert(target_bo);
-
- if (preuse_buffer) {
- COLOR_BLIT_COPY_BATCH_START(0);
- OUT_BATCH((3 << 24) | (0xf0 << 16) | 64);
- OUT_BATCH(0);
- OUT_BATCH(1 << 16 | 1);
-
- /*
- * IMPORTANT: We need to preuse the buffer in a
- * different domain than what the pipe control write
- * (and kernel wa) uses!
- */
- OUT_RELOC_FENCED(target_bo,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- 0);
- OUT_BATCH(0xdeadbeef);
- ADVANCE_BATCH();
-
- intel_batchbuffer_flush(batch);
- }
-
- /* gem_storedw_batches_loop.c is a bit overenthusiastic with
- * creating new batchbuffers - with buffer reuse disabled, the
- * support code will do that for us. */
- if (batch->gen >= 8) {
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL + 1);
- OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC_FENCED(target_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT);
- OUT_BATCH(val); /* write data */
- ADVANCE_BATCH();
-
- } else if (batch->gen >= 6) {
- /* work-around hw issue, see intel_emit_post_sync_nonzero_flush
- * in mesa sources. */
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL);
- OUT_BATCH(PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
- OUT_BATCH(0); /* address */
- OUT_BATCH(0); /* write data */
- ADVANCE_BATCH();
-
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL);
- OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(target_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT);
- OUT_BATCH(val); /* write data */
- ADVANCE_BATCH();
- } else if (batch->gen >= 4) {
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH |
- PIPE_CONTROL_TC_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE | 2);
- OUT_RELOC(target_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT);
- OUT_BATCH(val);
- OUT_BATCH(0xdeadbeef);
- ADVANCE_BATCH();
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+
+ /* no dword writes on gen4/gen5 and gen9+ */
+ if (!qword_write)
+ igt_skip_on(gen < 6 || gen > 8);
+
+ batch_handle = gem_create(fd, 4096);
+ batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+ for (val = 0; val < SLOW_QUICK(0x10000, 4); val++) {
+ i = 0;
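+ /* we want to check tlb consistency of the pipe_control target,
+ * so get a new buffer every time around */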
+ buf_handle = gem_create(fd, 4096);
+
+ buf = gem_mmap__cpu(fd, buf_handle, 0, 4096, PROT_READ | PROT_WRITE);
+ gem_set_domain(fd, buf_handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ buf[0] = 0xdeadbeef;
+ buf[1] = 0xdeadbeef;
+ if (preuse_buffer)
+ presumed_offset = preuse(buf_handle);
+ else
+ presumed_offset = 0;
+
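+ /* gen6 pipe control writes must target the global GTT,
+ * selected by bit 2 of the address dword */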
+ if (gen == 6)
+ presumed_offset |= PIPE_CONTROL_GLOBAL_GTT;
+
+ if (gen >= 6) {
+ batch[i++] = GFX_OP_PIPE_CONTROL + (gen >= 8) + qword_write;
+ batch[i++] = PIPE_CONTROL_WRITE_IMMEDIATE;
+ batch[i++] = (uint32_t)presumed_offset; /* reloc */
+ reloc_offset = i - 1;
+ if (gen >= 8)
+ batch[i++] = (uint32_t)(presumed_offset >> 32); /* reloc_high */
+ } else {
+ /* qword write */
+ batch[i++] = (GFX_OP_PIPE_CONTROL |
+ PIPE_CONTROL_WRITE_IMMEDIATE);
+ batch[i++] = (uint32_t)presumed_offset; /* reloc */
+ reloc_offset = i - 1;
}
- intel_batchbuffer_flush_on_ring(batch, 0);
-
- drm_intel_bo_map(target_bo, 1);
-
- buf = target_bo->virtual;
- igt_assert(buf[0] == val);
-
- drm_intel_bo_unmap(target_bo);
- /* Make doublesure that this buffer won't get reused. */
- drm_intel_bo_disable_reuse(target_bo);
- drm_intel_bo_unreference(target_bo);
-
- val++;
+ batch[i++] = val; /* write data */
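+ /* qword: ~val goes in the high dword; dword: pad the batch with
+ * a noop instead (the low bits of MI_NOOP are ignored) */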
+ if (qword_write)
+ batch[i++] = ~val; /* dword_high */
+ else
+ batch[i++] = MI_NOOP | 0xabcd;
+ batch[i++] = MI_BATCH_BUFFER_END;
+
+ memset(object, 0, sizeof(object));
+ memset(&reloc, 0, sizeof(reloc));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ reloc.target_handle = buf_handle;
+ reloc.delta = 0;
+ reloc.offset = reloc_offset * sizeof(batch[0]);
+ reloc.presumed_offset = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
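+ /* and gen6 also needs the target actually bound into the GGTT */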
+ if (gen == 6)
+ object[0].flags |= EXEC_OBJECT_NEEDS_GTT;
+ object[0].handle = buf_handle;
+ object[1].handle = batch_handle;
+ object[1].relocation_count = 1;
+ object[1].relocs_ptr = (uintptr_t)&reloc;
+
+ execbuf.buffers_ptr = (uintptr_t)object;
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+
+ gem_wait(fd, buf_handle, NULL);
+
+ igt_assert_eq_u32(buf[0], val);
+ if (qword_write)
+ igt_assert_eq_u32(buf[1], ~val);
+ else
+ igt_assert_eq_u32(buf[1], 0xdeadbeef);
+
+ munmap(buf, 4096);
+ gem_close(fd, buf_handle);
}
-}
-int fd;
+ munmap(batch, 4096);
+ gem_close(fd, batch_handle);
+}
igt_main
{
igt_fixture {
fd = drm_open_driver(DRIVER_INTEL);
devid = intel_get_drm_devid(fd);
+ gen = intel_gen(devid);
- bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
- igt_assert(bufmgr);
-
- igt_skip_on(IS_GEN2(devid) || IS_GEN3(devid));
+ igt_skip_on(gen < 4);
igt_skip_on(devid == PCI_CHIP_I965_G); /* has totally broken pipe control */
-
- /* IMPORTANT: No call to
- * drm_intel_bufmgr_gem_enable_reuse(bufmgr);
- * here because we wan't to have fresh buffers (to trash the tlb)
- * every time! */
-
- batch = intel_batchbuffer_alloc(bufmgr, devid);
- igt_assert(batch);
}
- igt_subtest("fresh-buffer")
- store_pipe_control_loop(false);
-
- igt_subtest("reused-buffer")
- store_pipe_control_loop(true);
-
- igt_fixture {
- intel_batchbuffer_free(batch);
- drm_intel_bufmgr_destroy(bufmgr);
+ for (uint32_t flags = 0; flags < TEST_STORE_LOOP_ALL_FLAGS + 1; flags++) {
+ igt_subtest_f("%sbuffer%s",
+ flags & TEST_STORE_LOOP_BUFFER_REUSED ?
+ "reused-" : "fresh-",
+ flags & TEST_STORE_LOOP_QWORD_WRITE ?
+ "-qword-write" : "") {
+ store_pipe_control_loop(flags);
+ }
+ }
+ igt_fixture
close(fd);
- }
}
--
2.8.0