[Intel-gfx] [PATCH i-g-t 1/3] igt/gem_pipe_control_store_loop: Add qword write tests
Michał Winiarski
michal.winiarski at intel.com
Thu Apr 7 18:39:47 UTC 2016
Test description suggested that all platforms were testing qword writes,
while in fact only gen4-gen5 did.
v2: Test dword/qword writes for all available platforms.
v3: Rewrite, drop libdrm/intel_batchbuffer dependencies,
drop brw_emit_post_sync_nonzero_flush WA for gen6/gen7,
drop WC_FLUSH/TC_FLUSH on gen4/gen5,
drop preuse tests, use gem_wait instead of set_domain.
v4: Back to preuse, do not use gem_write.
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Signed-off-by: Michał Winiarski <michal.winiarski at intel.com>
---
tests/gem_pipe_control_store_loop.c | 290 +++++++++++++++++++++---------------
1 file changed, 173 insertions(+), 117 deletions(-)
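
For reference, the dword/qword distinction is just the PIPE_CONTROL
length field plus the number of data dwords following the address.
Below is a minimal sketch of the gen8+ form as a hypothetical helper
(emit_pc_write is not part of the patch), assuming the
GFX_OP_PIPE_CONTROL, PIPE_CONTROL_WRITE_IMMEDIATE and
MI_BATCH_BUFFER_END defines visible in the test:

static int emit_pc_write(uint32_t *b, uint64_t offset,
                         uint32_t val, bool qword)
{
        int i = 0;

        /* base command is 4 dwords (length field 2); +1 for the upper
         * address half on gen8+, +1 for the upper data half of a qword */
        b[i++] = GFX_OP_PIPE_CONTROL + 1 + qword;
        b[i++] = PIPE_CONTROL_WRITE_IMMEDIATE;
        b[i++] = (uint32_t)offset;         /* address bits 31:0 (reloc) */
        b[i++] = (uint32_t)(offset >> 32); /* address bits 63:32 */
        b[i++] = val;                      /* data dword 0 */
        if (qword)
                b[i++] = ~val;             /* data dword 1 */
        b[i++] = MI_BATCH_BUFFER_END;

        return i;
}

On gen6/gen7 the address is a single dword; gen6 additionally has to
write through the global GTT, selected by bit 2 of the address dword,
which is why the test ORs PIPE_CONTROL_GLOBAL_GTT into the presumed
offset and sets EXEC_OBJECT_NEEDS_GTT there.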
diff --git a/tests/gem_pipe_control_store_loop.c b/tests/gem_pipe_control_store_loop.c
index a155ad1..3cf4b31 100644
--- a/tests/gem_pipe_control_store_loop.c
+++ b/tests/gem_pipe_control_store_loop.c
@@ -26,10 +26,10 @@
*/
/*
- * Testcase: (TLB-)Coherency of pipe_control QW writes
+ * Testcase: (TLB-)Coherency of pipe_control writes
*
- * Writes a counter-value into an always newly allocated target bo (by disabling
- * buffer reuse). Decently trashes on tlb inconsistencies, too.
+ * Writes a counter-value into a target bo.
+ * Decently trashes on tlb inconsistencies, too.
*/
#include "igt.h"
#include <stdlib.h>
@@ -43,11 +43,11 @@
#include "drm.h"
#include "intel_bufmgr.h"
-IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control QW writes.");
+IGT_TEST_DESCRIPTION("Test (TLB-)Coherency of pipe_control writes.");
-static drm_intel_bufmgr *bufmgr;
-struct intel_batchbuffer *batch;
uint32_t devid;
+int gen;
+int fd;
#define GFX_OP_PIPE_CONTROL ((0x3<<29)|(0x3<<27)|(0x2<<24)|2)
#define PIPE_CONTROL_WRITE_IMMEDIATE (1<<14)
@@ -60,134 +60,190 @@ uint32_t devid;
#define PIPE_CONTROL_CS_STALL (1<<20)
#define PIPE_CONTROL_GLOBAL_GTT (1<<2) /* in addr dword */
-/* Like the store dword test, but we create new command buffers each time */
+#define TEST_STORE_LOOP_BUFFER_REUSED (1 << 0)
+#define TEST_STORE_LOOP_QWORD_WRITE (1 << 1)
+#define TEST_STORE_LOOP_ALL_FLAGS (TEST_STORE_LOOP_BUFFER_REUSED | \
+ TEST_STORE_LOOP_QWORD_WRITE)
+
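+/*
+ * Pre-use the target bo by blitting to it, so the pipe control write
+ * lands in a buffer the gpu has already touched, and return the offset
+ * the kernel picked so it can serve as the presumed address later on.
+ */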
+static uint64_t
+preuse(uint32_t buf_handle)
+{
+ int i = 0;
+ uint32_t batch_handle;
+ uint32_t *batch;
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+
+ batch_handle = gem_create(fd, 4096);
+ batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+ batch[i++] = XY_COLOR_BLT_CMD_NOLEN |
+ COLOR_BLT_WRITE_ALPHA |
+ XY_SRC_COPY_BLT_WRITE_RGB;
+ if (gen >= 8)
+ batch[i - 1] |= 5;
+ else
+ batch[i - 1] |= 4;
+
+ batch[i++] = ((3 << 24) | (0xf0 << 16) | 64);
+ batch[i++] = 0; /* dst x1,y1 */
+ batch[i++] = (1 << 16 | 1); /* dst x2,y2 */
+ batch[i++] = 0; /* reloc */
+ if (gen >= 8)
+ batch[i++] = 0; /* reloc_high */
+ batch[i++] = 0xdeadbeef;
+ batch[i++] = MI_BATCH_BUFFER_END;
+
+ memset(&object, 0, sizeof(object));
+ memset(&reloc, 0, sizeof(reloc));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ reloc.target_handle = buf_handle;
+ reloc.delta = 0;
+ reloc.offset = 4 * sizeof(batch[0]);
+ reloc.presumed_offset = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
+ object[0].handle = buf_handle;
+ object[1].handle = batch_handle;
+ object[1].relocation_count = 1;
+ object[1].relocs_ptr = (uintptr_t)&reloc;
+
+ if (gen >= 6)
+ execbuf.flags = I915_EXEC_BLT;
+ execbuf.buffers_ptr = (uintptr_t)object;
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+
+ munmap(batch, 4096);
+ gem_close(fd, batch_handle);
+
+ return reloc.presumed_offset;
+}
+
static void
-store_pipe_control_loop(bool preuse_buffer)
+store_pipe_control_loop(uint32_t flags)
{
- int i, val = 0;
+ const bool preuse_buffer = flags & TEST_STORE_LOOP_BUFFER_REUSED;
+ const bool qword_write = flags & TEST_STORE_LOOP_QWORD_WRITE;
+
+ int val, i;
+ uint32_t reloc_offset;
+ uint64_t presumed_offset;
+ uint32_t batch_handle;
+ uint32_t *batch;
+ uint32_t buf_handle;
uint32_t *buf;
- drm_intel_bo *target_bo;
-
- for (i = 0; i < SLOW_QUICK(0x10000, 4); i++) {
- /* we want to check tlb consistency of the pipe_control target,
- * so get a new buffer every time around */
- target_bo = drm_intel_bo_alloc(bufmgr, "target bo", 4096, 4096);
- igt_assert(target_bo);
-
- if (preuse_buffer) {
- COLOR_BLIT_COPY_BATCH_START(0);
- OUT_BATCH((3 << 24) | (0xf0 << 16) | 64);
- OUT_BATCH(0);
- OUT_BATCH(1 << 16 | 1);
-
- /*
- * IMPORTANT: We need to preuse the buffer in a
- * different domain than what the pipe control write
- * (and kernel wa) uses!
- */
- OUT_RELOC_FENCED(target_bo,
- I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
- 0);
- OUT_BATCH(0xdeadbeef);
- ADVANCE_BATCH();
-
- intel_batchbuffer_flush(batch);
- }
-
- /* gem_storedw_batches_loop.c is a bit overenthusiastic with
- * creating new batchbuffers - with buffer reuse disabled, the
- * support code will do that for us. */
- if (batch->gen >= 8) {
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL + 1);
- OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC_FENCED(target_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT);
- OUT_BATCH(val); /* write data */
- ADVANCE_BATCH();
-
- } else if (batch->gen >= 6) {
- /* work-around hw issue, see intel_emit_post_sync_nonzero_flush
- * in mesa sources. */
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL);
- OUT_BATCH(PIPE_CONTROL_CS_STALL |
- PIPE_CONTROL_STALL_AT_SCOREBOARD);
- OUT_BATCH(0); /* address */
- OUT_BATCH(0); /* write data */
- ADVANCE_BATCH();
-
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL);
- OUT_BATCH(PIPE_CONTROL_WRITE_IMMEDIATE);
- OUT_RELOC(target_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT);
- OUT_BATCH(val); /* write data */
- ADVANCE_BATCH();
- } else if (batch->gen >= 4) {
- BEGIN_BATCH(4, 1);
- OUT_BATCH(GFX_OP_PIPE_CONTROL | PIPE_CONTROL_WC_FLUSH |
- PIPE_CONTROL_TC_FLUSH |
- PIPE_CONTROL_WRITE_IMMEDIATE | 2);
- OUT_RELOC(target_bo,
- I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
- PIPE_CONTROL_GLOBAL_GTT);
- OUT_BATCH(val);
- OUT_BATCH(0xdeadbeef);
- ADVANCE_BATCH();
+ struct drm_i915_gem_relocation_entry reloc;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 object[2];
+
+ /* no dword writes on gen4/gen5 and gen9+ */
+ if (!qword_write)
+ igt_skip_on(gen < 6 || gen > 8);
+
+ batch_handle = gem_create(fd, 4096);
+ batch = gem_mmap__cpu(fd, batch_handle, 0, 4096, PROT_READ | PROT_WRITE);
+
+ for (val = 0; val < SLOW_QUICK(0x10000, 4); val++) {
+ i = 0;
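+ /* we want to check tlb consistency of the pipe_control target,
+ * so get a new buffer every time around */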
+ buf_handle = gem_create(fd, 4096);
+
+ buf = gem_mmap__cpu(fd, buf_handle, 0, 4096, PROT_READ | PROT_WRITE);
+ gem_set_domain(fd, buf_handle, I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ buf[0] = 0xdeadbeef;
+ buf[1] = 0xdeadbeef;
+ if (preuse_buffer)
+ presumed_offset = preuse(buf_handle);
+ else
+ presumed_offset = 0;
+
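+ /* gen6 pipe control writes must target the global GTT,
+ * selected by bit 2 of the address dword */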
+ if (gen == 6)
+ presumed_offset |= PIPE_CONTROL_GLOBAL_GTT;
+
+ if (gen >= 6) {
+ batch[i++] = GFX_OP_PIPE_CONTROL + (gen >= 8) + qword_write;
+ batch[i++] = PIPE_CONTROL_WRITE_IMMEDIATE;
+ batch[i++] = (uint32_t)presumed_offset; /* reloc */
+ reloc_offset = i - 1;
+ if (gen >= 8)
+ batch[i++] = (uint32_t)(presumed_offset >> 32); /* reloc_high */
+ } else {
+ /* qword write */
+ batch[i++] = (GFX_OP_PIPE_CONTROL |
+ PIPE_CONTROL_WRITE_IMMEDIATE);
+ batch[i++] = (uint32_t)presumed_offset; /* reloc */
+ reloc_offset = i - 1;
}
- intel_batchbuffer_flush_on_ring(batch, 0);
-
- drm_intel_bo_map(target_bo, 1);
-
- buf = target_bo->virtual;
- igt_assert(buf[0] == val);
-
- drm_intel_bo_unmap(target_bo);
- /* Make doublesure that this buffer won't get reused. */
- drm_intel_bo_disable_reuse(target_bo);
- drm_intel_bo_unreference(target_bo);
-
- val++;
+ batch[i++] = val; /* write data */
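+ /* qword: ~val goes in the high dword; dword: pad the batch with
+ * a noop instead (the low bits of MI_NOOP are ignored) */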
+ if (qword_write)
+ batch[i++] = ~val; /* dword_high */
+ else
+ batch[i++] = MI_NOOP | 0xabcd;
+ batch[i++] = MI_BATCH_BUFFER_END;
+
+ memset(object, 0, sizeof(object));
+ memset(&reloc, 0, sizeof(reloc));
+ memset(&execbuf, 0, sizeof(execbuf));
+
+ reloc.target_handle = buf_handle;
+ reloc.delta = 0;
+ reloc.offset = reloc_offset * sizeof(batch[0]);
+ reloc.presumed_offset = 0;
+ reloc.read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc.write_domain = I915_GEM_DOMAIN_RENDER;
+
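+ /* and gen6 also needs the target actually bound into the GGTT */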
+ if (gen == 6)
+ object[0].flags |= EXEC_OBJECT_NEEDS_GTT;
+ object[0].handle = buf_handle;
+ object[1].handle = batch_handle;
+ object[1].relocation_count = 1;
+ object[1].relocs_ptr = (uintptr_t)&reloc;
+
+ execbuf.buffers_ptr = (uintptr_t)object;
+ execbuf.buffer_count = 2;
+ gem_execbuf(fd, &execbuf);
+
+ gem_wait(fd, buf_handle, NULL);
+
+ igt_assert_eq_u32(buf[0], val);
+ if (qword_write)
+ igt_assert_eq_u32(buf[1], ~val);
+ else
+ igt_assert_eq_u32(buf[1], 0xdeadbeef);
+
+ munmap(buf, 4096);
+ gem_close(fd, buf_handle);
}
-}
-int fd;
+ munmap(batch, 4096);
+ gem_close(fd, batch_handle);
+}
igt_main
{
igt_fixture {
fd = drm_open_driver(DRIVER_INTEL);
devid = intel_get_drm_devid(fd);
+ gen = intel_gen(devid);
- bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
- igt_assert(bufmgr);
-
- igt_skip_on(IS_GEN2(devid) || IS_GEN3(devid));
+ igt_skip_on(gen < 4);
igt_skip_on(devid == PCI_CHIP_I965_G); /* has totally broken pipe control */
-
- /* IMPORTANT: No call to
- * drm_intel_bufmgr_gem_enable_reuse(bufmgr);
- * here because we wan't to have fresh buffers (to trash the tlb)
- * every time! */
-
- batch = intel_batchbuffer_alloc(bufmgr, devid);
- igt_assert(batch);
}
- igt_subtest("fresh-buffer")
- store_pipe_control_loop(false);
-
- igt_subtest("reused-buffer")
- store_pipe_control_loop(true);
-
- igt_fixture {
- intel_batchbuffer_free(batch);
- drm_intel_bufmgr_destroy(bufmgr);
+ for (uint32_t flags = 0; flags < TEST_STORE_LOOP_ALL_FLAGS + 1; flags++) {
+ igt_subtest_f("%sbuffer%s",
+ flags & TEST_STORE_LOOP_BUFFER_REUSED ?
+ "reused-" : "fresh-",
+ flags & TEST_STORE_LOOP_QWORD_WRITE ?
+ "-qword-write" : "") {
+ store_pipe_control_loop(flags);
+ }
+ }
+ igt_fixture
close(fd);
- }
}
--
2.8.0