[igt-dev] [PATCH i-g-t v2 3/3] igt: Add a test for Data Port Coherency.

Fri Oct 5 18:04:36 UTC 2018

From: "Lis, Tomasz" <tomasz.lis at intel.com>

This new test performs a gpgpu fill operation with a spin buffer at the end.
When Data Port Coherency is enabled, all the memory writes are required
to be visible on the CPU side, even though the batch buffer has not finished
yet. Without the coherency enabled, memory writes made on the GPU side are
not fully visible on the CPU side until the spin buffer is released and the
execution of that buffer is finished.

Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Michal Winiarski <michal.winiarski at intel.com>

Signed-off-by: Tomasz Lis <tomasz.lis at intel.com>
---
 include/drm-uapi/i915_drm.h     |   7 +
 tests/Makefile.sources          |   1 +
 tests/gem_gpgpu_fill_coherent.c | 299 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 307 insertions(+)
 create mode 100644 tests/gem_gpgpu_fill_coherent.c

diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index 16e452a..86bfac4 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -1456,6 +1456,13 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+/*
+ * When data port level coherency is enabled, the GPU will update memory
+ * buffers shared with CPU, by forcing internal cache units to send memory
+ * writes to higher level caches faster. Enabling data port coherency has
+ * a performance cost.
+ */
+#define I915_CONTEXT_PARAM_DATA_PORT_COHERENCY 0x7
 	__u64 value;
 };
 
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 001f1a2..b33d2d5 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -93,6 +93,7 @@ TESTS_progs = \
 	gem_flink_basic \
 	gem_flink_race \
 	gem_gpgpu_fill \
+	gem_gpgpu_fill_coherent \
 	gem_gtt_cpu_tlb \
 	gem_gtt_hog \
 	gem_gtt_speed \
diff --git a/tests/gem_gpgpu_fill_coherent.c b/tests/gem_gpgpu_fill_coherent.c
new file mode 100644
index 0000000..bf867c3
--- /dev/null
+++ b/tests/gem_gpgpu_fill_coherent.c
@@ -0,0 +1,299 @@
+/*
+ * Copyright © 2013 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Damien Lespiau <damien.lespiau at intel.com>
+ *    Xiang, Haihao <haihao.xiang at intel.com>
+ */
+
+/** @file gem_gpgpu_fill_coherent.c
+ *
+ * Check that the Data port coherency option has an effect on workload
+ * execution. Performs gpgpu_fill() with a spin buffer at the end, expects
+ * coherent writes to be visible from the CPU and non-coherent ones to not be
+ * visible within a specific time frame.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "intel_bufmgr.h"
+#include "gpu_cmds.h"
+
+#define WIDTH 64	/* scratch buffer width in bytes; also used as its stride */
+#define HEIGHT 64	/* scratch buffer height in rows */
+#define SIZE (HEIGHT*WIDTH)	/* not referenced by the code visible in this file */
+
+#define COLOR_C4	0xc4	/* value the scratch buffer is initialised with */
+#define COLOR_4C	0x4c	/* value passed to the gpgpu fill function */
+
+#define DEFAULT_CONTEXT_IDX	0	/* context id passed to gem_context_set_coherency() */
+
+typedef struct {
+	int drm_fd;	/* set from drm_open_driver_render() */
+	uint32_t devid;	/* set from intel_get_drm_devid() */
+	drm_intel_bufmgr *bufmgr;	/* set from drm_intel_bufmgr_gem_init() */
+} data_t;
+
+/**
+ * __gem_context_set_coherency:
+ * @fd: open i915 drm file descriptor
+ * @ctx_id: i915 context id
+ * @enable: desired context coherency state
+ *
+ * Sets the Data port coherency parameter of the context to @enable.
+ *
+ * Returns: the result of __gem_context_set_param(), zero meaning success
+ */
+static int
+__gem_context_set_coherency(int fd, uint32_t ctx_id, int enable)
+{
+	struct drm_i915_gem_context_param arg;
+
+	memset(&arg, 0, sizeof(arg));
+	arg.param = I915_CONTEXT_PARAM_DATA_PORT_COHERENCY;
+	arg.value = enable;
+	arg.size = 0;
+	arg.ctx_id = ctx_id;
+
+	return __gem_context_set_param(fd, &arg);
+}
+
+/**
+ * gem_context_set_coherency:
+ * @fd: open i915 drm file descriptor
+ * @ctx_id: i915 context id
+ * @enable: desired context coherency state
+ *
+ * Like __gem_context_set_coherency(), but asserts (logging the error) on failure.
+ */
+static void
+gem_context_set_coherency(int fd, uint32_t ctx_id, int enable)
+{
+	igt_assert_eq(__gem_context_set_coherency(fd, ctx_id, enable), 0);
+}
+
+/**
+ * gem_context_get_coherency:
+ * @fd: open i915 drm file descriptor
+ * @ctx_id: i915 context id
+ *
+ * Returns: param value, or the error code from __gem_context_get_param().
+ */
+static int
+gem_context_get_coherency(int fd, uint32_t ctx_id)
+{
+	struct drm_i915_gem_context_param p;
+	int err;
+
+	memset(&p, 0, sizeof(p));
+	p.ctx_id = ctx_id;
+	p.param = I915_CONTEXT_PARAM_DATA_PORT_COHERENCY;
+
+	err = __gem_context_get_param(fd, &p); /* do not assert: let callers skip */
+	return err ? err : p.value;
+}
+
+/* Allocate a linear width x height byte buffer, map it and fill it with color. */
+static void
+scratch_buf_init(data_t *data, struct igt_buf *buf,
+		 int width, int height, uint8_t color)
+{
+	memset(buf, 0, sizeof(*buf));
+	buf->stride = width;
+	buf->tiling = I915_TILING_NONE;
+	buf->size = width * height;
+	buf->bo = drm_intel_bo_alloc(data->bufmgr, "", buf->size, 4096);
+	igt_assert(buf->bo);
+	buf->data = drm_intel_gem_bo_map__wc(buf->bo);
+	if (!buf->data) /* no WC mapping available: fall back to a GTT mapping */
+		buf->data = drm_intel_gem_bo_map__gtt(buf->bo);
+	igt_assert(buf->data); /* fail the test instead of writing through NULL */
+	memset(buf->data, color, buf->size);
+}
+
+/**
+ * scratch_buf_check_plain:
+ * @buf: structure containing the buffer to check
+ * @width: width dimension of the 2D buffer, equal to stride
+ * @height: height dimension of the 2D buffer
+ * @color: byte value every element of the buffer is expected to hold
+ *
+ * Compares each byte of the mapped buffer against @color, column by column.
+ *
+ * Returns: void; asserts on the first byte that does not match
+ */
+static void
+scratch_buf_check_plain(struct igt_buf *buf, int width, int height, uint8_t color)
+{
+	const uint8_t *pixels = (uint8_t *)buf->data;
+	int x, y;
+
+	for (x = 0; x < width; x++) {
+		for (y = 0; y < height; y++) {
+			uint8_t val = pixels[y * width + x];
+
+			igt_assert_f(val == color,
+				"Expected 0x%02x, found 0x%02x at (%d,%d)\n",
+				color, val, x, y);
+		}
+	}
+}
+
+/**
+ * scratch_buf_check_vstripes:
+ * @buf: structure containing the buffer to check
+ * @width: width dimension of the 2D buffer, equal to stride
+ * @height: height dimension of the 2D buffer
+ * @colorl: color expected in rows [0, height/2)
+ * @colorr: color expected in rows [height/2, height)
+ *
+ * Verifies the buffer while the GPU fills it; the stripes are split by row index.
+ *
+ * Returns: true if the first half holds @colorl, false if not (yet); asserts on @colorr
+ */
+static bool
+scratch_buf_check_vstripes(struct igt_buf *buf, int width, int height,
+			   uint8_t colorl, uint8_t colorr)
+{
+	const uint8_t *linear = (uint8_t *)buf->data;
+	int i, j;
+
+	for (i = 0; i < width; i++) {
+		for (j = 0; j < height; j++) {
+			uint8_t val = linear[j * width + i];
+			if (j < height / 2) {
+				if (val != colorl)
+					return false;
+			} else {
+				igt_assert_f(val == colorr,
+					"Expected 0x%02x, found 0x%02x at (%d,%d)\n",
+					colorr, val, i, j);
+			}
+		}
+	}
+	return true;
+}
+
+static uint32_t
+igt_batchbuffer_spin_emit(struct intel_batchbuffer *batch)
+{
+	uint32_t spin_offset; /* value returned to the caller for spin_finish() */
+	spin_offset = intel_batchbuffer_subdata_offset(batch, batch->ptr);
+	OUT_BATCH(MI_NOOP); /* first dword emitted after spin_offset was sampled */
+	/* recurse: batch buffer start whose relocation targets spin_offset in batch->bo */
+	OUT_BATCH(MI_BATCH_BUFFER_START | 1 << 8 | 1);
+	OUT_RELOC(batch->bo, I915_GEM_DOMAIN_COMMAND, 0, spin_offset);
+	return spin_offset;
+}
+
+/* Stop the spin loop by storing MI_BATCH_BUFFER_END at spin_offset in the batch bo. */
+static void
+igt_batchbuffer_spin_finish(struct intel_batchbuffer *batch, uint32_t spin_offset)
+{
+	uint8_t *mmbuffer = drm_intel_gem_bo_map__wc(batch->bo);
+
+	if (!mmbuffer)
+		mmbuffer = drm_intel_gem_bo_map__gtt(batch->bo);
+	igt_assert(mmbuffer); /* both mappings failed: do not store through NULL */
+	*((uint32_t *)(mmbuffer + spin_offset)) = MI_BATCH_BUFFER_END;
+	drm_intel_bo_unmap(batch->bo);
+}
+
+/* Run one fill-and-spin pass and check CPU visibility for the given coherency mode. */
+static void
+igt_batchbuffer_coherency_test(data_t *data, struct intel_batchbuffer *batch,
+			       igt_fillfunc_t emit_gpgpu_fill, bool coherent)
+{
+	struct igt_buf dst;
+	uint32_t batch_end, spin_offset;
+	bool res;
+
+	/* Start from a buffer verified to hold only COLOR_C4. */
+	scratch_buf_init(data, &dst, WIDTH, HEIGHT, COLOR_C4);
+	scratch_buf_check_plain(&dst, WIDTH, HEIGHT, COLOR_C4);
+
+	intel_batchbuffer_flush(batch);
+	gem_context_set_coherency(data->drm_fd, DEFAULT_CONTEXT_IDX, coherent);
+
+	/* Request a COLOR_4C fill of half the rows, then append the spin loop. */
+	emit_gpgpu_fill(batch, &dst, 0, 0, WIDTH, HEIGHT / 2, COLOR_4C);
+	spin_offset = igt_batchbuffer_spin_emit(batch);
+
+	OUT_BATCH(MI_BATCH_BUFFER_END);
+	batch_end = intel_batchbuffer_align(batch, 8);
+	gen7_render_flush(batch, batch_end);
+
+	/* Poll the CPU mapping; 100 and 1 are igt_wait()'s timeout and interval. */
+	res = igt_wait(scratch_buf_check_vstripes(&dst, WIDTH, HEIGHT, COLOR_4C, COLOR_C4), 100, 1);
+
+	/* End the spin loop before asserting, so a failure does not leave it running. */
+	igt_batchbuffer_spin_finish(batch, spin_offset);
+	intel_batchbuffer_reset(batch);
+
+	if (coherent)
+		igt_assert_f(res,
+			"coherent buffer fill not visible on CPU side after 100ms\n");
+	else
+		igt_assert_f(!res,
+			"non-coherent buffer fill visible on CPU side within 100ms\n");
+}
+
+igt_simple_main
+{
+	data_t data = {0, };
+	struct intel_batchbuffer *batch = NULL;
+	igt_fillfunc_t emit_gpgpu_fill = NULL;
+	/* Runs the coherency test twice on one batch: first disabled, then enabled. */
+	data.drm_fd = drm_open_driver_render(DRIVER_INTEL);
+	data.devid = intel_get_drm_devid(data.drm_fd);
+	igt_require_gem(data.drm_fd);
+
+	data.bufmgr = drm_intel_bufmgr_gem_init(data.drm_fd, 4096);
+	igt_assert(data.bufmgr);
+
+	emit_gpgpu_fill = igt_get_emit_gpgpu_fillfunc(data.devid);
+	/* Requirements: a fill function for this devid and a readable coherency param. */
+	igt_require_f(emit_gpgpu_fill,
+		      "no gpgpu-fill function\n");
+	igt_require_f(gem_context_get_coherency(data.drm_fd, 0) >= 0,
+		"Data Port Coherency not supported by kernel\n");
+
+	batch = intel_batchbuffer_alloc(data.bufmgr, data.devid);
+	igt_assert(batch);
+
+	/* Test without coherency: the fill is expected NOT to become visible in time */
+	igt_batchbuffer_coherency_test(&data, batch, emit_gpgpu_fill, 0);
+
+	/* Now test with Data Port coherency: the fill is expected to become visible */
+	igt_batchbuffer_coherency_test(&data, batch, emit_gpgpu_fill, 1);
+}
-- 
2.7.4