[Intel-gfx] [PATCH i-g-t 3/3] gem_memory_bandwidth: Add test

Mon Oct 5 04:42:25 PDT 2015

Add a benchmark that measures GPU memory copy, write and read bandwidth,
one subtest per operation.

Issue: VIZ-5664
Signed-off-by: Antti Koskipaa <antti.koskipaa at linux.intel.com>
---
 tests/.gitignore             |   1 +
 tests/Makefile.sources       |   1 +
 tests/gem_memory_bandwidth.c | 209 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 211 insertions(+)
 create mode 100644 tests/gem_memory_bandwidth.c

diff --git a/tests/.gitignore b/tests/.gitignore
index dc8bb53..2ea4107 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -65,6 +65,7 @@ gem_linear_blits
 gem_lut_handle
 gem_madvise
 gem_media_fill
+gem_memory_bandwidth
 gem_mmap
 gem_mmap_gtt
 gem_mmap_offset_exhaustion
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 2e2e088..4429c29 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -36,6 +36,7 @@ TESTS_progs_M = \
 	gem_flink_race \
 	gem_linear_blits \
 	gem_madvise \
+	gem_memory_bandwidth \
 	gem_mmap \
 	gem_mmap_gtt \
 	gem_mmap_wc \
diff --git a/tests/gem_memory_bandwidth.c b/tests/gem_memory_bandwidth.c
new file mode 100644
index 0000000..a44987e
--- /dev/null
+++ b/tests/gem_memory_bandwidth.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Antti Koskipää <antti.koskipaa at intel.com>
+ *    Damien Lespiau <damien.lespiau at intel.com>
+ */
+
+/*
+ * We need a way to test memory bandwidth bottlenecks and understand better
+ * where they are. This test bypasses Mesa and uses the kernel GEM interface
+ * directly.
+ *
+ * Say there is a performance regression. Where is it, Mesa or kernel? Just
+ * compare the results of this test to the Mesa bandwidth results. If they are
+ * similar, the problem is in the kernel. If Mesa is much slower than this test,
+ * the problem is in Mesa.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("GPU memory bandwidth benchmark.");
+
+/*
+ * Geometry of one test buffer: WIDTH x HEIGHT pixels at 4 bytes per pixel,
+ * so STRIDE is 2048 bytes per row and SIZE is 512 * 2048 bytes = 1 MiB.
+ */
+#define WIDTH 512
+#define STRIDE (WIDTH*4)
+#define HEIGHT 512
+#define SIZE (HEIGHT*STRIDE)
+/*
+ * Number of render operations timed per subtest. Each one is issued for a
+ * WIDTH x HEIGHT rectangle, so 10240 iterations nominally move 10 GiB.
+ * 1024 was too small, giving too much variance.
+ */
+#define LOOPS 10240
+/*
+ * How many buffers to allocate for main memory speed testing. The timed loop
+ * walks through them round-robin. Must be large enough to thrash the caches.
+ */
+#define NBUFS 512
+
+/*
+ * Fill colours. SRC_COLOR is passed to scratch_buf_init(), which does not use
+ * it; DST_COLOR is not referenced anywhere in this file.
+ */
+#define SRC_COLOR	0xffff00ff
+#define DST_COLOR	0xfff0ff00
+
+/*
+ * State shared by all subtests. Populated by data_init() and torn down by
+ * data_fini().
+ */
+typedef struct {
+	/* DRM device file descriptor returned by drm_open_driver(). */
+	int fd;
+	/* Device id queried from fd; selects the render functions below. */
+	uint32_t devid;
+	/* Buffer manager that the batch and all scratch buffers come from. */
+	drm_intel_bufmgr *bufmgr;
+	/* Batch buffer handed to every render function call. */
+	struct intel_batchbuffer *batch;
+	/* Device-specific render functions, one per subtest. */
+	igt_render_copyfunc_t render_copy;
+	igt_render_copyfunc_t render_read;
+	igt_render_copyfunc_t render_write;
+	/* CPU-side pattern (linear[i] == i) uploaded into each scratch buffer. */
+	uint32_t linear[WIDTH * HEIGHT];
+} data_t;
+
+/*
+ * Open the Intel DRM device and set up everything the subtests share:
+ * the buffer manager, the three device-specific render functions, the batch
+ * buffer and the CPU-side fill pattern.
+ *
+ * A missing buffer manager or batch buffer trips igt_assert(); a missing
+ * render function is checked with igt_require_f() instead, i.e. it is treated
+ * as an unmet requirement of the test rather than as a failure.
+ */
+static void data_init(data_t *data)
+{
+	uint32_t *pixel = data->linear;
+	uint32_t value;
+
+	const int fd = drm_open_driver(DRIVER_INTEL);
+	const uint32_t devid = intel_get_drm_devid(fd);
+
+	data->fd = fd;
+	data->devid = devid;
+
+	data->bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
+	igt_assert(data->bufmgr);
+
+	/* The order of these checks decides which message is reported first. */
+	data->render_copy = igt_get_render_copyfunc(devid);
+	igt_require_f(data->render_copy,
+		      "no render-copy function\n");
+	data->render_write = igt_get_render_writefunc(devid);
+	igt_require_f(data->render_write,
+		      "no render-write function\n");
+	data->render_read = igt_get_render_readfunc(devid);
+	igt_require_f(data->render_read,
+		      "no render-read function\n");
+
+	data->batch = intel_batchbuffer_alloc(data->bufmgr, devid);
+	igt_assert(data->batch);
+
+	/* Each pixel of the pattern holds its own linear index. */
+	for (value = 0; value < WIDTH * HEIGHT; value++)
+		*pixel++ = value;
+}
+
+/*
+ * Release what data_init() created, in reverse order of creation: first the
+ * batch buffer, then the buffer manager it was allocated from, and finally
+ * the DRM file descriptor.
+ */
+static void data_fini(data_t *data)
+{
+	struct intel_batchbuffer *batch = data->batch;
+	drm_intel_bufmgr *bufmgr = data->bufmgr;
+	const int fd = data->fd;
+
+	intel_batchbuffer_free(batch);
+	drm_intel_bufmgr_destroy(bufmgr);
+	close(fd);
+}
+
+/*
+ * Allocate one scratch buffer object and describe it in @buf.
+ *
+ * The buffer object is always SIZE bytes, untiled, and is filled with the
+ * pattern in data->linear.
+ *
+ * @data:   shared test state; provides the fd, buffer manager and pattern
+ * @buf:    descriptor to fill in; any previous contents are discarded
+ * @width:  currently ignored (the buffer is always WIDTH pixels wide)
+ * @height: currently ignored (the buffer is always HEIGHT rows, see SIZE)
+ * @stride: row pitch in bytes, stored in @buf
+ * @color:  currently ignored; the contents come from data->linear instead
+ *
+ * Returns 0 on success, -1 if the buffer object could not be allocated. On
+ * failure @buf is left zeroed and holds no buffer object.
+ */
+static int scratch_buf_init(data_t *data, struct igt_buf *buf,
+			    int width, int height, int stride, uint32_t color)
+{
+	drm_intel_bo *bo;
+
+	/*
+	 * The caller passes uninitialised stack storage and only some members
+	 * are assigned below, so start from an all-zero descriptor (the same
+	 * state scratch_buf_fini() leaves behind) instead of leaving any
+	 * remaining member indeterminate.
+	 */
+	memset(buf, 0, sizeof(*buf));
+
+	bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
+	if (!bo)
+		return -1;
+
+	/* sizeof(data->linear) equals SIZE, so this fills the whole bo. */
+	gem_write(data->fd, bo->handle, 0, data->linear,
+		  sizeof(data->linear));
+
+	buf->bo = bo;
+	buf->stride = stride;
+	buf->tiling = I915_TILING_NONE;
+	buf->size = SIZE;
+	return 0;
+}
+
+/*
+ * Drop the reference on the buffer object held by @buf and reset the
+ * descriptor to all zeroes. @data is unused.
+ */
+static void scratch_buf_fini(data_t *data, struct igt_buf *buf)
+{
+	drm_intel_bo *bo = buf->bo;
+
+	/* Clear the descriptor first; the bo pointer is kept in a local. */
+	memset(buf, 0, sizeof(*buf));
+	dri_bo_unreference(bo);
+}
+
+/*
+ * Print the amount of data processed by one timed run and the resulting
+ * bandwidth.
+ *
+ * @desc:  label printed in front of the result
+ * @start: timestamp taken before the run
+ * @end:   timestamp taken after the run
+ *
+ * The amount is derived from the real buffer size (SIZE * LOOPS bytes)
+ * instead of assuming that one iteration is exactly one "MB", and both the
+ * amount and the rate use the same unit (1 MB = 10^6 bytes). The printed
+ * rate is therefore the printed amount divided by the printed time.
+ *
+ * Asserts that the measured interval is non-zero.
+ */
+static void print_bandwidth(const char *desc, struct timeval *start, struct timeval *end)
+{
+	const uint64_t bytes = (uint64_t)SIZE * LOOPS;
+	struct timeval diff;
+	uint64_t usecs;
+
+	timersub(end, start, &diff);
+	usecs = diff.tv_sec * 1000000ULL + diff.tv_usec;
+	/* A zero interval cannot yield a meaningful rate. */
+	igt_assert(usecs != 0);
+
+	/* 1 byte/us == 10^6 bytes/s == 1 MB/s, so the rate needs no scaling. */
+	printf("%s: %.1f MB in %f seconds, %f MB/s\n", desc,
+	       (double)bytes / 1.0e6, (double)usecs / 1.0e6,
+	       (double)bytes / (double)usecs);
+}
+
+/*
+ * Time LOOPS invocations of one render function and print the bandwidth.
+ *
+ * @data: shared test state (fd and batch buffer)
+ * @bufs: array of NBUFS initialised scratch buffers
+ * @func: render function under test
+ * @desc: label for the printed result
+ *
+ * Iteration i uses bufs[i % NBUFS] as source and bufs[(i + 1) % NBUFS] as
+ * destination, so consecutive operations keep moving on to other buffers.
+ */
+static void test(data_t *data, struct igt_buf *bufs, igt_render_copyfunc_t func,
+		 const char *desc)
+{
+	struct timeval start, end;
+	int i;
+
+	gettimeofday(&start, NULL);
+
+	/*
+	 * NOTE(review): a full WIDTH x HEIGHT rectangle is placed at
+	 * (WIDTH / 2, HEIGHT / 2) in a destination that is itself only
+	 * WIDTH x HEIGHT. If the render functions clip to the destination
+	 * size, only a quarter of the rectangle is written; confirm whether
+	 * the destination offset should be (0, 0).
+	 */
+	for (i = 0; i < LOOPS; i++)
+		func(data->batch, NULL, &bufs[i % NBUFS], 0, 0, WIDTH, HEIGHT,
+		     &bufs[(i + 1) % NBUFS], WIDTH / 2, HEIGHT / 2);
+
+	/*
+	 * Wait for the GPU to finish with the buffers of the final iteration
+	 * before taking the end timestamp. Otherwise work that is still
+	 * queued when the loop returns is not counted and the reported
+	 * bandwidth is too high. Both source and destination are synced
+	 * because a read-only operation may not touch the destination.
+	 * This assumes the render functions submit their batch before
+	 * returning; TODO confirm for render_read/render_write.
+	 */
+	gem_sync(data->fd, bufs[(LOOPS - 1) % NBUFS].bo->handle);
+	gem_sync(data->fd, bufs[LOOPS % NBUFS].bo->handle);
+
+	gettimeofday(&end, NULL);
+	print_bandwidth(desc, &start, &end);
+}
+
+/*
+ * Test entry point.
+ *
+ * A setup fixture opens the device and allocates NBUFS scratch buffers, the
+ * "copy", "write" and "read" subtests each time one render function over
+ * those buffers, and a final fixture releases everything again.
+ */
+int main(int argc, char **argv)
+{
+	data_t data = {0, };
+	struct igt_buf bufs[NBUFS];
+
+	igt_subtest_init(argc, argv);
+
+	igt_fixture {
+		data_init(&data);
+		for (int i = 0; i < NBUFS; i++) {
+			if (!scratch_buf_init(&data, &bufs[i], WIDTH, HEIGHT, STRIDE, SRC_COLOR))
+				continue;
+
+			/*
+			 * Allocation of buffer i failed. Each buffer is
+			 * 1 MiB, so the number of missing buffers is also
+			 * the number of megabytes still needed.
+			 */
+			printf("Not enough memory to allocate all scratch buffers. Need "
+			       "%i megabytes more.\n", NBUFS - i);
+
+			/* Release the buffers that were allocated, then the device state. */
+			while (i-- > 0)
+				scratch_buf_fini(&data, &bufs[i]);
+			data_fini(&data);
+			igt_fail(IGT_EXIT_FAILURE);
+		}
+	}
+
+	igt_subtest("copy") {
+		test(&data, bufs, data.render_copy, "Copy");
+	}
+
+	igt_subtest("write") {
+		test(&data, bufs, data.render_write, "Write");
+	}
+
+	igt_subtest("read") {
+		test(&data, bufs, data.render_read, "Read");
+	}
+
+	igt_fixture {
+		for (int i = 0; i < NBUFS; i++)
+			scratch_buf_fini(&data, &bufs[i]);
+		data_fini(&data);
+	}
+
+	igt_exit();
+}
-- 
2.3.6