[Intel-gfx] [PATCH i-g-t 3/3] gem_memory_bandwidth: Add test
Antti Koskipaa
antti.koskipaa at linux.intel.com
Mon Oct 5 04:42:25 PDT 2015
This is a benchmark for testing the GPU read and write bandwidth.
Issue: VIZ-5664
Signed-off-by: Antti Koskipaa <antti.koskipaa at linux.intel.com>
---
tests/.gitignore | 1 +
tests/Makefile.sources | 1 +
tests/gem_memory_bandwidth.c | 209 +++++++++++++++++++++++++++++++++++++++++++
3 files changed, 211 insertions(+)
create mode 100644 tests/gem_memory_bandwidth.c
diff --git a/tests/.gitignore b/tests/.gitignore
index dc8bb53..2ea4107 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -65,6 +65,7 @@ gem_linear_blits
gem_lut_handle
gem_madvise
gem_media_fill
+gem_memory_bandwidth
gem_mmap
gem_mmap_gtt
gem_mmap_offset_exhaustion
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 2e2e088..4429c29 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -36,6 +36,7 @@ TESTS_progs_M = \
gem_flink_race \
gem_linear_blits \
gem_madvise \
+ gem_memory_bandwidth \
gem_mmap \
gem_mmap_gtt \
gem_mmap_wc \
diff --git a/tests/gem_memory_bandwidth.c b/tests/gem_memory_bandwidth.c
new file mode 100644
index 0000000..a44987e
--- /dev/null
+++ b/tests/gem_memory_bandwidth.c
@@ -0,0 +1,209 @@
+/*
+ * Copyright © 2013-2014 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Antti Koskipää <antti.koskipaa at intel.com>
+ * Damien Lespiau <damien.lespiau at intel.com>
+ */
+
+/*
+ * We need a way to test memory bandwidth bottlenecks and understand better
+ * where they are. This test bypasses Mesa and uses the kernel GEM interface
+ * directly.
+ *
+ * Say there is a performance regression. Where is it, Mesa or kernel? Just
+ * compare the results of this test to the Mesa bandwidth results. If they are
+ * similar, the problem is in the kernel. If Mesa is much slower than this test,
+ * the problem is in Mesa.
+ */
+
+#include "igt.h"
+#include <stdbool.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+
+#include <drm.h>
+
+#include "intel_bufmgr.h"
+
+IGT_TEST_DESCRIPTION("GPU memory bandwidth benchmark.");
+
+/* Geometry of one test buffer: WIDTH x HEIGHT pixels at 4 bytes per pixel,
+ * i.e. 512 * 4 * 512 bytes = 1 MiB per buffer. */
+#define WIDTH 512 /* pixels per row */
+#define STRIDE (WIDTH*4) /* bytes per row */
+#define HEIGHT 512 /* rows */
+#define SIZE (HEIGHT*STRIDE) /* bytes per buffer */
+/* Number of operations issued per subtest. With 1 MiB buffers, 10240
+ * iterations account for 10 GiB in total. 1024 was too small, giving too
+ * much variance. */
+#define LOOPS 10240
+/* How many buffers to allocate for main memory speed testing.
+ * Must be large enough to thrash the caches: 512 buffers of 1 MiB each
+ * give a 512 MiB working set.
+ */
+#define NBUFS 512
+
+#define SRC_COLOR 0xffff00ff /* passed to scratch_buf_init(), which ignores it */
+#define DST_COLOR 0xfff0ff00 /* not referenced anywhere in this file */
+
+typedef struct { /* State shared by every subtest. */
+ int fd; /* DRM device file descriptor from drm_open_driver() */
+ uint32_t devid; /* device id from intel_get_drm_devid() */
+ drm_intel_bufmgr *bufmgr; /* buffer manager created in data_init() */
+ struct intel_batchbuffer *batch; /* batch buffer handed to the render functions */
+ igt_render_copyfunc_t render_copy; /* driven by the "copy" subtest */
+ igt_render_copyfunc_t render_read; /* driven by the "read" subtest */
+ igt_render_copyfunc_t render_write; /* driven by the "write" subtest */
+ uint32_t linear[WIDTH * HEIGHT]; /* CPU-side pattern uploaded into each scratch buffer */
+} data_t;
+
+/*
+ * Open the Intel DRM device and populate *data with everything the subtests
+ * share: device id, buffer manager, the copy/write/read render functions,
+ * the batch buffer and the CPU-side fill pattern.
+ *
+ * Each render function lookup is gated with igt_require_f(); the buffer
+ * manager and batch buffer allocations are checked with igt_assert().
+ */
+static void data_init(data_t *data)
+{
+	const size_t npixels = sizeof(data->linear) / sizeof(data->linear[0]);
+	size_t px = 0;
+
+	data->fd = drm_open_driver(DRIVER_INTEL);
+	data->devid = intel_get_drm_devid(data->fd);
+
+	data->bufmgr = drm_intel_bufmgr_gem_init(data->fd, 4096);
+	igt_assert(data->bufmgr);
+
+	/* Look the three render functions up in the same order as before. */
+	data->render_copy = igt_get_render_copyfunc(data->devid);
+	igt_require_f(data->render_copy,
+		      "no render-copy function\n");
+	data->render_write = igt_get_render_writefunc(data->devid);
+	igt_require_f(data->render_write,
+		      "no render-write function\n");
+	data->render_read = igt_get_render_readfunc(data->devid);
+	igt_require_f(data->render_read,
+		      "no render-read function\n");
+
+	data->batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
+	igt_assert(data->batch);
+
+	/* Fill pattern: every 32-bit element holds its own index. */
+	while (px < npixels) {
+		data->linear[px] = px;
+		px++;
+	}
+}
+
+static void data_fini(data_t *data) /* Release what data_init() acquired, in reverse order. */
+{
+ intel_batchbuffer_free(data->batch); /* allocated from bufmgr, so it is freed first */
+ drm_intel_bufmgr_destroy(data->bufmgr);
+ close(data->fd); /* the bufmgr was created on this fd, so close it last */
+}
+
+/*
+ * Allocate one SIZE-byte buffer object, upload the data->linear pattern into
+ * it and describe it in *buf as an untiled surface.
+ *
+ * width, height and color are accepted but not used: the allocation is
+ * always SIZE bytes and the contents always come from data->linear. Only
+ * stride is recorded in the descriptor.
+ *
+ * Returns 0 on success, or -1 if the buffer object could not be allocated
+ * (in which case *buf is left untouched).
+ */
+static int scratch_buf_init(data_t *data, struct igt_buf *buf,
+			    int width, int height, int stride, uint32_t color)
+{
+	drm_intel_bo *bo;
+
+	bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
+	if (!bo)
+		return -1;
+	gem_write(data->fd, bo->handle, 0, data->linear,
+		  sizeof(data->linear));
+
+	/*
+	 * main() hands in elements of an uninitialised stack array, so clear
+	 * the whole descriptor first; any member not assigned below would
+	 * otherwise reach the render functions holding stack garbage.
+	 */
+	memset(buf, 0, sizeof(*buf));
+
+	buf->bo = bo;
+	buf->stride = stride;
+	buf->tiling = I915_TILING_NONE;
+	buf->size = SIZE;
+	return 0;
+}
+
+static void scratch_buf_fini(data_t *data, struct igt_buf *buf) /* Release one scratch buffer; data is unused. */
+{
+ dri_bo_unreference(buf->bo); /* drop the reference held since scratch_buf_init() */
+ memset(buf, 0, sizeof(*buf)); /* clear the descriptor so no stale bo pointer remains */
+}
+
+/*
+ * Print the data volume, elapsed time and resulting throughput for one
+ * subtest.
+ *
+ * desc:  label printed at the start of the line.
+ * start: timestamp taken before the work was issued.
+ * end:   timestamp taken after the work finished.
+ *
+ * The volume is SIZE * LOOPS bytes. Both the volume and the rate are
+ * reported in decimal megabytes (10^6 bytes) so that the three printed
+ * figures agree with each other; previously the volume was printed as a
+ * count of 1 MiB buffers while the rate was in bytes per microsecond.
+ * Asserts that the measured interval is non-zero.
+ */
+static void print_bandwidth(const char *desc, struct timeval *start, struct timeval *end)
+{
+	const uint64_t bytes = (uint64_t)SIZE * LOOPS;
+	struct timeval diff;
+	uint64_t usecs;
+
+	timersub(end, start, &diff);
+	usecs = diff.tv_sec * 1000000ULL + diff.tv_usec;
+	igt_assert(usecs != 0);
+
+	/*
+	 * 1 byte/us = 1 MB/s. The arithmetic is done in double: float keeps
+	 * only 24 bits of mantissa, which is not enough for a microsecond
+	 * count once the run lasts more than a few seconds.
+	 */
+	printf("%s: %i MB in %f seconds, %f MB/s\n", desc,
+	       (int)(bytes / 1000000), (double)usecs / 1.0e6,
+	       (double)bytes / (double)usecs);
+}
+
+/*
+ * Run one benchmark: call func LOOPS times, walking the bufs ring so that
+ * iteration i uses bufs[i % NBUFS] as source and bufs[(i + 1) % NBUFS] as
+ * destination, then report the achieved bandwidth under the label desc.
+ *
+ * data: shared state (device fd and batch buffer are used here).
+ * bufs: array of NBUFS initialised scratch buffers.
+ * func: render function to exercise (copy, read or write).
+ * desc: label passed on to print_bandwidth().
+ */
+static void test(data_t *data, struct igt_buf *bufs, igt_render_copyfunc_t func,
+		 const char *desc)
+{
+	struct timeval start, end;
+	int i;
+
+	gettimeofday(&start, NULL);
+
+	/*
+	 * The destination offset is 0,0 so that the whole WIDTH x HEIGHT
+	 * rectangle lands inside the destination buffer. With the previous
+	 * WIDTH / 2, HEIGHT / 2 offset three quarters of the rectangle fell
+	 * outside it, while print_bandwidth() accounts for a full buffer per
+	 * iteration.
+	 * NOTE(review): assumes the two trailing arguments of
+	 * igt_render_copyfunc_t are the destination x/y - confirm against
+	 * the library header.
+	 */
+	for (i = 0; i < LOOPS; i++)
+		func(data->batch, NULL, &bufs[i % NBUFS], 0, 0, WIDTH, HEIGHT,
+		     &bufs[(i + 1) % NBUFS], 0, 0);
+
+	/*
+	 * Nothing in the loop above waits for the GPU, so without a sync the
+	 * end timestamp can be taken while work is still queued and the
+	 * result would reflect submission speed rather than memory
+	 * bandwidth. Wait on the source and destination of the final
+	 * iteration before reading the clock.
+	 * NOTE(review): this relies on earlier submissions completing before
+	 * the last one - confirm for the engine in use.
+	 */
+	if (i > 0) {
+		gem_sync(data->fd, bufs[(i - 1) % NBUFS].bo->handle);
+		gem_sync(data->fd, bufs[i % NBUFS].bo->handle);
+	}
+
+	gettimeofday(&end, NULL);
+	print_bandwidth(desc, &start, &end);
+}
+
+/*
+ * Test entry point. A first fixture opens the device and allocates NBUFS
+ * scratch buffers; the "copy", "write" and "read" subtests each run the
+ * benchmark loop with the matching render function; a final fixture releases
+ * the buffers and the device.
+ */
+int main(int argc, char **argv)
+{
+	data_t data = {0, };
+	struct igt_buf bufs[NBUFS];
+
+	igt_subtest_init(argc, argv);
+
+	igt_fixture {
+		data_init(&data);
+		for (int i = 0; i < NBUFS; i++) {
+			if (scratch_buf_init(&data, &bufs[i], WIDTH, HEIGHT, STRIDE, SRC_COLOR)) {
+				/*
+				 * Each buffer is one megabyte, so the number
+				 * of buffers still missing is also the number
+				 * of megabytes still needed. The message used
+				 * to be built from two adjacent literals with
+				 * no space between them ("Need%i").
+				 */
+				printf("Not enough memory to allocate all scratch buffers. Need "
+				       "%i megabytes more.\n", NBUFS - i);
+				/* Release only the buffers that were set up. */
+				for (i--; i >= 0; i--)
+					scratch_buf_fini(&data, &bufs[i]);
+				igt_fail(IGT_EXIT_FAILURE);
+			}
+		}
+	}
+
+	igt_subtest("copy") {
+		test(&data, bufs, data.render_copy, "Copy");
+	}
+
+	igt_subtest("write") {
+		test(&data, bufs, data.render_write, "Write");
+	}
+
+	igt_subtest("read") {
+		test(&data, bufs, data.render_read, "Read");
+	}
+
+	igt_fixture {
+		for (int i = 0; i < NBUFS; i++)
+			scratch_buf_fini(&data, &bufs[i]);
+		data_fini(&data);
+	}
+
+	igt_exit();
+}
--
2.3.6
More information about the Intel-gfx
mailing list