[Intel-gfx] [PATCH i-g-t 3/3] gem_memory_bandwidth: Add test
Daniel Vetter
daniel at ffwll.ch
Tue Oct 6 02:00:40 PDT 2015
On Mon, Oct 05, 2015 at 02:42:25PM +0300, Antti Koskipaa wrote:
> This is a benchmark for testing the GPU read and write bandwidth.
>
> Issue: VIZ-5664
> Signed-off-by: Antti Koskipaa <antti.koskipaa at linux.intel.com>
> ---
> tests/.gitignore | 1 +
> tests/Makefile.sources | 1 +
> tests/gem_memory_bandwidth.c | 209 +++++++++++++++++++++++++++++++++++++++++++
> 3 files changed, 211 insertions(+)
> create mode 100644 tests/gem_memory_bandwidth.c
>
> diff --git a/tests/.gitignore b/tests/.gitignore
> index dc8bb53..2ea4107 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -65,6 +65,7 @@ gem_linear_blits
> gem_lut_handle
> gem_madvise
> gem_media_fill
> +gem_memory_bandwidth
> gem_mmap
> gem_mmap_gtt
> gem_mmap_offset_exhaustion
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 2e2e088..4429c29 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -36,6 +36,7 @@ TESTS_progs_M = \
> gem_flink_race \
> gem_linear_blits \
> gem_madvise \
> + gem_memory_bandwidth \
> gem_mmap \
> gem_mmap_gtt \
> gem_mmap_wc \
> diff --git a/tests/gem_memory_bandwidth.c b/tests/gem_memory_bandwidth.c
> new file mode 100644
> index 0000000..a44987e
> --- /dev/null
> +++ b/tests/gem_memory_bandwidth.c
> @@ -0,0 +1,209 @@
> +/*
> + * Copyright © 2013-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + * Antti Koskipää <antti.koskipaa at intel.com>
> + * Damien Lespiau <damien.lespiau at intel.com>
> + */
> +
> +/*
> + * We need a way to test memory bandwidth bottlenecks and understand better
> + * where they are. This test bypasses Mesa and uses the kernel GEM interface
> + * directly.
> + *
> + * Say there is a performance regression. Where is it, Mesa or kernel? Just
> + * compare the results of this test to the Mesa bandwidth results. If they are
> + * similar, the problem is in the kernel. If Mesa is much slower than this test,
> + * the problem is in Mesa.
> + */
> +
> +#include "igt.h"
> +#include <stdbool.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <sys/ioctl.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +
> +#include <drm.h>
> +
> +#include "intel_bufmgr.h"
> +
> +IGT_TEST_DESCRIPTION("GPU memory bandwidth benchmark.");
> +
> +/* Each test block is 1 meg. */
> +#define WIDTH 512
> +#define STRIDE (WIDTH*4)
> +#define HEIGHT 512
> +#define SIZE (HEIGHT*STRIDE)
> +/* Run this many times. 10240 = 10 gigabytes are copied. 1024 was too small,
> + * giving too much variance. */
> +#define LOOPS 10240
> +/* How many buffers to allocate for main memory speed testing.
> + * Must be large enough to thrash the caches.
> + */
> +#define NBUFS 512
> +
> +#define SRC_COLOR 0xffff00ff
> +#define DST_COLOR 0xfff0ff00
> +
> +typedef struct {
> + int fd;
> + uint32_t devid;
> + drm_intel_bufmgr *bufmgr;
> + struct intel_batchbuffer *batch;
> + igt_render_copyfunc_t render_copy;
> + igt_render_copyfunc_t render_read;
> + igt_render_copyfunc_t render_write;
> + uint32_t linear[WIDTH * HEIGHT];
> +} data_t;
> +
> +static void data_init(data_t *data)
> +{
> + data->fd = drm_open_driver(DRIVER_INTEL);
> + data->devid = intel_get_drm_devid(data->fd);
> +
> + data->bufmgr = drm_intel_bufmgr_gem_init(data->fd, 4096);
> + igt_assert(data->bufmgr);
> +
> + data->render_copy = igt_get_render_copyfunc(data->devid);
> + igt_require_f(data->render_copy,
> + "no render-copy function\n");
> + data->render_write = igt_get_render_writefunc(data->devid);
> + igt_require_f(data->render_write,
> + "no render-write function\n");
> + data->render_read = igt_get_render_readfunc(data->devid);
> + igt_require_f(data->render_read,
> + "no render-read function\n");
> +
> + data->batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
> + igt_assert(data->batch);
> +
> + for (int i = 0; i < WIDTH * HEIGHT; i++)
> + data->linear[i] = i;
> +
> +}
> +
> +static void data_fini(data_t *data)
> +{
> + intel_batchbuffer_free(data->batch);
> + drm_intel_bufmgr_destroy(data->bufmgr);
> + close(data->fd);
> +}
> +
> +static int scratch_buf_init(data_t *data, struct igt_buf *buf,
> + int width, int height, int stride, uint32_t color)
> +{
> + drm_intel_bo *bo;
> +
> + bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
> + if (!bo)
> + return -1;
> + gem_write(data->fd, bo->handle, 0, data->linear,
> + sizeof(data->linear));
> +
> + buf->bo = bo;
> + buf->stride = stride;
> + buf->tiling = I915_TILING_NONE;
> + buf->size = SIZE;
> + return 0;
> +}
> +
> +static void scratch_buf_fini(data_t *data, struct igt_buf *buf)
> +{
> + dri_bo_unreference(buf->bo);
> + memset(buf, 0, sizeof(*buf));
> +}
> +
> +static void print_bandwidth(const char *desc, struct timeval *start, struct timeval *end)
> +{
> + struct timeval diff;
> + uint64_t usecs;
> + timersub(end, start, &diff);
> + usecs = diff.tv_sec * 1000000ULL + diff.tv_usec;
> + igt_assert(usecs != 0);
> + /* 1 byte/us = 1M/s */
> + printf("%s: %i MB in %f seconds, %f MB/s\n", desc,
> + LOOPS, (float)usecs / 1.0e6,
> + (float)(STRIDE*HEIGHT*(uint64_t)LOOPS) / (float)usecs);
> +}
> +
> +static void test(data_t *data, struct igt_buf *bufs, igt_render_copyfunc_t func,
> + const char *desc)
> +{
> + int i;
> + struct timeval start, end;
> +
> + gettimeofday(&start, NULL);
> +
> + for (i = 0; i < LOOPS; i++)
> + func(data->batch, NULL, &bufs[i % NBUFS], 0, 0, WIDTH, HEIGHT,
> + &bufs[(i + 1) % NBUFS], WIDTH / 2, HEIGHT / 2);
> +
> + gettimeofday(&end, NULL);
> + print_bandwidth(desc, &start, &end);
> +}
> +
> +int main(int argc, char **argv)
> +{
> + data_t data = {0, };
> + struct igt_buf bufs[NBUFS];
> +
> + igt_subtest_init(argc, argv);
> +
igt_main {
> + igt_fixture {
> + data_init(&data);
> + for (int i = 0; i < NBUFS; i++)
> + if (scratch_buf_init(&data, &bufs[i], WIDTH, HEIGHT, STRIDE, SRC_COLOR)) {
> + printf("Not enough memory to allocate all scratch buffers. Need" \
> + "%i megabytes more.\n", NBUFS - i);
> + for (i--; i >= 0; i--)
> + scratch_buf_fini(&data, &bufs[i]);
> + igt_fail(IGT_EXIT_FAILURE);
> + }
> + }
> +
> + igt_subtest("copy") {
> + test(&data, bufs, data.render_copy, "Copy");
> + }
> +
> + igt_subtest("write") {
> + test(&data, bufs, data.render_write, "Write");
> + }
> +
> + igt_subtest("read") {
> + test(&data, bufs, data.render_read, "Read");
> + }
> +
> + igt_fixture {
> + for (int i = 0; i < NBUFS; i++)
> + scratch_buf_fini(&data, &bufs[i]);
> + data_fini(&data);
> + }
}
With igt_main wrapping the fixtures and subtests like this, you can also drop the igt_exit() and igt_subtest_init() calls.
-Daniel
> +
> + igt_exit();
> +}
> --
> 2.3.6
>
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx
--
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch
More information about the Intel-gfx
mailing list