[Intel-gfx] [PATCH i-g-t 3/3] gem_memory_bandwidth: Add test

Tue Oct 6 02:00:40 PDT 2015

On Mon, Oct 05, 2015 at 02:42:25PM +0300, Antti Koskipaa wrote:
> This is a benchmark for testing the GPU read and write bandwidth.
> 
> Issue: VIZ-5664
> Signed-off-by: Antti Koskipaa <antti.koskipaa at linux.intel.com>
> ---
>  tests/.gitignore             |   1 +
>  tests/Makefile.sources       |   1 +
>  tests/gem_memory_bandwidth.c | 209 +++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 211 insertions(+)
>  create mode 100644 tests/gem_memory_bandwidth.c
> 
> diff --git a/tests/.gitignore b/tests/.gitignore
> index dc8bb53..2ea4107 100644
> --- a/tests/.gitignore
> +++ b/tests/.gitignore
> @@ -65,6 +65,7 @@ gem_linear_blits
>  gem_lut_handle
>  gem_madvise
>  gem_media_fill
> +gem_memory_bandwidth
>  gem_mmap
>  gem_mmap_gtt
>  gem_mmap_offset_exhaustion
> diff --git a/tests/Makefile.sources b/tests/Makefile.sources
> index 2e2e088..4429c29 100644
> --- a/tests/Makefile.sources
> +++ b/tests/Makefile.sources
> @@ -36,6 +36,7 @@ TESTS_progs_M = \
>  	gem_flink_race \
>  	gem_linear_blits \
>  	gem_madvise \
> +	gem_memory_bandwidth \
>  	gem_mmap \
>  	gem_mmap_gtt \
>  	gem_mmap_wc \
> diff --git a/tests/gem_memory_bandwidth.c b/tests/gem_memory_bandwidth.c
> new file mode 100644
> index 0000000..a44987e
> --- /dev/null
> +++ b/tests/gem_memory_bandwidth.c
> @@ -0,0 +1,209 @@
> +/*
> + * Copyright © 2013-2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Antti Koskipää <antti.koskipaa at intel.com>
> + *    Damien Lespiau <damien.lespiau at intel.com>
> + */
> +
> +/*
> + * We need a way to test memory bandwidth bottlenecks and understand better
> + * where they are. This test bypasses Mesa and uses the kernel GEM interface
> + * directly.
> + *
> + * Say there is a performance regression. Where is it, Mesa or kernel? Just
> + * compare the results of this test to the Mesa bandwidth results. If they are
> + * similar, the problem is in the kernel. If Mesa is much slower than this test,
> + * the problem is in Mesa.
> + */
> +
> +#include "igt.h"
> +#include <stdbool.h>
> +#include <unistd.h>
> +#include <stdlib.h>
> +#include <sys/ioctl.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +
> +#include <drm.h>
> +
> +#include "intel_bufmgr.h"
> +
> +IGT_TEST_DESCRIPTION("GPU memory bandwidth benchmark.");
> +
> +/* Each test block is 1 meg. */
> +#define WIDTH 512
> +#define STRIDE (WIDTH*4)
> +#define HEIGHT 512
> +#define SIZE (HEIGHT*STRIDE)
> +/* Run this many times. 10240 = 10 gigabytes are copied. 1024 was too small,
> + * giving too much variance. */
> +#define LOOPS 10240
> +/* How many buffers to allocate for main memory speed testing.
> + * Must be large enough to thrash the caches.
> + */
> +#define NBUFS 512
> +
> +#define SRC_COLOR	0xffff00ff
> +#define DST_COLOR	0xfff0ff00
> +
> +typedef struct {
> +	int fd;
> +	uint32_t devid;
> +	drm_intel_bufmgr *bufmgr;
> +	struct intel_batchbuffer *batch;
> +	igt_render_copyfunc_t render_copy;
> +	igt_render_copyfunc_t render_read;
> +	igt_render_copyfunc_t render_write;
> +	uint32_t linear[WIDTH * HEIGHT];
> +} data_t;
> +
> +static void data_init(data_t *data)
> +{
> +	data->fd = drm_open_driver(DRIVER_INTEL);
> +	data->devid = intel_get_drm_devid(data->fd);
> +
> +	data->bufmgr = drm_intel_bufmgr_gem_init(data->fd, 4096);
> +	igt_assert(data->bufmgr);
> +
> +	data->render_copy = igt_get_render_copyfunc(data->devid);
> +	igt_require_f(data->render_copy,
> +		      "no render-copy function\n");
> +	data->render_write = igt_get_render_writefunc(data->devid);
> +	igt_require_f(data->render_write,
> +		      "no render-write function\n");
> +	data->render_read = igt_get_render_readfunc(data->devid);
> +	igt_require_f(data->render_read,
> +		      "no render-read function\n");
> +
> +	data->batch = intel_batchbuffer_alloc(data->bufmgr, data->devid);
> +	igt_assert(data->batch);
> +
> +	for (int i = 0; i < WIDTH * HEIGHT; i++)
> +		data->linear[i] = i;
> +
> +}
> +
> +static void data_fini(data_t *data)
> +{
> +	intel_batchbuffer_free(data->batch);
> +	drm_intel_bufmgr_destroy(data->bufmgr);
> +	close(data->fd);
> +}
> +
> +static int scratch_buf_init(data_t *data, struct igt_buf *buf,
> +			    int width, int height, int stride, uint32_t color)
> +{
> +	drm_intel_bo *bo;
> +
> +	bo = drm_intel_bo_alloc(data->bufmgr, "", SIZE, 4096);
> +	if (!bo)
> +		return -1;
> +	gem_write(data->fd, bo->handle, 0, data->linear,
> +		  sizeof(data->linear));
> +
> +	buf->bo = bo;
> +	buf->stride = stride;
> +	buf->tiling = I915_TILING_NONE;
> +	buf->size = SIZE;
> +	return 0;
> +}
> +
> +static void scratch_buf_fini(data_t *data, struct igt_buf *buf)
> +{
> +	dri_bo_unreference(buf->bo);
> +	memset(buf, 0, sizeof(*buf));
> +}
> +
> +static void print_bandwidth(const char *desc, struct timeval *start, struct timeval *end)
> +{
> +	struct timeval diff;
> +	uint64_t usecs;
> +	timersub(end, start, &diff);
> +	usecs = diff.tv_sec * 1000000ULL + diff.tv_usec;
> +	igt_assert(usecs != 0);
> +	/* 1 byte/us = 1M/s */
> +	printf("%s: %i MB in %f seconds, %f MB/s\n", desc,
> +	       LOOPS, (float)usecs / 1.0e6,
> +	       (float)(STRIDE*HEIGHT*(uint64_t)LOOPS) / (float)usecs);
> +}
> +
> +static void test(data_t *data, struct igt_buf *bufs, igt_render_copyfunc_t func,
> +		 const char *desc)
> +{
> +	int i;
> +	struct timeval start, end;
> +
> +	gettimeofday(&start, NULL);
> +
> +	for (i = 0; i < LOOPS; i++)
> +		func(data->batch, NULL, &bufs[i % NBUFS], 0, 0, WIDTH, HEIGHT,
> +		     &bufs[(i + 1) % NBUFS], WIDTH / 2, HEIGHT / 2);
> +
> +	gettimeofday(&end, NULL);
> +	print_bandwidth(desc, &start, &end);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	data_t data = {0, };
> +	struct igt_buf bufs[NBUFS];
> +
> +	igt_subtest_init(argc, argv);
> +
igt_main {

> +	igt_fixture {
> +		data_init(&data);
> +		for (int i = 0; i < NBUFS; i++)
> +			if (scratch_buf_init(&data, &bufs[i], WIDTH, HEIGHT, STRIDE, SRC_COLOR)) {
> +				printf("Not enough memory to allocate all scratch buffers. Need" \
> +				       "%i megabytes more.\n", NBUFS - i);
> +				for (i--; i >= 0; i--)
> +					scratch_buf_fini(&data, &bufs[i]);
> +				igt_fail(IGT_EXIT_FAILURE);
> +			}
> +	}
> +
> +	igt_subtest("copy") {
> +		test(&data, bufs, data.render_copy, "Copy");
> +	}
> +
> +	igt_subtest("write") {
> +		test(&data, bufs, data.render_write, "Write");
> +	}
> +
> +	igt_subtest("read") {
> +		test(&data, bufs, data.render_read, "Read");
> +	}
> +
> +	igt_fixture {
> +		for (int i = 0; i < NBUFS; i++)
> +			scratch_buf_fini(&data, &bufs[i]);
> +		data_fini(&data);
> +	}
}

And with igt_main you can drop the explicit igt_exit() and igt_subtest_init() calls.
-Daniel

> +
> +	igt_exit();
> +}
> -- 
> 2.3.6
> 

> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx


-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch