[Intel-gfx] [PATCH 3/3] tests/gem_userptr_benchmark: Benchmarking userptr surfaces and impact

Volkin, Bradley D bradley.d.volkin at intel.com
Fri Apr 18 01:18:46 CEST 2014


On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> 
> This adds a small benchmark for the new userptr functionality.
> 
> Apart from basic surface creation and destruction, also tested is the
> impact of having userptr surfaces in the process address space. Reason
> for that is the impact of MMU notifiers on common address space
> operations like munmap() which is per process.
> 
> v2:
>   * Moved to benchmarks.
>   * Added pointer read/write tests.
>   * Changed output to say iterations per second instead of
>     operations per second.
>   * Multiply result by batch size for multi-create* tests
>     for a more comparable number with create-destroy test.
> 
> v3:
>   * Fixed ioctl detection on kernels without MMU_NOTIFIERs.
> 
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  Android.mk                         |   3 +-
>  benchmarks/.gitignore              |   1 +
>  benchmarks/Android.mk              |  36 +++
>  benchmarks/Makefile.am             |   7 +-
>  benchmarks/Makefile.sources        |   6 +
>  benchmarks/gem_userptr_benchmark.c | 513 +++++++++++++++++++++++++++++++++++++
>  6 files changed, 558 insertions(+), 8 deletions(-)
>  create mode 100644 benchmarks/Android.mk
>  create mode 100644 benchmarks/Makefile.sources
>  create mode 100644 benchmarks/gem_userptr_benchmark.c
> 
> diff --git a/Android.mk b/Android.mk
> index 8aeb2d4..0c969b8 100644
> --- a/Android.mk
> +++ b/Android.mk
> @@ -1,2 +1 @@
> -include $(call all-named-subdir-makefiles, lib tests tools)
> -
> +include $(call all-named-subdir-makefiles, lib tests tools benchmarks)
> diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
> index ddea6f7..09e5bd8 100644
> --- a/benchmarks/.gitignore
> +++ b/benchmarks/.gitignore
> @@ -1,3 +1,4 @@
> +gem_userptr_benchmark
>  intel_upload_blit_large
>  intel_upload_blit_large_gtt
>  intel_upload_blit_large_map
> diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk
> new file mode 100644
> index 0000000..5bb8ef5
> --- /dev/null
> +++ b/benchmarks/Android.mk
> @@ -0,0 +1,36 @@
> +LOCAL_PATH := $(call my-dir)
> +
> +include $(LOCAL_PATH)/Makefile.sources
> +
> +#================#
> +
> +define add_benchmark
> +    include $(CLEAR_VARS)
> +
> +    LOCAL_SRC_FILES := $1.c
> +
> +    LOCAL_CFLAGS += -DHAVE_STRUCT_SYSINFO_TOTALRAM
> +    LOCAL_CFLAGS += -DANDROID -UNDEBUG -include "check-ndebug.h"
> +    LOCAL_CFLAGS += -std=c99
> +    # FIXME: drop once Bionic correctly annotates "noreturn" on pthread_exit
> +    LOCAL_CFLAGS += -Wno-error=return-type
> +    # Excessive complaining for established cases. Rely on the Linux version warnings.
> +    LOCAL_CFLAGS += -Wno-sign-compare
> +
> +    LOCAL_MODULE := $1
> +    LOCAL_MODULE_TAGS := optional
> +
> +    LOCAL_STATIC_LIBRARIES := libintel_gpu_tools
> +
> +    LOCAL_SHARED_LIBRARIES := libpciaccess  \
> +                              libdrm        \
> +                              libdrm_intel
> +
> +    include $(BUILD_EXECUTABLE)
> +endef
> +
> +#================#
> +
> +benchmark_list := $(bin_PROGRAMS)
> +
> +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item))))
> diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
> index e2ad784..d173bf4 100644
> --- a/benchmarks/Makefile.am
> +++ b/benchmarks/Makefile.am
> @@ -1,9 +1,4 @@
> -
> -bin_PROGRAMS = 				\
> -	intel_upload_blit_large		\
> -	intel_upload_blit_large_gtt	\
> -	intel_upload_blit_large_map	\
> -	intel_upload_blit_small
> +include Makefile.sources
>  
>  AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
>  AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
> diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
> new file mode 100644
> index 0000000..fd6c107
> --- /dev/null
> +++ b/benchmarks/Makefile.sources
> @@ -0,0 +1,6 @@
> +bin_PROGRAMS =                          \
> +        intel_upload_blit_large         \
> +        intel_upload_blit_large_gtt     \
> +        intel_upload_blit_large_map     \
> +        intel_upload_blit_small         \
> +        gem_userptr_benchmark

You might split the makefile cleanup aspect of this into a separate
patch, but I'm fine either way.

> diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr_benchmark.c
> new file mode 100644
> index 0000000..218f6f1
> --- /dev/null
> +++ b/benchmarks/gem_userptr_benchmark.c
> @@ -0,0 +1,513 @@
> +/*
> + * Copyright © 2014 Intel Corporation
> + *
> + * Permission is hereby granted, free of charge, to any person obtaining a
> + * copy of this software and associated documentation files (the "Software"),
> + * to deal in the Software without restriction, including without limitation
> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
> + * and/or sell copies of the Software, and to permit persons to whom the
> + * Software is furnished to do so, subject to the following conditions:
> + *
> + * The above copyright notice and this permission notice (including the next
> + * paragraph) shall be included in all copies or substantial portions of the
> + * Software.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
> + * IN THE SOFTWARE.
> + *
> + * Authors:
> + *    Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> + *
> + */
> +
> +/** @file gem_userptr_benchmark.c
> + *
> + * Benchmark the userptr code and impact of having userptr surfaces
> + * in process address space on some normal operations.
> + *
> + */
> +
> +#include <stdlib.h>
> +#include <stdio.h>
> +#include <string.h>
> +#include <fcntl.h>
> +#include <inttypes.h>
> +#include <errno.h>
> +#include <sys/stat.h>
> +#include <sys/time.h>
> +#include <sys/mman.h>
> +#include "drm.h"
> +#include "i915_drm.h"
> +#include "drmtest.h"
> +#include "intel_bufmgr.h"
> +#include "intel_batchbuffer.h"
> +#include "intel_gpu_tools.h"
> +
> +#define WIDTH 128
> +#define HEIGHT 128
> +#define PAGE_SIZE 4096
> +
> +#define LOCAL_I915_GEM_USERPTR       0x34
> +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
> +struct local_i915_gem_userptr {
> +	uint64_t user_ptr;
> +	uint64_t user_size;
> +	uint32_t flags;
> +#define I915_USERPTR_READ_ONLY (1<<0)
> +#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
> +	uint32_t handle;
> +};
> +
> +static uint32_t userptr_flags = I915_USERPTR_UNSYNCHRONIZED;
> +
> +static uint32_t linear[WIDTH*HEIGHT];

I may have missed it, but I don't think we use this variable except
to do sizeof(linear). If that's the case, a constant for the buffer
size might make the code more readable.

> +
> +static void gem_userptr_test_unsynchronized(void)
> +{
> +	userptr_flags = I915_USERPTR_UNSYNCHRONIZED;
> +}
> +
> +static void gem_userptr_test_synchronized(void)
> +{
> +	userptr_flags = 0;
> +}
> +
> +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t *handle)
> +{
> +	struct local_i915_gem_userptr userptr;
> +	int ret;
> +
> +	userptr.user_ptr = (uintptr_t)ptr;
> +	userptr.user_size = size;
> +	userptr.flags = userptr_flags;
> +	if (read_only)
> +		userptr.flags |= I915_USERPTR_READ_ONLY;
> +
> +	ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
> +	if (ret)
> +		ret = errno;
> +	igt_skip_on_f(ret == ENODEV &&
> +		      (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) == 0,
> +		      "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTIFIER?");
> +	if (ret == 0)
> +		*handle = userptr.handle;
> +
> +	return ret;
> +}
> +
> +static uint32_t
> +create_userptr(int fd, uint32_t val, uint32_t *ptr)
> +{
> +	uint32_t handle;
> +	int i, ret;
> +
> +	ret = gem_userptr(fd, ptr, sizeof(linear), 0, &handle);
> +	igt_assert(ret == 0);
> +	igt_assert(handle != 0);
> +
> +	/* Fill the BO with dwords starting at val */
> +	for (i = 0; i < WIDTH*HEIGHT; i++)
> +		ptr[i] = val++;
> +
> +	return handle;
> +}

I don't see that this function is used in this test.

> +
> +static void **handle_ptr_map;
> +static unsigned int num_handle_ptr_map;

I'd prefer that we explicitly initialize at least num_handle_ptr_map.

> +
> +static void add_handle_ptr(uint32_t handle, void *ptr)
> +{
> +	if (handle >= num_handle_ptr_map) {
> +		handle_ptr_map = realloc(handle_ptr_map,
> +					 (handle + 1000) * sizeof(void*));
> +		num_handle_ptr_map = handle + 1000;
> +	}
> +
> +	handle_ptr_map[handle] = ptr;
> +}
> +
> +static void *get_handle_ptr(uint32_t handle)
> +{
> +	return handle_ptr_map[handle];
> +}
> +
> +static void free_handle_ptr(uint32_t handle)
> +{
> +	igt_assert(handle < num_handle_ptr_map);
> +	igt_assert(handle_ptr_map[handle]);
> +
> +	free(handle_ptr_map[handle]);
> +	handle_ptr_map[handle] = NULL;
> +}
> +
> +static uint32_t create_userptr_bo(int fd, int size)
> +{
> +	void *ptr;
> +	uint32_t handle;
> +	int ret;
> +
> +	ret = posix_memalign(&ptr, PAGE_SIZE, size);
> +	igt_assert(ret == 0);
> +
> +	ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle);
> +	igt_assert(ret == 0);
> +	add_handle_ptr(handle, ptr);
> +
> +	return handle;
> +}
> +
> +static void free_userptr_bo(int fd, uint32_t handle)
> +{
> +	gem_close(fd, handle);
> +	free_handle_ptr(handle);
> +}
> +
> +static int has_userptr(int fd)
> +{
> +	uint32_t handle = 0;
> +	void *ptr;
> +	uint32_t oldflags;
> +	int ret;
> +
> +	assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
> +	oldflags = userptr_flags;
> +	gem_userptr_test_unsynchronized();
> +	ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle);
> +	userptr_flags = oldflags;
> +	if (ret != 0) {
> +		free(ptr);
> +		return 0;
> +	}
> +
> +	gem_close(fd, handle);
> +	free(ptr);
> +
> +	return handle != 0;
> +}
> +
> +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000};
> +static const unsigned int test_duration_sec = 3;
> +
> +static volatile unsigned int run_test;
> +
> +static void alarm_handler(int sig)
> +{
> +	assert(run_test == 1);
> +	run_test = 0;
> +}
> +
> +static void start_test(unsigned int duration)
> +{
> +	run_test = 1;
> +	if (duration == 0)
> +		duration = test_duration_sec;
> +	signal(SIGALRM, alarm_handler);
> +	alarm(duration);
> +}
> +
> +static void exchange_ptr(void *array, unsigned i, unsigned j)
> +{
> +	void **arr, *tmp;
> +	arr = (void **)array;
> +
> +	tmp = arr[i];
> +	arr[i] = arr[j];
> +	arr[j] = tmp;
> +}
> +
> +static void test_malloc_free(int random)
> +{
> +	unsigned long iter = 0;
> +	unsigned int i, tot = 1000;
> +	void *ptr[tot];
> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		for (i = 0; i < tot; i++) {
> +			ptr[i] = malloc(1000);
> +			assert(ptr[i]);
> +		}
> +		if (random)
> +			igt_permute_array(ptr, tot, exchange_ptr);
> +		for (i = 0; i < tot; i++)
> +			free(ptr[i]);
> +		iter++;
> +	}
> +
> +	printf("%8lu iter/s\n", iter / test_duration_sec);
> +}
> +
> +static void test_malloc_realloc_free(int random)
> +{
> +	unsigned long iter = 0;
> +	unsigned int i, tot = 1000;
> +	void *ptr[tot];
> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		for (i = 0; i < tot; i++) {
> +			ptr[i] = malloc(1000);
> +			assert(ptr[i]);
> +		}
> +		if (random)
> +			igt_permute_array(ptr, tot, exchange_ptr);
> +		for (i = 0; i < tot; i++) {
> +			ptr[i] = realloc(ptr[i], 2000);
> +			assert(ptr[i]);
> +		}
> +		if (random)
> +			igt_permute_array(ptr, tot, exchange_ptr);
> +		for (i = 0; i < tot; i++)
> +			free(ptr[i]);
> +		iter++;
> +	}
> +
> +	printf("%8lu iter/s\n", iter / test_duration_sec);
> +}
> +
> +static void test_mmap_unmap(int random)
> +{
> +	unsigned long iter = 0;
> +	unsigned int i, tot = 1000;
> +	void *ptr[tot];
> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		for (i = 0; i < tot; i++) {
> +			ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE,
> +					MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +			assert(ptr[i] != MAP_FAILED);
> +		}
> +		if (random)
> +			igt_permute_array(ptr, tot, exchange_ptr);
> +		for (i = 0; i < tot; i++)
> +			munmap(ptr[i], 1000);
> +		iter++;
> +	}
> +
> +	printf("%8lu iter/s\n", iter / test_duration_sec);
> +}
> +
> +static void test_ptr_read(void *ptr)
> +{
> +	unsigned long iter = 0;
> +	volatile unsigned long *p;
> +	unsigned long i, loops;
> +	register unsigned long v;
> +
> +	loops = sizeof(linear) / sizeof(unsigned long) / 4;
> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		p = (unsigned long *)ptr;
> +		for (i = 0; i < loops; i++) {
> +			v = *p++;
> +			v = *p++;
> +			v = *p++;
> +			v = *p++;
> +		}
> +		iter++;
> +	}
> +
> +	printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000);
> +}
> +
> +static void test_ptr_write(void *ptr)
> +{
> +	unsigned long iter = 0;
> +	volatile unsigned long *p;
> +	register unsigned long i, loops;
> +
> +	loops = sizeof(linear) / sizeof(unsigned long) / 4;
> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		p = (unsigned long *)ptr;
> +		for (i = 0; i < loops; i++) {
> +			*p++ = i;
> +			*p++ = i;
> +			*p++ = i;
> +			*p++ = i;
> +		}
> +		iter++;
> +	}
> +
> +	printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000);
> +}
> +
> +static void test_impact(int fd)
> +{
> +	unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]);
> +	unsigned int subtest, i;
> +	uint32_t handles[nr_bos[total-1]];
> +	void *ptr;
> +	char buffer[sizeof(linear)];
> +
> +	for (subtest = 0; subtest < total; subtest++) {
> +		for (i = 0; i < nr_bos[subtest]; i++)
> +			handles[i] = create_userptr_bo(fd, sizeof(linear));
> +
> +		if (nr_bos[subtest] > 0)
> +			ptr = get_handle_ptr(handles[0]);
> +		else
> +			ptr = buffer;
> +
> +		printf("ptr-read,                   %5u bos = ", nr_bos[subtest]);
> +		test_ptr_read(ptr);
> +
> +		printf("ptr-write                   %5u bos = ", nr_bos[subtest]);
> +		test_ptr_write(ptr);
> +
> +		printf("malloc-free,                %5u bos = ", nr_bos[subtest]);
> +		test_malloc_free(0);
> +		printf("malloc-free-random          %5u bos = ", nr_bos[subtest]);
> +		test_malloc_free(1);
> +
> +		printf("malloc-realloc-free,        %5u bos = ", nr_bos[subtest]);
> +		test_malloc_realloc_free(0);
> +		printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]);
> +		test_malloc_realloc_free(1);
> +
> +		printf("mmap-unmap,                 %5u bos = ", nr_bos[subtest]);
> +		test_mmap_unmap(0);
> +		printf("mmap-unmap-random,          %5u bos = ", nr_bos[subtest]);
> +		test_mmap_unmap(1);
> +
> +		for (i = 0; i < nr_bos[subtest]; i++)
> +			free_userptr_bo(fd, handles[i]);
> +	}
> +}
> +
> +static void test_single(int fd)
> +{
> +	char *ptr, *bo_ptr;
> +	uint32_t handle = 0;
> +	unsigned long iter = 0;
> +	int ret;
> +	unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1;
> +
> +	ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
> +			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	assert(ptr != MAP_FAILED);
> +
> +	bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1))
> +					& ~(PAGE_SIZE - 1));

You might add an ALIGN macro in this file or a suitable header. A couple of
.c files in lib/ individually define one already.

Brad

> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		ret = gem_userptr(fd, bo_ptr, sizeof(linear), 0, &handle);
> +		assert(ret == 0);
> +		gem_close(fd, handle);
> +		iter++;
> +	}
> +
> +	munmap(ptr, map_size);
> +
> +	printf("%8lu iter/s\n", iter / test_duration_sec);
> +}
> +
> +static void test_multiple(int fd, unsigned int batch, int random)
> +{
> +	char *ptr, *bo_ptr;
> +	uint32_t handles[10000];
> +	int map[10000];
> +	unsigned long iter = 0;
> +	int ret;
> +	int i;
> +	unsigned long map_size = batch * sizeof(linear) + PAGE_SIZE - 1;
> +
> +	assert(batch < (sizeof(handles) / sizeof(handles[0])));
> +	assert(batch < (sizeof(map) / sizeof(map[0])));
> +
> +	ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
> +			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
> +	assert(ptr != MAP_FAILED);
> +
> +	bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1))
> +				& ~(PAGE_SIZE - 1));
> +
> +	for (i = 0; i < batch; i++)
> +		map[i] = i;
> +
> +	start_test(test_duration_sec);
> +
> +	while (run_test) {
> +		if (random)
> +			igt_permute_array(map, batch, igt_exchange_int);
> +		for (i = 0; i < batch; i++) {
> +			ret = gem_userptr(fd, bo_ptr + map[i] * sizeof(linear),
> +						sizeof(linear),
> +						0, &handles[i]);
> +			assert(ret == 0);
> +		}
> +		if (random)
> +			igt_permute_array(map, batch, igt_exchange_int);
> +		for (i = 0; i < batch; i++)
> +			gem_close(fd, handles[map[i]]);
> +		iter++;
> +	}
> +
> +	munmap(ptr, map_size);
> +
> +	printf("%8lu iter/s\n", iter * batch / test_duration_sec);
> +}
> +
> +static void test_userptr(int fd)
> +{
> +	printf("create-destroy                = ");
> +	test_single(fd);
> +
> +	printf("multi-create-destroy          = ");
> +	test_multiple(fd, 100, 0);
> +
> +	printf("multi-create-destroy-random   = ");
> +	test_multiple(fd, 100, 1);
> +}
> +
> +int main(int argc, char **argv)
> +{
> +	int fd = -1, ret;
> +
> +	igt_skip_on_simulation();
> +
> +	igt_subtest_init(argc, argv);
> +
> +	fd = drm_open_any();
> +	igt_assert(fd >= 0);
> +
> +	ret = has_userptr(fd);
> +	igt_skip_on_f(ret == 0, "No userptr support - %s (%d)\n",
> +			strerror(errno), ret);
> +
> +
> +	gem_userptr_test_unsynchronized();
> +
> +	igt_subtest("userptr-unsync")
> +		test_userptr(fd);
> +
> +	igt_subtest("userptr-impact-unsync")
> +		test_impact(fd);
> +
> +	gem_userptr_test_synchronized();
> +
> +	igt_subtest("userptr-sync")
> +		test_userptr(fd);
> +
> +	igt_subtest("userptr-impact-sync")
> +		test_impact(fd);
> +
> +	igt_exit();
> +
> +	return 0;
> +}
> -- 
> 1.9.0
> 
> _______________________________________________
> Intel-gfx mailing list
> Intel-gfx at lists.freedesktop.org
> http://lists.freedesktop.org/mailman/listinfo/intel-gfx



More information about the Intel-gfx mailing list