[Intel-gfx] [PATCH 3/3] tests/gem_userptr_benchmark: Benchmarking userptr surfaces and impact

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Apr 23 15:28:54 CEST 2014


Hi Brad,

On 04/18/2014 12:18 AM, Volkin, Bradley D wrote:
> On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> This adds a small benchmark for the new userptr functionality.
>>
>> Apart from basic surface creation and destruction, also tested is the
>> impact of having userptr surfaces in the process address space. Reason
>> for that is the impact of MMU notifiers on common address space
>> operations like munmap() which is per process.
>>
>> v2:
>>    * Moved to benchmarks.
>>    * Added pointer read/write tests.
>>    * Changed output to say iterations per second instead of
>>      operations per second.
>>    * Multiply result by batch size for multi-create* tests
>>      for a more comparable number with create-destroy test.
>>
>> v3:
>>    * Fixed ioctl detection on kernels without MMU_NOTIFIERs.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>>   Android.mk                         |   3 +-
>>   benchmarks/.gitignore              |   1 +
>>   benchmarks/Android.mk              |  36 +++
>>   benchmarks/Makefile.am             |   7 +-
>>   benchmarks/Makefile.sources        |   6 +
>>   benchmarks/gem_userptr_benchmark.c | 513 +++++++++++++++++++++++++++++++++++++
>>   6 files changed, 558 insertions(+), 8 deletions(-)
>>   create mode 100644 benchmarks/Android.mk
>>   create mode 100644 benchmarks/Makefile.sources
>>   create mode 100644 benchmarks/gem_userptr_benchmark.c
>>
>> diff --git a/Android.mk b/Android.mk
>> index 8aeb2d4..0c969b8 100644
>> --- a/Android.mk
>> +++ b/Android.mk
>> @@ -1,2 +1 @@
>> -include $(call all-named-subdir-makefiles, lib tests tools)
>> -
>> +include $(call all-named-subdir-makefiles, lib tests tools benchmarks)
>> diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
>> index ddea6f7..09e5bd8 100644
>> --- a/benchmarks/.gitignore
>> +++ b/benchmarks/.gitignore
>> @@ -1,3 +1,4 @@
>> +gem_userptr_benchmark
>>   intel_upload_blit_large
>>   intel_upload_blit_large_gtt
>>   intel_upload_blit_large_map
>> diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk
>> new file mode 100644
>> index 0000000..5bb8ef5
>> --- /dev/null
>> +++ b/benchmarks/Android.mk
>> @@ -0,0 +1,36 @@
>> +LOCAL_PATH := $(call my-dir)
>> +
>> +include $(LOCAL_PATH)/Makefile.sources
>> +
>> +#================#
>> +
>> +define add_benchmark
>> +    include $(CLEAR_VARS)
>> +
>> +    LOCAL_SRC_FILES := $1.c
>> +
>> +    LOCAL_CFLAGS += -DHAVE_STRUCT_SYSINFO_TOTALRAM
>> +    LOCAL_CFLAGS += -DANDROID -UNDEBUG -include "check-ndebug.h"
>> +    LOCAL_CFLAGS += -std=c99
>> +    # FIXME: drop once Bionic correctly annotates "noreturn" on pthread_exit
>> +    LOCAL_CFLAGS += -Wno-error=return-type
>> +    # Excessive complaining for established cases. Rely on the Linux version warnings.
>> +    LOCAL_CFLAGS += -Wno-sign-compare
>> +
>> +    LOCAL_MODULE := $1
>> +    LOCAL_MODULE_TAGS := optional
>> +
>> +    LOCAL_STATIC_LIBRARIES := libintel_gpu_tools
>> +
>> +    LOCAL_SHARED_LIBRARIES := libpciaccess  \
>> +                              libdrm        \
>> +                              libdrm_intel
>> +
>> +    include $(BUILD_EXECUTABLE)
>> +endef
>> +
>> +#================#
>> +
>> +benchmark_list := $(bin_PROGRAMS)
>> +
>> +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item))))
>> diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
>> index e2ad784..d173bf4 100644
>> --- a/benchmarks/Makefile.am
>> +++ b/benchmarks/Makefile.am
>> @@ -1,9 +1,4 @@
>> -
>> -bin_PROGRAMS = 				\
>> -	intel_upload_blit_large		\
>> -	intel_upload_blit_large_gtt	\
>> -	intel_upload_blit_large_map	\
>> -	intel_upload_blit_small
>> +include Makefile.sources
>>
>>   AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
>>   AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
>> diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
>> new file mode 100644
>> index 0000000..fd6c107
>> --- /dev/null
>> +++ b/benchmarks/Makefile.sources
>> @@ -0,0 +1,6 @@
>> +bin_PROGRAMS =                          \
>> +        intel_upload_blit_large         \
>> +        intel_upload_blit_large_gtt     \
>> +        intel_upload_blit_large_map     \
>> +        intel_upload_blit_small         \
>> +        gem_userptr_benchmark
>
> You might split the makefile cleanup aspect of this into a separate
> patch, but I'm fine either way.

Good idea, will try to squeeze that in.

>> diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr_benchmark.c
>> new file mode 100644
>> index 0000000..218f6f1
>> --- /dev/null
>> +++ b/benchmarks/gem_userptr_benchmark.c
>> @@ -0,0 +1,513 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + *    Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> + *
>> + */
>> +
>> +/** @file gem_userptr_benchmark.c
>> + *
>> + * Benchmark the userptr code and impact of having userptr surfaces
>> + * in process address space on some normal operations.
>> + *
>> + */
>> +
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <fcntl.h>
>> +#include <inttypes.h>
>> +#include <errno.h>
>> +#include <sys/stat.h>
>> +#include <sys/time.h>
>> +#include <sys/mman.h>
>> +#include "drm.h"
>> +#include "i915_drm.h"
>> +#include "drmtest.h"
>> +#include "intel_bufmgr.h"
>> +#include "intel_batchbuffer.h"
>> +#include "intel_gpu_tools.h"
>> +
>> +#define WIDTH 128
>> +#define HEIGHT 128
>> +#define PAGE_SIZE 4096
>> +
>> +#define LOCAL_I915_GEM_USERPTR       0x34
>> +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
>> +struct local_i915_gem_userptr {
>> +	uint64_t user_ptr;
>> +	uint64_t user_size;
>> +	uint32_t flags;
>> +#define I915_USERPTR_READ_ONLY (1<<0)
>> +#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
>> +	uint32_t handle;
>> +};
>> +
>> +static uint32_t userptr_flags = I915_USERPTR_UNSYNCHRONIZED;
>> +
>> +static uint32_t linear[WIDTH*HEIGHT];
>
> I may have missed it, but I don't think we use this variable except
> to do sizeof(linear). If that's the case, a constant for the buffer
> size might make the code more readable.

You're right, too much copy & paste.

>> +
>> +static void gem_userptr_test_unsynchronized(void)
>> +{
>> +	userptr_flags = I915_USERPTR_UNSYNCHRONIZED;
>> +}
>> +
>> +static void gem_userptr_test_synchronized(void)
>> +{
>> +	userptr_flags = 0;
>> +}
>> +
>> +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t *handle)
>> +{
>> +	struct local_i915_gem_userptr userptr;
>> +	int ret;
>> +
>> +	userptr.user_ptr = (uintptr_t)ptr;
>> +	userptr.user_size = size;
>> +	userptr.flags = userptr_flags;
>> +	if (read_only)
>> +		userptr.flags |= I915_USERPTR_READ_ONLY;
>> +
>> +	ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
>> +	if (ret)
>> +		ret = errno;
>> +	igt_skip_on_f(ret == ENODEV &&
>> +		      (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) == 0,
>> +		      "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTIFIER?");
>> +	if (ret == 0)
>> +		*handle = userptr.handle;
>> +
>> +	return ret;
>> +}
>> +
>> +static uint32_t
>> +create_userptr(int fd, uint32_t val, uint32_t *ptr)
>> +{
>> +	uint32_t handle;
>> +	int i, ret;
>> +
>> +	ret = gem_userptr(fd, ptr, sizeof(linear), 0, &handle);
>> +	igt_assert(ret == 0);
>> +	igt_assert(handle != 0);
>> +
>> +	/* Fill the BO with dwords starting at val */
>> +	for (i = 0; i < WIDTH*HEIGHT; i++)
>> +		ptr[i] = val++;
>> +
>> +	return handle;
>> +}
>
> I don't see that this function is used in this test.

You are right, artifact of this file starting from gem_userptr_blits.c 
and then removing stuff. Obviously not enough removed.

>> +
>> +static void **handle_ptr_map;
>> +static unsigned int num_handle_ptr_map;
>
> I'd prefer that we explicitly initialize at least num_handle_ptr_map.

To zero, why?

>> +
>> +static void add_handle_ptr(uint32_t handle, void *ptr)
>> +{
>> +	if (handle >= num_handle_ptr_map) {
>> +		handle_ptr_map = realloc(handle_ptr_map,
>> +					 (handle + 1000) * sizeof(void*));
>> +		num_handle_ptr_map = handle + 1000;
>> +	}
>> +
>> +	handle_ptr_map[handle] = ptr;
>> +}
>> +
>> +static void *get_handle_ptr(uint32_t handle)
>> +{
>> +	return handle_ptr_map[handle];
>> +}
>> +
>> +static void free_handle_ptr(uint32_t handle)
>> +{
>> +	igt_assert(handle < num_handle_ptr_map);
>> +	igt_assert(handle_ptr_map[handle]);
>> +
>> +	free(handle_ptr_map[handle]);
>> +	handle_ptr_map[handle] = NULL;
>> +}
>> +
>> +static uint32_t create_userptr_bo(int fd, int size)
>> +{
>> +	void *ptr;
>> +	uint32_t handle;
>> +	int ret;
>> +
>> +	ret = posix_memalign(&ptr, PAGE_SIZE, size);
>> +	igt_assert(ret == 0);
>> +
>> +	ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle);
>> +	igt_assert(ret == 0);
>> +	add_handle_ptr(handle, ptr);
>> +
>> +	return handle;
>> +}
>> +
>> +static void free_userptr_bo(int fd, uint32_t handle)
>> +{
>> +	gem_close(fd, handle);
>> +	free_handle_ptr(handle);
>> +}
>> +
>> +static int has_userptr(int fd)
>> +{
>> +	uint32_t handle = 0;
>> +	void *ptr;
>> +	uint32_t oldflags;
>> +	int ret;
>> +
>> +	assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
>> +	oldflags = userptr_flags;
>> +	gem_userptr_test_unsynchronized();
>> +	ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle);
>> +	userptr_flags = oldflags;
>> +	if (ret != 0) {
>> +		free(ptr);
>> +		return 0;
>> +	}
>> +
>> +	gem_close(fd, handle);
>> +	free(ptr);
>> +
>> +	return handle != 0;
>> +}
>> +
>> +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000};
>> +static const unsigned int test_duration_sec = 3;
>> +
>> +static volatile unsigned int run_test;
>> +
>> +static void alarm_handler(int sig)
>> +{
>> +	assert(run_test == 1);
>> +	run_test = 0;
>> +}
>> +
>> +static void start_test(unsigned int duration)
>> +{
>> +	run_test = 1;
>> +	if (duration == 0)
>> +		duration = test_duration_sec;
>> +	signal(SIGALRM, alarm_handler);
>> +	alarm(duration);
>> +}
>> +
>> +static void exchange_ptr(void *array, unsigned i, unsigned j)
>> +{
>> +	void **arr, *tmp;
>> +	arr = (void **)array;
>> +
>> +	tmp = arr[i];
>> +	arr[i] = arr[j];
>> +	arr[j] = tmp;
>> +}
>> +
>> +static void test_malloc_free(int random)
>> +{
>> +	unsigned long iter = 0;
>> +	unsigned int i, tot = 1000;
>> +	void *ptr[tot];
>> +
>> +	start_test(test_duration_sec);
>> +
>> +	while (run_test) {
>> +		for (i = 0; i < tot; i++) {
>> +			ptr[i] = malloc(1000);
>> +			assert(ptr[i]);
>> +		}
>> +		if (random)
>> +			igt_permute_array(ptr, tot, exchange_ptr);
>> +		for (i = 0; i < tot; i++)
>> +			free(ptr[i]);
>> +		iter++;
>> +	}
>> +
>> +	printf("%8lu iter/s\n", iter / test_duration_sec);
>> +}
>> +
>> +static void test_malloc_realloc_free(int random)
>> +{
>> +	unsigned long iter = 0;
>> +	unsigned int i, tot = 1000;
>> +	void *ptr[tot];
>> +
>> +	start_test(test_duration_sec);
>> +
>> +	while (run_test) {
>> +		for (i = 0; i < tot; i++) {
>> +			ptr[i] = malloc(1000);
>> +			assert(ptr[i]);
>> +		}
>> +		if (random)
>> +			igt_permute_array(ptr, tot, exchange_ptr);
>> +		for (i = 0; i < tot; i++) {
>> +			ptr[i] = realloc(ptr[i], 2000);
>> +			assert(ptr[i]);
>> +		}
>> +		if (random)
>> +			igt_permute_array(ptr, tot, exchange_ptr);
>> +		for (i = 0; i < tot; i++)
>> +			free(ptr[i]);
>> +		iter++;
>> +	}
>> +
>> +	printf("%8lu iter/s\n", iter / test_duration_sec);
>> +}
>> +
>> +static void test_mmap_unmap(int random)
>> +{
>> +	unsigned long iter = 0;
>> +	unsigned int i, tot = 1000;
>> +	void *ptr[tot];
>> +
>> +	start_test(test_duration_sec);
>> +
>> +	while (run_test) {
>> +		for (i = 0; i < tot; i++) {
>> +			ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE,
>> +					MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>> +			assert(ptr[i] != MAP_FAILED);
>> +		}
>> +		if (random)
>> +			igt_permute_array(ptr, tot, exchange_ptr);
>> +		for (i = 0; i < tot; i++)
>> +			munmap(ptr[i], 1000);
>> +		iter++;
>> +	}
>> +
>> +	printf("%8lu iter/s\n", iter / test_duration_sec);
>> +}
>> +
>> +static void test_ptr_read(void *ptr)
>> +{
>> +	unsigned long iter = 0;
>> +	volatile unsigned long *p;
>> +	unsigned long i, loops;
>> +	register unsigned long v;
>> +
>> +	loops = sizeof(linear) / sizeof(unsigned long) / 4;
>> +
>> +	start_test(test_duration_sec);
>> +
>> +	while (run_test) {
>> +		p = (unsigned long *)ptr;
>> +		for (i = 0; i < loops; i++) {
>> +			v = *p++;
>> +			v = *p++;
>> +			v = *p++;
>> +			v = *p++;
>> +		}
>> +		iter++;
>> +	}
>> +
>> +	printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000);
>> +}
>> +
>> +static void test_ptr_write(void *ptr)
>> +{
>> +	unsigned long iter = 0;
>> +	volatile unsigned long *p;
>> +	register unsigned long i, loops;
>> +
>> +	loops = sizeof(linear) / sizeof(unsigned long) / 4;
>> +
>> +	start_test(test_duration_sec);
>> +
>> +	while (run_test) {
>> +		p = (unsigned long *)ptr;
>> +		for (i = 0; i < loops; i++) {
>> +			*p++ = i;
>> +			*p++ = i;
>> +			*p++ = i;
>> +			*p++ = i;
>> +		}
>> +		iter++;
>> +	}
>> +
>> +	printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000);
>> +}
>> +
>> +static void test_impact(int fd)
>> +{
>> +	unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]);
>> +	unsigned int subtest, i;
>> +	uint32_t handles[nr_bos[total-1]];
>> +	void *ptr;
>> +	char buffer[sizeof(linear)];
>> +
>> +	for (subtest = 0; subtest < total; subtest++) {
>> +		for (i = 0; i < nr_bos[subtest]; i++)
>> +			handles[i] = create_userptr_bo(fd, sizeof(linear));
>> +
>> +		if (nr_bos[subtest] > 0)
>> +			ptr = get_handle_ptr(handles[0]);
>> +		else
>> +			ptr = buffer;
>> +
>> +		printf("ptr-read,                   %5u bos = ", nr_bos[subtest]);
>> +		test_ptr_read(ptr);
>> +
>> +		printf("ptr-write                   %5u bos = ", nr_bos[subtest]);
>> +		test_ptr_write(ptr);
>> +
>> +		printf("malloc-free,                %5u bos = ", nr_bos[subtest]);
>> +		test_malloc_free(0);
>> +		printf("malloc-free-random          %5u bos = ", nr_bos[subtest]);
>> +		test_malloc_free(1);
>> +
>> +		printf("malloc-realloc-free,        %5u bos = ", nr_bos[subtest]);
>> +		test_malloc_realloc_free(0);
>> +		printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]);
>> +		test_malloc_realloc_free(1);
>> +
>> +		printf("mmap-unmap,                 %5u bos = ", nr_bos[subtest]);
>> +		test_mmap_unmap(0);
>> +		printf("mmap-unmap-random,          %5u bos = ", nr_bos[subtest]);
>> +		test_mmap_unmap(1);
>> +
>> +		for (i = 0; i < nr_bos[subtest]; i++)
>> +			free_userptr_bo(fd, handles[i]);
>> +	}
>> +}
>> +
>> +static void test_single(int fd)
>> +{
>> +	char *ptr, *bo_ptr;
>> +	uint32_t handle = 0;
>> +	unsigned long iter = 0;
>> +	int ret;
>> +	unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1;
>> +
>> +	ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
>> +			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>> +	assert(ptr != MAP_FAILED);
>> +
>> +	bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1))
>> +					& ~(PAGE_SIZE - 1));
>
> You might add an ALIGN macro in this file or a suitable header. A couple of
> .c files in lib/ individually define one already.

OK, will add that in the patch series.

Thanks for the review!

Regards,

Tvrtko



More information about the Intel-gfx mailing list