[Intel-gfx] [PATCH 3/3] tests/gem_userptr_benchmark: Benchmarking userptr surfaces and impact
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Apr 23 15:28:54 CEST 2014
Hi Brad,
On 04/18/2014 12:18 AM, Volkin, Bradley D wrote:
> On Wed, Mar 19, 2014 at 04:13:06AM -0700, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> This adds a small benchmark for the new userptr functionality.
>>
>> Apart from basic surface creation and destruction, also tested is the
>> impact of having userptr surfaces in the process address space. Reason
>> for that is the impact of MMU notifiers on common address space
>> operations like munmap() which is per process.
>>
>> v2:
>> * Moved to benchmarks.
>> * Added pointer read/write tests.
>> * Changed output to say iterations per second instead of
>> operations per second.
>> * Multiply result by batch size for multi-create* tests
>> for a more comparable number with create-destroy test.
>>
>> v3:
>> * Fixed ioctl detection on kernels without MMU_NOTIFIERs.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Cc: Chris Wilson <chris at chris-wilson.co.uk>
>> ---
>> Android.mk | 3 +-
>> benchmarks/.gitignore | 1 +
>> benchmarks/Android.mk | 36 +++
>> benchmarks/Makefile.am | 7 +-
>> benchmarks/Makefile.sources | 6 +
>> benchmarks/gem_userptr_benchmark.c | 513 +++++++++++++++++++++++++++++++++++++
>> 6 files changed, 558 insertions(+), 8 deletions(-)
>> create mode 100644 benchmarks/Android.mk
>> create mode 100644 benchmarks/Makefile.sources
>> create mode 100644 benchmarks/gem_userptr_benchmark.c
>>
>> diff --git a/Android.mk b/Android.mk
>> index 8aeb2d4..0c969b8 100644
>> --- a/Android.mk
>> +++ b/Android.mk
>> @@ -1,2 +1 @@
>> -include $(call all-named-subdir-makefiles, lib tests tools)
>> -
>> +include $(call all-named-subdir-makefiles, lib tests tools benchmarks)
>> diff --git a/benchmarks/.gitignore b/benchmarks/.gitignore
>> index ddea6f7..09e5bd8 100644
>> --- a/benchmarks/.gitignore
>> +++ b/benchmarks/.gitignore
>> @@ -1,3 +1,4 @@
>> +gem_userptr_benchmark
>> intel_upload_blit_large
>> intel_upload_blit_large_gtt
>> intel_upload_blit_large_map
>> diff --git a/benchmarks/Android.mk b/benchmarks/Android.mk
>> new file mode 100644
>> index 0000000..5bb8ef5
>> --- /dev/null
>> +++ b/benchmarks/Android.mk
>> @@ -0,0 +1,36 @@
>> +LOCAL_PATH := $(call my-dir)
>> +
>> +include $(LOCAL_PATH)/Makefile.sources
>> +
>> +#================#
>> +
>> +define add_benchmark
>> + include $(CLEAR_VARS)
>> +
>> + LOCAL_SRC_FILES := $1.c
>> +
>> + LOCAL_CFLAGS += -DHAVE_STRUCT_SYSINFO_TOTALRAM
>> + LOCAL_CFLAGS += -DANDROID -UNDEBUG -include "check-ndebug.h"
>> + LOCAL_CFLAGS += -std=c99
>> + # FIXME: drop once Bionic correctly annotates "noreturn" on pthread_exit
>> + LOCAL_CFLAGS += -Wno-error=return-type
>> + # Excessive complaining for established cases. Rely on the Linux version warnings.
>> + LOCAL_CFLAGS += -Wno-sign-compare
>> +
>> + LOCAL_MODULE := $1
>> + LOCAL_MODULE_TAGS := optional
>> +
>> + LOCAL_STATIC_LIBRARIES := libintel_gpu_tools
>> +
>> + LOCAL_SHARED_LIBRARIES := libpciaccess \
>> + libdrm \
>> + libdrm_intel
>> +
>> + include $(BUILD_EXECUTABLE)
>> +endef
>> +
>> +#================#
>> +
>> +benchmark_list := $(bin_PROGRAMS)
>> +
>> +$(foreach item,$(benchmark_list),$(eval $(call add_benchmark,$(item))))
>> diff --git a/benchmarks/Makefile.am b/benchmarks/Makefile.am
>> index e2ad784..d173bf4 100644
>> --- a/benchmarks/Makefile.am
>> +++ b/benchmarks/Makefile.am
>> @@ -1,9 +1,4 @@
>> -
>> -bin_PROGRAMS = \
>> - intel_upload_blit_large \
>> - intel_upload_blit_large_gtt \
>> - intel_upload_blit_large_map \
>> - intel_upload_blit_small
>> +include Makefile.sources
>>
>> AM_CPPFLAGS = -I$(top_srcdir) -I$(top_srcdir)/lib
>> AM_CFLAGS = $(DRM_CFLAGS) $(CWARNFLAGS) $(CAIRO_CFLAGS)
>> diff --git a/benchmarks/Makefile.sources b/benchmarks/Makefile.sources
>> new file mode 100644
>> index 0000000..fd6c107
>> --- /dev/null
>> +++ b/benchmarks/Makefile.sources
>> @@ -0,0 +1,6 @@
>> +bin_PROGRAMS = \
>> + intel_upload_blit_large \
>> + intel_upload_blit_large_gtt \
>> + intel_upload_blit_large_map \
>> + intel_upload_blit_small \
>> + gem_userptr_benchmark
>
> You might split the makefile cleanup aspect of this into a separate
> patch, but I'm fine either way.
Good idea, will try to squeeze that in.
>> diff --git a/benchmarks/gem_userptr_benchmark.c b/benchmarks/gem_userptr_benchmark.c
>> new file mode 100644
>> index 0000000..218f6f1
>> --- /dev/null
>> +++ b/benchmarks/gem_userptr_benchmark.c
>> @@ -0,0 +1,513 @@
>> +/*
>> + * Copyright © 2014 Intel Corporation
>> + *
>> + * Permission is hereby granted, free of charge, to any person obtaining a
>> + * copy of this software and associated documentation files (the "Software"),
>> + * to deal in the Software without restriction, including without limitation
>> + * the rights to use, copy, modify, merge, publish, distribute, sublicense,
>> + * and/or sell copies of the Software, and to permit persons to whom the
>> + * Software is furnished to do so, subject to the following conditions:
>> + *
>> + * The above copyright notice and this permission notice (including the next
>> + * paragraph) shall be included in all copies or substantial portions of the
>> + * Software.
>> + *
>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
>> + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
>> + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
>> + * IN THE SOFTWARE.
>> + *
>> + * Authors:
>> + * Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> + *
>> + */
>> +
>> +/** @file gem_userptr_benchmark.c
>> + *
>> + * Benchmark the userptr code and impact of having userptr surfaces
>> + * in process address space on some normal operations.
>> + *
>> + */
>> +
>> +#include <stdlib.h>
>> +#include <stdio.h>
>> +#include <string.h>
>> +#include <fcntl.h>
>> +#include <inttypes.h>
>> +#include <errno.h>
>> +#include <sys/stat.h>
>> +#include <sys/time.h>
>> +#include <sys/mman.h>
>> +#include "drm.h"
>> +#include "i915_drm.h"
>> +#include "drmtest.h"
>> +#include "intel_bufmgr.h"
>> +#include "intel_batchbuffer.h"
>> +#include "intel_gpu_tools.h"
>> +
>> +#define WIDTH 128
>> +#define HEIGHT 128
>> +#define PAGE_SIZE 4096
>> +
>> +#define LOCAL_I915_GEM_USERPTR 0x34
>> +#define LOCAL_IOCTL_I915_GEM_USERPTR DRM_IOWR (DRM_COMMAND_BASE + LOCAL_I915_GEM_USERPTR, struct local_i915_gem_userptr)
>> +struct local_i915_gem_userptr {
>> + uint64_t user_ptr;
>> + uint64_t user_size;
>> + uint32_t flags;
>> +#define I915_USERPTR_READ_ONLY (1<<0)
>> +#define I915_USERPTR_UNSYNCHRONIZED (1<<31)
>> + uint32_t handle;
>> +};
>> +
>> +static uint32_t userptr_flags = I915_USERPTR_UNSYNCHRONIZED;
>> +
>> +static uint32_t linear[WIDTH*HEIGHT];
>
> I may have missed it, but I don't think we use this variable except
> to do sizeof(linear). If that's the case, a constant for the buffer
> size might make the code more readable.
You're right, too much copy & paste.
>> +
>> +static void gem_userptr_test_unsynchronized(void)
>> +{
>> + userptr_flags = I915_USERPTR_UNSYNCHRONIZED;
>> +}
>> +
>> +static void gem_userptr_test_synchronized(void)
>> +{
>> + userptr_flags = 0;
>> +}
>> +
>> +static int gem_userptr(int fd, void *ptr, int size, int read_only, uint32_t *handle)
>> +{
>> + struct local_i915_gem_userptr userptr;
>> + int ret;
>> +
>> + userptr.user_ptr = (uintptr_t)ptr;
>> + userptr.user_size = size;
>> + userptr.flags = userptr_flags;
>> + if (read_only)
>> + userptr.flags |= I915_USERPTR_READ_ONLY;
>> +
>> + ret = drmIoctl(fd, LOCAL_IOCTL_I915_GEM_USERPTR, &userptr);
>> + if (ret)
>> + ret = errno;
>> + igt_skip_on_f(ret == ENODEV &&
>> + (userptr_flags & I915_USERPTR_UNSYNCHRONIZED) == 0,
>> + "Skipping, synchronized mappings with no kernel CONFIG_MMU_NOTIFIER?");
>> + if (ret == 0)
>> + *handle = userptr.handle;
>> +
>> + return ret;
>> +}
>> +
>> +static uint32_t
>> +create_userptr(int fd, uint32_t val, uint32_t *ptr)
>> +{
>> + uint32_t handle;
>> + int i, ret;
>> +
>> + ret = gem_userptr(fd, ptr, sizeof(linear), 0, &handle);
>> + igt_assert(ret == 0);
>> + igt_assert(handle != 0);
>> +
>> + /* Fill the BO with dwords starting at val */
>> + for (i = 0; i < WIDTH*HEIGHT; i++)
>> + ptr[i] = val++;
>> +
>> + return handle;
>> +}
>
> I don't see that this function is used in this test.
You are right, artifact of this file starting from gem_userptr_blits.c
and then removing stuff. Obviously not enough removed.
>> +
>> +static void **handle_ptr_map;
>> +static unsigned int num_handle_ptr_map;
>
> I'd prefer that we explicitly initialize at least num_handle_ptr_map.
To zero, why?
>> +
>> +static void add_handle_ptr(uint32_t handle, void *ptr)
>> +{
>> + if (handle >= num_handle_ptr_map) {
>> + handle_ptr_map = realloc(handle_ptr_map,
>> + (handle + 1000) * sizeof(void*));
>> + num_handle_ptr_map = handle + 1000;
>> + }
>> +
>> + handle_ptr_map[handle] = ptr;
>> +}
>> +
>> +static void *get_handle_ptr(uint32_t handle)
>> +{
>> + return handle_ptr_map[handle];
>> +}
>> +
>> +static void free_handle_ptr(uint32_t handle)
>> +{
>> + igt_assert(handle < num_handle_ptr_map);
>> + igt_assert(handle_ptr_map[handle]);
>> +
>> + free(handle_ptr_map[handle]);
>> + handle_ptr_map[handle] = NULL;
>> +}
>> +
>> +static uint32_t create_userptr_bo(int fd, int size)
>> +{
>> + void *ptr;
>> + uint32_t handle;
>> + int ret;
>> +
>> + ret = posix_memalign(&ptr, PAGE_SIZE, size);
>> + igt_assert(ret == 0);
>> +
>> + ret = gem_userptr(fd, (uint32_t *)ptr, size, 0, &handle);
>> + igt_assert(ret == 0);
>> + add_handle_ptr(handle, ptr);
>> +
>> + return handle;
>> +}
>> +
>> +static void free_userptr_bo(int fd, uint32_t handle)
>> +{
>> + gem_close(fd, handle);
>> + free_handle_ptr(handle);
>> +}
>> +
>> +static int has_userptr(int fd)
>> +{
>> + uint32_t handle = 0;
>> + void *ptr;
>> + uint32_t oldflags;
>> + int ret;
>> +
>> + assert(posix_memalign(&ptr, PAGE_SIZE, PAGE_SIZE) == 0);
>> + oldflags = userptr_flags;
>> + gem_userptr_test_unsynchronized();
>> + ret = gem_userptr(fd, ptr, PAGE_SIZE, 0, &handle);
>> + userptr_flags = oldflags;
>> + if (ret != 0) {
>> + free(ptr);
>> + return 0;
>> + }
>> +
>> + gem_close(fd, handle);
>> + free(ptr);
>> +
>> + return handle != 0;
>> +}
>> +
>> +static const unsigned int nr_bos[] = {0, 1, 10, 100, 1000};
>> +static const unsigned int test_duration_sec = 3;
>> +
>> +static volatile unsigned int run_test;
>> +
>> +static void alarm_handler(int sig)
>> +{
>> + assert(run_test == 1);
>> + run_test = 0;
>> +}
>> +
>> +static void start_test(unsigned int duration)
>> +{
>> + run_test = 1;
>> + if (duration == 0)
>> + duration = test_duration_sec;
>> + signal(SIGALRM, alarm_handler);
>> + alarm(duration);
>> +}
>> +
>> +static void exchange_ptr(void *array, unsigned i, unsigned j)
>> +{
>> + void **arr, *tmp;
>> + arr = (void **)array;
>> +
>> + tmp = arr[i];
>> + arr[i] = arr[j];
>> + arr[j] = tmp;
>> +}
>> +
>> +static void test_malloc_free(int random)
>> +{
>> + unsigned long iter = 0;
>> + unsigned int i, tot = 1000;
>> + void *ptr[tot];
>> +
>> + start_test(test_duration_sec);
>> +
>> + while (run_test) {
>> + for (i = 0; i < tot; i++) {
>> + ptr[i] = malloc(1000);
>> + assert(ptr[i]);
>> + }
>> + if (random)
>> + igt_permute_array(ptr, tot, exchange_ptr);
>> + for (i = 0; i < tot; i++)
>> + free(ptr[i]);
>> + iter++;
>> + }
>> +
>> + printf("%8lu iter/s\n", iter / test_duration_sec);
>> +}
>> +
>> +static void test_malloc_realloc_free(int random)
>> +{
>> + unsigned long iter = 0;
>> + unsigned int i, tot = 1000;
>> + void *ptr[tot];
>> +
>> + start_test(test_duration_sec);
>> +
>> + while (run_test) {
>> + for (i = 0; i < tot; i++) {
>> + ptr[i] = malloc(1000);
>> + assert(ptr[i]);
>> + }
>> + if (random)
>> + igt_permute_array(ptr, tot, exchange_ptr);
>> + for (i = 0; i < tot; i++) {
>> + ptr[i] = realloc(ptr[i], 2000);
>> + assert(ptr[i]);
>> + }
>> + if (random)
>> + igt_permute_array(ptr, tot, exchange_ptr);
>> + for (i = 0; i < tot; i++)
>> + free(ptr[i]);
>> + iter++;
>> + }
>> +
>> + printf("%8lu iter/s\n", iter / test_duration_sec);
>> +}
>> +
>> +static void test_mmap_unmap(int random)
>> +{
>> + unsigned long iter = 0;
>> + unsigned int i, tot = 1000;
>> + void *ptr[tot];
>> +
>> + start_test(test_duration_sec);
>> +
>> + while (run_test) {
>> + for (i = 0; i < tot; i++) {
>> + ptr[i] = mmap(NULL, 1000, PROT_READ | PROT_WRITE,
>> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>> + assert(ptr[i] != MAP_FAILED);
>> + }
>> + if (random)
>> + igt_permute_array(ptr, tot, exchange_ptr);
>> + for (i = 0; i < tot; i++)
>> + munmap(ptr[i], 1000);
>> + iter++;
>> + }
>> +
>> + printf("%8lu iter/s\n", iter / test_duration_sec);
>> +}
>> +
>> +static void test_ptr_read(void *ptr)
>> +{
>> + unsigned long iter = 0;
>> + volatile unsigned long *p;
>> + unsigned long i, loops;
>> + register unsigned long v;
>> +
>> + loops = sizeof(linear) / sizeof(unsigned long) / 4;
>> +
>> + start_test(test_duration_sec);
>> +
>> + while (run_test) {
>> + p = (unsigned long *)ptr;
>> + for (i = 0; i < loops; i++) {
>> + v = *p++;
>> + v = *p++;
>> + v = *p++;
>> + v = *p++;
>> + }
>> + iter++;
>> + }
>> +
>> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000);
>> +}
>> +
>> +static void test_ptr_write(void *ptr)
>> +{
>> + unsigned long iter = 0;
>> + volatile unsigned long *p;
>> + register unsigned long i, loops;
>> +
>> + loops = sizeof(linear) / sizeof(unsigned long) / 4;
>> +
>> + start_test(test_duration_sec);
>> +
>> + while (run_test) {
>> + p = (unsigned long *)ptr;
>> + for (i = 0; i < loops; i++) {
>> + *p++ = i;
>> + *p++ = i;
>> + *p++ = i;
>> + *p++ = i;
>> + }
>> + iter++;
>> + }
>> +
>> + printf("%8lu MB/s\n", iter / test_duration_sec * sizeof(linear) / 1000000);
>> +}
>> +
>> +static void test_impact(int fd)
>> +{
>> + unsigned int total = sizeof(nr_bos) / sizeof(nr_bos[0]);
>> + unsigned int subtest, i;
>> + uint32_t handles[nr_bos[total-1]];
>> + void *ptr;
>> + char buffer[sizeof(linear)];
>> +
>> + for (subtest = 0; subtest < total; subtest++) {
>> + for (i = 0; i < nr_bos[subtest]; i++)
>> + handles[i] = create_userptr_bo(fd, sizeof(linear));
>> +
>> + if (nr_bos[subtest] > 0)
>> + ptr = get_handle_ptr(handles[0]);
>> + else
>> + ptr = buffer;
>> +
>> + printf("ptr-read, %5u bos = ", nr_bos[subtest]);
>> + test_ptr_read(ptr);
>> +
>> + printf("ptr-write %5u bos = ", nr_bos[subtest]);
>> + test_ptr_write(ptr);
>> +
>> + printf("malloc-free, %5u bos = ", nr_bos[subtest]);
>> + test_malloc_free(0);
>> + printf("malloc-free-random %5u bos = ", nr_bos[subtest]);
>> + test_malloc_free(1);
>> +
>> + printf("malloc-realloc-free, %5u bos = ", nr_bos[subtest]);
>> + test_malloc_realloc_free(0);
>> + printf("malloc-realloc-free-random, %5u bos = ", nr_bos[subtest]);
>> + test_malloc_realloc_free(1);
>> +
>> + printf("mmap-unmap, %5u bos = ", nr_bos[subtest]);
>> + test_mmap_unmap(0);
>> + printf("mmap-unmap-random, %5u bos = ", nr_bos[subtest]);
>> + test_mmap_unmap(1);
>> +
>> + for (i = 0; i < nr_bos[subtest]; i++)
>> + free_userptr_bo(fd, handles[i]);
>> + }
>> +}
>> +
>> +static void test_single(int fd)
>> +{
>> + char *ptr, *bo_ptr;
>> + uint32_t handle = 0;
>> + unsigned long iter = 0;
>> + int ret;
>> + unsigned long map_size = sizeof(linear) + PAGE_SIZE - 1;
>> +
>> + ptr = mmap(NULL, map_size, PROT_READ | PROT_WRITE,
>> + MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
>> + assert(ptr != MAP_FAILED);
>> +
>> + bo_ptr = (char *)(((unsigned long)ptr + (PAGE_SIZE - 1))
>> + & ~(PAGE_SIZE - 1));
>
> You might add an ALIGN macro in this file or a suitable header. A couple of
> .c files in lib/ individually define one already.
OK, will add that in the patch series.
Thanks for the review!
Regards,
Tvrtko
More information about the Intel-gfx
mailing list