[Intel-gfx] [PATCH] test: Measure performance of copying between tiled and untiled/LLC

Chris Wilson chris at chris-wilson.co.uk
Wed Jun 6 12:04:20 CEST 2012


The goal is to compare the speed of copying to and from a LLC bo using
either the CPU or GPU.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 lib/drmtest.c        |   14 +++
 lib/drmtest.h        |    1 +
 tests/.gitignore     |    1 +
 tests/Makefile.am    |    1 +
 tests/llc_exec_blt.c |  278 ++++++++++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 295 insertions(+)
 create mode 100644 tests/llc_exec_blt.c

diff --git a/lib/drmtest.c b/lib/drmtest.c
index cdf46aa..feab1cb 100644
--- a/lib/drmtest.c
+++ b/lib/drmtest.c
@@ -74,6 +74,20 @@ bool gem_uses_aliasing_ppgtt(int fd)
 	return val;
 }
 
+/* Query the i915 HAS_LLC getparam (17); a failed query is reported as false. */
+bool gem_has_llc(int fd)
+{
+	int val = 0; /* stays 0 unless the kernel writes a result */
+	struct drm_i915_getparam gp = {
+		.param = 17, /* HAS_LLC */
+		.value = &val,
+	};
+
+	if (ioctl(fd, DRM_IOCTL_I915_GETPARAM, &gp)) /* only one argument */
+		return false;
+	return val != 0;
+}
+
 int gem_available_fences(int fd)
 {
 	struct drm_i915_getparam gp;
diff --git a/lib/drmtest.h b/lib/drmtest.h
index 4021104..fcc875d 100644
--- a/lib/drmtest.h
+++ b/lib/drmtest.h
@@ -63,6 +63,7 @@ int gem_madvise(int fd, uint32_t handle, int state);
 
 /* feature test helpers */
 bool gem_uses_aliasing_ppgtt(int fd);
+bool gem_has_llc(int fd);
 int gem_available_fences(int fd);
 
 /* generally useful helpers */
diff --git a/tests/.gitignore b/tests/.gitignore
index f486a87..fce1497 100644
--- a/tests/.gitignore
+++ b/tests/.gitignore
@@ -14,6 +14,7 @@ gem_double_irq_loop
 gem_dummy_reloc_loop
 gem_exec_bad_domains
 gem_exec_blt
+llc_exec_blt
 gem_exec_faulting_reloc
 gem_exec_nop
 gem_fenced_exec_thrash
diff --git a/tests/Makefile.am b/tests/Makefile.am
index 5a64660..dcaa1d4 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -13,6 +13,7 @@ TESTS_progs = \
 	gem_gtt_concurrent_blit \
 	gem_exec_nop \
 	gem_exec_blt \
+	llc_exec_blt \
 	gem_exec_bad_domains \
 	gem_exec_faulting_reloc \
 	gem_flink \
diff --git a/tests/llc_exec_blt.c b/tests/llc_exec_blt.c
new file mode 100644
index 0000000..9a18b46
--- /dev/null
+++ b/tests/llc_exec_blt.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ *    Chris Wilson <chris at chris-wilson.co.uk>
+ *
+ */
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <string.h>
+#include <assert.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include "drm.h"
+#include "i915_drm.h"
+#include "drmtest.h"
+#include "intel_chipset.h"
+#include "intel_gpu_tools.h"
+
+#define OBJECT_SIZE 16384
+#define PITCH 4096
+
+#define COPY_BLT_CMD		(2<<29|0x53<<22|0x6)
+#define BLT_WRITE_ALPHA		(1<<21)
+#define BLT_WRITE_RGB		(1<<20)
+#define BLT_SRC_TILED		(1<<15)
+#define BLT_DST_TILED		(1<<11)
+
+/*
+ * Write a COPY_BLT_CMD batch into @b that blits @length bytes (as rows of
+ * PITCH bytes) from @src to @dst, and fill reloc[0] for the destination
+ * address (dword 4) and reloc[1] for the source address (dword 7). @tiling
+ * is BLT_SRC_TILED and/or BLT_DST_TILED; the pitch of a tiled surface is
+ * programmed as PITCH / 4. Returns the batch length in bytes.
+ */
+static int gem_tiled_blt(uint32_t *b,
+			 uint32_t src,
+			 uint32_t dst,
+			 uint32_t tiling,
+			 uint32_t length,
+			 struct drm_i915_gem_relocation_entry *reloc)
+{
+	struct drm_i915_gem_relocation_entry *dst_reloc = &reloc[0];
+	struct drm_i915_gem_relocation_entry *src_reloc = &reloc[1];
+	const int rows = length / PITCH;
+	const int dst_pitch = (tiling & BLT_DST_TILED) ? PITCH / 4 : PITCH;
+	const int src_pitch = (tiling & BLT_SRC_TILED) ? PITCH / 4 : PITCH;
+	uint32_t *cmd = b;
+
+	assert(rows < (1<<16)); /* rows is packed into the top half of a dword */
+
+	*cmd++ = COPY_BLT_CMD | BLT_WRITE_ALPHA | BLT_WRITE_RGB | tiling;
+	*cmd++ = 0xcc << 16 | 1 << 25 | 1 << 24 | dst_pitch;
+	*cmd++ = 0;
+	*cmd++ = rows << 16 | PITCH >> 2;
+	*cmd++ = 0; /* destination address, see dst_reloc */
+	*cmd++ = 0;
+	*cmd++ = src_pitch;
+	*cmd++ = 0; /* source address, see src_reloc */
+	*cmd++ = MI_BATCH_BUFFER_END;
+	*cmd++ = 0;
+
+	dst_reloc->offset = 4 * sizeof(uint32_t);
+	dst_reloc->delta = 0;
+	dst_reloc->target_handle = dst;
+	dst_reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+	dst_reloc->write_domain = I915_GEM_DOMAIN_RENDER;
+	dst_reloc->presumed_offset = 0;
+
+	src_reloc->offset = 7 * sizeof(uint32_t);
+	src_reloc->delta = 0;
+	src_reloc->target_handle = src;
+	src_reloc->read_domains = I915_GEM_DOMAIN_RENDER;
+	src_reloc->write_domain = 0;
+	src_reloc->presumed_offset = 0;
+
+	return (cmd - b) * sizeof(uint32_t);
+}
+
+/* Submit @execbuf @loops times; stop at, and return, the first ioctl error. */
+static int gem_exec(int fd, struct drm_i915_gem_execbuffer2 *execbuf, int loops)
+{
+	for (; loops != 0; loops--) {
+		int err = drmIoctl(fd, DRM_IOCTL_I915_GEM_EXECBUFFER2, execbuf);
+
+		if (err != 0)
+			return err;
+	}
+
+	return 0;
+}
+
+/* Mean time per iteration, in microseconds, between @start and @end. */
+static double elapsed(const struct timeval *start, const struct timeval *end, int loop)
+{
+	return ((end->tv_sec - start->tv_sec) * 1e6 +
+		(end->tv_usec - start->tv_usec)) / loop;
+}
+
+/*
+ * Format @v, a rate in bytes per second, into @buf as "<value><unit>/s" and
+ * return @buf. @v is divided by 1024 while it exceeds 1024 and a larger unit
+ * (up to PiB) exists. The caller must supply a large enough buffer.
+ */
+static const char *bytes_per_sec(char *buf, double v)
+{
+	static const char * const units[] = {
+		"B", "KiB", "MiB", "GiB", "TiB", "PiB",
+	};
+	size_t i = 0;
+
+	/* Stop at the last unit so huge (or infinite) rates stay in range. */
+	for (; v > 1024 && i + 1 < sizeof(units)/sizeof(units[0]); i++)
+		v /= 1024;
+
+	sprintf(buf, "%.1f%s/s", v, units[i]);
+	return buf;
+}
+
+/* Print one result line: mean time per iteration and the implied throughput. */
+static void report(const char *what, int object_size, int count,
+		   const struct timeval *start, const struct timeval *end)
+{
+	double usecs = elapsed(start, end, count);
+	char rate[64];
+
+	printf("Time to %s %d bytes x %6d:\t%7.3fµs, %s\n",
+	       what, object_size, count, usecs,
+	       bytes_per_sec(rate, object_size/usecs*1e6));
+	fflush(stdout);
+}
+
+/* Time 1, 2, 4, ... 1<<17 submissions of @execbuf, then gem_sync() on @batch. */
+static void time_blt(int fd, struct drm_i915_gem_execbuffer2 *execbuf,
+		     uint32_t batch, const char *what, int object_size)
+{
+	int count;
+
+	for (count = 1; count <= 1<<17; count <<= 1) {
+		struct timeval start, end;
+
+		gettimeofday(&start, NULL);
+		if (gem_exec(fd, execbuf, count))
+			exit(1);
+		gem_sync(fd, batch); /* presumably waits for the GPU - confirm */
+		gettimeofday(&end, NULL);
+		report(what, object_size, count, &start, &end);
+	}
+}
+
+/* Time 1, 2, 4, ... 32 memcpy()s of @object_size bytes from @src to @dst. */
+static void time_memcpy(void *dst, const void *src,
+			const char *what, int object_size)
+{
+	int count, c;
+
+	for (count = 1; count <= 32; count <<= 1) {
+		struct timeval start, end;
+
+		gettimeofday(&start, NULL);
+		for (c = 0; c < count; c++)
+			memcpy(dst, src, object_size);
+		gettimeofday(&end, NULL);
+		report(what, object_size, count, &start, &end);
+	}
+}
+
+/*
+ * Benchmark copies of @object_size bytes between an X-tiled bo ("GTT") and
+ * an untiled bo ("LLC"), in both directions: first with blits submitted via
+ * execbuffer2, then with memcpy() between a gem_mmap__gtt() mapping and a
+ * gem_mmap__cpu() mapping. Returns without measuring when gem_has_llc()
+ * reports false, and exits the process if an execbuffer submission fails.
+ */
+static void run(int object_size)
+{
+	struct drm_i915_gem_execbuffer2 execbuf;
+	struct drm_i915_gem_exec_object2 exec[3];
+	struct drm_i915_gem_relocation_entry reloc[4];
+	uint32_t buf[20];
+	uint32_t handle, gtt, llc;
+	void *ptr_llc, *ptr_gtt;
+	int fd, len;
+
+	fd = drm_open_any();
+	if (!gem_has_llc(fd)) {
+		close(fd); /* skip quietly, but do not leak the device fd */
+		return;
+	}
+
+	handle = gem_create(fd, 4096);
+	gtt = gem_create(fd, object_size);
+	gem_set_tiling(fd, gtt, I915_TILING_X, PITCH);
+	llc = gem_create(fd, object_size);
+
+	len = gem_tiled_blt(buf, gtt, llc, BLT_SRC_TILED, object_size, reloc);
+	gem_write(fd, handle, 0, buf, len);
+
+	memset(exec, 0, sizeof(exec));
+	exec[0].handle = gtt;
+	exec[1].handle = llc;
+	exec[2].handle = handle; /* batch object; carries the relocations */
+	exec[2].relocation_count = 2;
+	exec[2].relocs_ptr = (uintptr_t)reloc;
+
+	/* Everything not set below (cliprects, DR1/DR4, rsvd) stays zero. */
+	memset(&execbuf, 0, sizeof(execbuf));
+	execbuf.buffers_ptr = (uintptr_t)exec;
+	execbuf.buffer_count = 3;
+	execbuf.batch_len = len;
+	if (HAS_BLT_RING(intel_get_drm_devid(fd)))
+		execbuf.flags = I915_EXEC_BLT;
+
+	time_blt(fd, &execbuf, handle, "blt from GTT to LLC", object_size);
+
+	/* Same batch object, rewritten with source and destination swapped. */
+	len = gem_tiled_blt(buf, llc, gtt, BLT_DST_TILED, object_size, reloc);
+	gem_write(fd, handle, 0, buf, len);
+	execbuf.batch_len = len;
+	time_blt(fd, &execbuf, handle, "blt from LLC to GTT", object_size);
+
+	/* CPU copies between mappings of the same two buffer objects. */
+	ptr_llc = gem_mmap__cpu(fd, llc, object_size, PROT_READ | PROT_WRITE);
+	ptr_gtt = gem_mmap__gtt(fd, gtt, object_size, PROT_READ | PROT_WRITE);
+	time_memcpy(ptr_llc, ptr_gtt, "copy from GTT to LLC", object_size);
+	time_memcpy(ptr_gtt, ptr_llc, "copy from LLC to GTT", object_size);
+
+	/* run() is called once per size, so release everything it created. */
+	munmap(ptr_gtt, object_size);
+	munmap(ptr_llc, object_size);
+	gem_close(fd, llc);
+	gem_close(fd, gtt);
+	gem_close(fd, handle);
+	close(fd);
+}
+
+/* Benchmark each size given in argv (rounded up to PITCH), else OBJECT_SIZE. */
+int main(int argc, char **argv)
+{
+	int i;
+
+	if (argc < 2)
+		run(OBJECT_SIZE);
+	for (i = 1; i < argc; i++) {
+		int object_size = atoi(argv[i]);
+		/* Skip junk and sizes gem_tiled_blt()'s height assert rejects. */
+		if (object_size > 0 && object_size <= 0xffff * PITCH)
+			run((object_size + PITCH - 1) & -PITCH);
+	}
+	return 0;
+}
-- 
1.7.10




More information about the Intel-gfx mailing list