[Intel-gfx] [PATCH i-g-t] amdgpu/amd_syslatency: Measure the impact of CS load on the rest of the system
Chris Wilson
chris at chris-wilson.co.uk
Wed Jul 4 14:20:53 UTC 2018
Like benchmarks/gem_syslatency, but to investigate/compare the impact
with amdgpu.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
---
lib/igt_aux.h | 1 +
tests/Makefile.am | 2 +
tests/Makefile.sources | 1 +
tests/amdgpu/amd_syslatency.c | 404 ++++++++++++++++++++++++++++++++++
tests/amdgpu/meson.build | 1 +
5 files changed, 409 insertions(+)
create mode 100644 tests/amdgpu/amd_syslatency.c
diff --git a/lib/igt_aux.h b/lib/igt_aux.h
index 9a962881b..3641e4ee3 100644
--- a/lib/igt_aux.h
+++ b/lib/igt_aux.h
@@ -33,6 +33,7 @@
#include <stdbool.h>
#include <stddef.h>
#include <sys/time.h>
+#include <sys/syscall.h>
#include <i915/gem_submission.h>
diff --git a/tests/Makefile.am b/tests/Makefile.am
index f41ad5096..69300448a 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -156,5 +156,7 @@ amdgpu_amd_cs_nop_CFLAGS = $(AM_CFLAGS) $(DRM_AMDGPU_CFLAGS)
amdgpu_amd_cs_nop_LDADD = $(LDADD) $(DRM_AMDGPU_LIBS)
amdgpu_amd_prime_CFLAGS = $(AM_CFLAGS) $(DRM_AMDGPU_CFLAGS)
amdgpu_amd_prime_LDADD = $(LDADD) $(DRM_AMDGPU_LIBS)
+amdgpu_amd_syslatency_CFLAGS = $(AM_CFLAGS) $(DRM_AMDGPU_CFLAGS) -pthread
+amdgpu_amd_syslatency_LDADD = $(LDADD) $(DRM_AMDGPU_LIBS) -lpthread
endif
diff --git a/tests/Makefile.sources b/tests/Makefile.sources
index 54b4a3c21..002af360e 100644
--- a/tests/Makefile.sources
+++ b/tests/Makefile.sources
@@ -19,6 +19,7 @@ AMDGPU_TESTS = \
amdgpu/amd_basic \
amdgpu/amd_cs_nop \
amdgpu/amd_prime \
+ amdgpu/amd_syslatency \
$(NULL)
TESTS_progs = \
diff --git a/tests/amdgpu/amd_syslatency.c b/tests/amdgpu/amd_syslatency.c
new file mode 100644
index 000000000..b4fb2fc01
--- /dev/null
+++ b/tests/amdgpu/amd_syslatency.c
@@ -0,0 +1,404 @@
+/*
+ * Copyright © 2017 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "igt.h"
+#include "drmtest.h"
+
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <amdgpu.h>
+#include <amdgpu_drm.h>
+
+#include "igt_stats.h"
+
+/*
+ * Dword that busyspin() writes into every slot of the indirect buffer it
+ * submits.
+ * NOTE(review): presumably the NOP packet header accepted by the GFX and
+ * compute rings -- confirm against the amdgpu packet definitions.
+ */
+#define GFX_COMPUTE_NOP 0xffff1000
+/* Not referenced by any code visible in this patch. */
+#define SDMA_NOP 0x0
+
+/*
+ * Allocate a buffer object, bind it into the GPU virtual address space and
+ * map it for CPU access.
+ *
+ * @dev:        device to allocate from
+ * @size:       size of the buffer, also used as the size of the VA range
+ * @alignment:  used both as the BO physical alignment and the VA alignment
+ * @heap:       preferred heap for the allocation request
+ * @flags:      allocation flags, forwarded unchanged in the request
+ * @bo:         out: handle of the new buffer object
+ * @cpu:        out: CPU pointer, filled in directly by amdgpu_bo_cpu_map()
+ * @mc_address: out: GPU virtual address the buffer was mapped at
+ * @va_handle:  out: handle of the VA range backing @mc_address
+ *
+ * Returns 0 on success, otherwise the error code of the first call that
+ * failed. On failure everything acquired so far is released again and @bo,
+ * @mc_address and @va_handle are left untouched.
+ */
+static int
+amdgpu_bo_alloc_and_map(amdgpu_device_handle dev, unsigned size,
+			unsigned alignment, unsigned heap, uint64_t flags,
+			amdgpu_bo_handle *bo, void **cpu, uint64_t *mc_address,
+			amdgpu_va_handle *va_handle)
+{
+	struct amdgpu_bo_alloc_request request = {
+		.alloc_size = size,
+		.phys_alignment = alignment,
+		.preferred_heap = heap,
+		.flags = flags,
+	};
+	amdgpu_bo_handle buf_handle;
+	amdgpu_va_handle handle;
+	uint64_t vmc_addr;
+	int r;
+
+	r = amdgpu_bo_alloc(dev, &request, &buf_handle);
+	if (r)
+		return r;
+
+	r = amdgpu_va_range_alloc(dev,
+				  amdgpu_gpu_va_range_general,
+				  size, alignment, 0, &vmc_addr,
+				  &handle, 0);
+	if (r)
+		goto error_va_alloc;
+
+	r = amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_MAP);
+	if (r)
+		goto error_va_map;
+
+	r = amdgpu_bo_cpu_map(buf_handle, cpu);
+	if (r)
+		goto error_cpu_map;
+
+	*bo = buf_handle;
+	*mc_address = vmc_addr;
+	*va_handle = handle;
+
+	return 0;
+
+	/*
+	 * Each label undoes the steps that succeeded *before* the failing
+	 * one, in reverse order of acquisition.
+	 */
+error_cpu_map:
+	/* The CPU map failed, so only the GPU mapping needs to be removed. */
+	amdgpu_bo_va_op(buf_handle, 0, size, vmc_addr, 0, AMDGPU_VA_OP_UNMAP);
+
+error_va_map:
+	/* The VA range was reserved but is not (or no longer) in use. */
+	amdgpu_va_range_free(handle);
+
+error_va_alloc:
+	amdgpu_bo_free(buf_handle);
+	return r;
+}
+
+/*
+ * Release a buffer in four steps: drop the CPU mapping, remove the GPU
+ * mapping of @size bytes at @mc_addr, give back the VA range and finally
+ * free the buffer object itself. None of the return values are inspected.
+ */
+static void
+amdgpu_bo_unmap_and_free(amdgpu_bo_handle bo, amdgpu_va_handle va_handle,
+			 uint64_t mc_addr, uint64_t size)
+{
+	/* CPU side first ... */
+	(void)amdgpu_bo_cpu_unmap(bo);
+
+	/* ... then the GPU mapping and the address range it lived in ... */
+	(void)amdgpu_bo_va_op(bo, 0, size, mc_addr, 0, AMDGPU_VA_OP_UNMAP);
+	(void)amdgpu_va_range_free(va_handle);
+
+	/* ... and last the backing object. */
+	(void)amdgpu_bo_free(bo);
+}
+
+/*
+ * Block until the submission identified by @seqno on (@ip_type, @ring) of
+ * @context has completed, waiting with AMDGPU_TIMEOUT_INFINITE.
+ *
+ * @seqno is 64 bits wide so that the sequence number reported in
+ * amdgpu_cs_request.seq_no can be passed through without truncation.
+ *
+ * Asserts that the fence query itself succeeded.
+ */
+static void amdgpu_cs_sync(amdgpu_context_handle context,
+			   unsigned int ip_type,
+			   int ring,
+			   uint64_t seqno)
+{
+	struct amdgpu_cs_fence fence = {
+		.context = context,
+		.ip_type = ip_type,
+		.ring = ring,
+		.fence = seqno,
+	};
+	uint32_t expired;
+	int err;
+
+	err = amdgpu_cs_query_fence_status(&fence,
+					   AMDGPU_TIMEOUT_INFINITE,
+					   0, &expired);
+	igt_assert_eq(err, 0);
+}
+
+/*
+ * Stop flag shared with the worker threads: syslatency() clears it before
+ * starting them and sets it to 1 after sleeping for the requested timeout.
+ * busyspin() and sys_wait() test it at the top of every loop iteration.
+ */
+static volatile int done;
+
+/*
+ * Per-thread state for one busyspin() worker. syslatency() fills in device,
+ * ip_type and ring before creating the thread; the worker resets and then
+ * increments count once per submission, and syslatency() reads it back
+ * after joining the thread.
+ */
+struct busyspin {
+ pthread_t thread;
+ unsigned long count;
+ amdgpu_device_handle device;
+ unsigned int ip_type;
+ unsigned int ring;
+};
+
+/*
+ * Per-thread state for one sys_wait() worker: the thread handle plus the
+ * running statistics of the wakeup delays (in nanoseconds, as produced by
+ * elapsed()) that the worker records with igt_mean_add().
+ */
+struct sys_wait {
+ pthread_t thread;
+ struct igt_mean mean;
+};
+
+/*
+ * Best-effort attempt to keep the CPUs out of deep sleep states by writing
+ * a 32-bit zero to /dev/cpu_dma_latency. If the file cannot be opened or
+ * the write fails, a diagnostic is printed to stderr and execution
+ * continues.
+ *
+ * The descriptor is never closed by this function.
+ * NOTE(review): presumably deliberate, as the latency request is expected
+ * to stay in force only while the file is held open -- confirm against the
+ * kernel PM QoS documentation.
+ */
+static void force_low_latency(void)
+{
+	int32_t zero_latency = 0;
+	int fd;
+
+	fd = open("/dev/cpu_dma_latency", O_RDWR);
+	if (fd >= 0 && write(fd, &zero_latency, sizeof(zero_latency)) >= 0)
+		return;
+
+	fprintf(stderr,
+		"Unable to prevent CPU sleeps and force low latency using /dev/cpu_dma_latency: %s\n",
+		strerror(errno));
+}
+
+/*
+ * Thread body: keep one ring of the GPU busy until the global done flag is
+ * raised.
+ *
+ * @arg points to a struct busyspin describing the device and the
+ * (ip_type, ring) pair to load. The thread creates its own context, builds
+ * a single 4KiB GTT buffer holding 16 GFX_COMPUTE_NOP dwords and resubmits
+ * that buffer in a tight loop, counting submissions in bs->count. Before
+ * returning it waits for the last submission and releases everything it
+ * created.
+ *
+ * Every libdrm call whose result is needed later is asserted to succeed, so
+ * that a failure is reported rather than silently skewing the cycle count
+ * or leading to use of an uninitialised context.
+ *
+ * Always returns NULL.
+ */
+static void *busyspin(void *arg)
+{
+	struct busyspin *bs = arg;
+	amdgpu_bo_handle ib_result_handle;
+	void *ib_result_cpu;
+	uint64_t ib_result_mc_address;
+	amdgpu_context_handle context;
+	struct amdgpu_cs_request ibs_request;
+	struct amdgpu_cs_ib_info ib_info;
+	uint32_t *ptr;
+	amdgpu_bo_list_handle bo_list;
+	amdgpu_va_handle va_handle;
+	int i, r;
+
+	r = amdgpu_cs_ctx_create(bs->device, &context);
+	igt_assert_eq(r, 0);
+
+	r = amdgpu_bo_alloc_and_map(bs->device, 4096, 4096,
+				    AMDGPU_GEM_DOMAIN_GTT, 0,
+				    &ib_result_handle, &ib_result_cpu,
+				    &ib_result_mc_address, &va_handle);
+	igt_assert_eq(r, 0);
+
+	/* The whole indirect buffer is nothing but NOPs. */
+	ptr = ib_result_cpu;
+	for (i = 0; i < 16; ++i)
+		ptr[i] = GFX_COMPUTE_NOP;
+
+	r = amdgpu_bo_list_create(bs->device, 1, &ib_result_handle, NULL, &bo_list);
+	igt_assert_eq(r, 0);
+
+	memset(&ib_info, 0, sizeof(struct amdgpu_cs_ib_info));
+	ib_info.ib_mc_address = ib_result_mc_address;
+	ib_info.size = 16;
+
+	memset(&ibs_request, 0, sizeof(struct amdgpu_cs_request));
+	ibs_request.ip_type = bs->ip_type;
+	ibs_request.ring = bs->ring;
+	ibs_request.number_of_ibs = 1;
+	ibs_request.ibs = &ib_info;
+	ibs_request.resources = bo_list;
+
+	bs->count = 0;
+	while (!done) {
+		r = amdgpu_cs_submit(context, 0, &ibs_request, 1);
+		igt_assert_eq(r, 0);
+		bs->count++;
+	}
+
+	/* Drain the ring before tearing down the buffer it is executing. */
+	amdgpu_cs_sync(context, bs->ip_type, bs->ring, ibs_request.seq_no);
+
+	r = amdgpu_bo_list_destroy(bo_list);
+	igt_assert_eq(r, 0);
+
+	amdgpu_bo_unmap_and_free(ib_result_handle, va_handle,
+				 ib_result_mc_address, 4096);
+
+	amdgpu_cs_ctx_free(context);
+
+	return NULL;
+}
+
+/*
+ * Time from *a to *b in nanoseconds, returned as a double. The result is
+ * negative when *b lies before *a.
+ */
+static double elapsed(const struct timespec *a, const struct timespec *b)
+{
+	const time_t whole_seconds = b->tv_sec - a->tv_sec;
+	const long leftover_ns = b->tv_nsec - a->tv_nsec;
+
+	return 1e9 * whole_seconds + leftover_ns;
+}
+
+/*
+ * Thread body: measure how late timer wakeups are delivered to this thread.
+ *
+ * @arg points to a struct sys_wait whose mean accumulator receives one
+ * sample per wakeup. The thread blocks SIGRTMIN, creates a CLOCK_MONOTONIC
+ * timer that signals this thread, and then repeatedly arms the timer for an
+ * absolute deadline 100us plus a random amount below 1ms after the previous
+ * wakeup, waits for the signal with sigwait() and records the time between
+ * the programmed deadline and the moment it actually woke up (nanoseconds).
+ *
+ * Creating and arming the timer are asserted to succeed: if either failed
+ * no signal would ever arrive and the thread would block in sigwait()
+ * forever.
+ *
+ * Always returns NULL.
+ */
+static void *sys_wait(void *arg)
+{
+	struct sys_wait *w = arg;
+	struct sigevent sev;
+	timer_t timer;
+	sigset_t mask;
+	struct timespec now;
+	int r;
+#define SIG SIGRTMIN
+
+	sigemptyset(&mask);
+	sigaddset(&mask, SIG);
+	sigprocmask(SIG_SETMASK, &mask, NULL);
+
+	/* Do not hand uninitialised members of sev to the kernel. */
+	memset(&sev, 0, sizeof(sev));
+	sev.sigev_notify = SIGEV_SIGNAL | SIGEV_THREAD_ID;
+	sev.sigev_notify_thread_id = gettid();
+	sev.sigev_signo = SIG;
+	r = timer_create(CLOCK_MONOTONIC, &sev, &timer);
+	igt_assert_eq(r, 0);
+
+	clock_gettime(CLOCK_MONOTONIC, &now);
+	while (!done) {
+		struct itimerspec its;
+		int sigs;
+
+		/* Next deadline: 100us + [0, 1ms) after the last wakeup. */
+		its.it_value = now;
+		its.it_value.tv_nsec += 100 * 1000;
+		its.it_value.tv_nsec += rand() % (NSEC_PER_SEC / 1000);
+		if (its.it_value.tv_nsec >= NSEC_PER_SEC) {
+			its.it_value.tv_nsec -= NSEC_PER_SEC;
+			its.it_value.tv_sec += 1;
+		}
+		/* One-shot: no automatic reload. */
+		its.it_interval.tv_sec = its.it_interval.tv_nsec = 0;
+		r = timer_settime(timer, TIMER_ABSTIME, &its, NULL);
+		igt_assert_eq(r, 0);
+
+		sigwait(&mask, &sigs);
+		clock_gettime(CLOCK_MONOTONIC, &now);
+		igt_mean_add(&w->mean, elapsed(&its.it_value, &now));
+	}
+
+	sigprocmask(SIG_UNBLOCK, &mask, NULL);
+	timer_delete(timer);
+
+	return NULL;
+}
+
+/*
+ * Restrict threads created with @attr to the single CPU @cpu. A @cpu of -1
+ * leaves @attr untouched, and the whole function is a no-op when built
+ * without __USE_GNU. The result of setting the affinity is not checked.
+ */
+static void bind_cpu(pthread_attr_t *attr, int cpu)
+{
+#ifdef __USE_GNU
+	if (cpu != -1) {
+		cpu_set_t only_this_cpu;
+
+		CPU_ZERO(&only_this_cpu);
+		CPU_SET(cpu, &only_this_cpu);
+
+		pthread_attr_setaffinity_np(attr, sizeof(only_this_cpu),
+					    &only_this_cpu);
+	}
+#endif
+}
+
+/*
+ * Configure @attr so that threads created with it run under SCHED_FIFO at
+ * priority @prio, using the attribute's own scheduling settings instead of
+ * inheriting those of the creating thread.
+ *
+ * Does nothing when PTHREAD_EXPLICIT_SCHED is not available. The results of
+ * the individual pthread_attr_set*() calls are not checked.
+ */
+static void rtprio(pthread_attr_t *attr, int prio)
+{
+#ifdef PTHREAD_EXPLICIT_SCHED
+	struct sched_param param = { .sched_priority = prio };
+
+	pthread_attr_setinheritsched(attr, PTHREAD_EXPLICIT_SCHED);
+	pthread_attr_setschedpolicy(attr, SCHED_FIFO);
+	pthread_attr_setschedparam(attr, &param);
+#endif
+}
+
+/*
+ * Pick a location estimate for @stats according to how many values it
+ * holds: the trimean for more than 9 values, the median for more than 5,
+ * and the plain mean otherwise.
+ */
+static double l_estimate(igt_stats_t *stats)
+{
+	if (stats->n_values > 9)
+		return igt_stats_get_trimean(stats);
+
+	if (stats->n_values > 5)
+		return igt_stats_get_median(stats);
+
+	return igt_stats_get_mean(stats);
+}
+
+/*
+ * Estimate the cost of reading the clock: take one CLOCK_MONOTONIC
+ * timestamp, read the clock 1024 more times, and return the elapsed time
+ * between the first and the last reading divided by the number of reads
+ * (nanoseconds per call).
+ */
+static double min_measurement_error(void)
+{
+	struct timespec first, last;
+	int calls = 0;
+
+	clock_gettime(CLOCK_MONOTONIC, &first);
+	while (calls < 1024) {
+		clock_gettime(CLOCK_MONOTONIC, &last);
+		calls++;
+	}
+
+	return elapsed(&first, &last) / calls;
+}
+
+/*
+ * Run one latency measurement.
+ *
+ * @device:  amdgpu device the load is submitted to
+ * @min:     baseline in nanoseconds subtracted from the reported latencies
+ * @name:    label printed in front of the result
+ * @ip_type: hardware IP block to load
+ * @ring:    ring of that IP block to load
+ * @timeout: duration of the measurement in seconds
+ *
+ * For every online CPU one busyspin() thread (GPU load) and one SCHED_FIFO
+ * sys_wait() thread (wakeup latency probe) are started, each bound to that
+ * CPU. After @timeout seconds the threads are stopped and the mean number
+ * of submissions plus the mean and estimated maximum wakeup latency, in
+ * microseconds, are printed.
+ *
+ * If a thread cannot be created, the threads already running are stopped
+ * and joined, all resources are released and the function then asserts.
+ */
+static void syslatency(amdgpu_device_handle device,
+		       double min,
+		       const char *name,
+		       unsigned int ip_type,
+		       unsigned int ring,
+		       unsigned int timeout)
+{
+	const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
+	struct busyspin *busy;
+	struct sys_wait *wait;
+	pthread_attr_t attr;
+	igt_stats_t cycles, mean, max;
+	int nbusy = 0, nwait = 0;
+	int err = 0;
+	int n;
+
+	igt_assert(ncpus > 0);
+
+	busy = calloc(ncpus, sizeof(*busy));
+	wait = calloc(ncpus, sizeof(*wait));
+	igt_assert(busy && wait);
+
+	done = 0;
+
+	pthread_attr_init(&attr);
+	for (n = 0; n < ncpus && !err; n++) {
+		bind_cpu(&attr, n);
+		busy[n].device = device;
+		busy[n].ip_type = ip_type;
+		busy[n].ring = ring;
+		err = pthread_create(&busy[n].thread, &attr,
+				     busyspin, &busy[n]);
+		if (!err)
+			nbusy++;
+	}
+	/* An attr object must be destroyed before it is initialised again. */
+	pthread_attr_destroy(&attr);
+
+	pthread_attr_init(&attr);
+	rtprio(&attr, 99);
+	for (n = 0; n < ncpus && !err; n++) {
+		igt_mean_init(&wait[n].mean);
+		bind_cpu(&attr, n);
+		err = pthread_create(&wait[n].thread, &attr,
+				     sys_wait, &wait[n]);
+		if (!err)
+			nwait++;
+	}
+	pthread_attr_destroy(&attr);
+
+	/* No point in measuring with an incomplete set of threads. */
+	if (!err)
+		sleep(timeout);
+	done = 1;
+
+	/* Only join what was actually started. */
+	igt_stats_init_with_size(&cycles, ncpus);
+	for (n = 0; n < nbusy; n++) {
+		pthread_join(busy[n].thread, NULL);
+		igt_stats_push(&cycles, busy[n].count);
+	}
+
+	igt_stats_init_with_size(&mean, ncpus);
+	igt_stats_init_with_size(&max, ncpus);
+	for (n = 0; n < nwait; n++) {
+		pthread_join(wait[n].thread, NULL);
+		igt_stats_push_float(&mean, wait[n].mean.mean);
+		igt_stats_push_float(&max, wait[n].mean.max);
+	}
+
+	if (!err)
+		igt_info("%s: cycles=%.0f, latency mean=%.3fus max=%.0fus\n",
+			 name,
+			 igt_stats_get_mean(&cycles),
+			 (igt_stats_get_mean(&mean) - min)/ 1000,
+			 (l_estimate(&max) - min) / 1000);
+
+	igt_stats_fini(&max);
+	igt_stats_fini(&mean);
+	igt_stats_fini(&cycles);
+	free(wait);
+	free(busy);
+
+	igt_assert_f(err == 0,
+		     "Failed to create worker thread: %s\n", strerror(err));
+}
+
+/*
+ * Test entry point: opens an amdgpu device, then runs syslatency() once per
+ * entry of the engines table as the subtests "compute-0" and "gfx-0".
+ */
+igt_main
+{
+ amdgpu_device_handle device;
+ /* Engines to load; the entry with a NULL name terminates the table. */
+ const struct engine {
+ const char *name;
+ unsigned int ip_type;
+ } engines[] = {
+ { "compute", AMDGPU_HW_IP_COMPUTE },
+ { "gfx", AMDGPU_HW_IP_GFX },
+ { },
+ }, *e;
+ double min;
+ int fd = -1;
+
+ /*
+ * Setup: open and initialise the device, request low CPU wakeup
+ * latency and measure the clock-read overhead that is later passed to
+ * syslatency() as its baseline.
+ * NOTE(review): igt_require() presumably skips the subtests when no
+ * amdgpu device is available -- confirm against the IGT core docs.
+ */
+ igt_fixture {
+ uint32_t major, minor;
+ int err;
+
+ fd = __drm_open_driver(DRIVER_AMDGPU);
+ igt_require(fd >= 0);
+
+ err = amdgpu_device_initialize(fd, &major, &minor, &device);
+ igt_require(err == 0);
+
+ force_low_latency();
+ min = min_measurement_error();
+ }
+
+ /* One subtest per engine, always on ring 0, running for 20 seconds. */
+ for (e = engines; e->name; e++) {
+ igt_subtest_f("%s-0", e->name)
+ syslatency(device, min, e->name, e->ip_type, 0, 20);
+ }
+
+ /* Teardown: release the device and close the file descriptor. */
+ igt_fixture {
+ amdgpu_device_deinitialize(device);
+ close(fd);
+ }
+}
diff --git a/tests/amdgpu/meson.build b/tests/amdgpu/meson.build
index af5e74c7a..d24a84f68 100644
--- a/tests/amdgpu/meson.build
+++ b/tests/amdgpu/meson.build
@@ -5,6 +5,7 @@ if libdrm_amdgpu.found()
amdgpu_progs += [ 'amd_basic',
'amd_cs_nop',
'amd_prime',
+ 'amd_syslatency',
]
amdgpu_deps += libdrm_amdgpu
endif
--
2.18.0
More information about the Intel-gfx
mailing list