[igt-dev] [PATCH] i915/poc: Use semaphore wait to sync gpu and cpu times
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Wed Aug 23 18:45:38 UTC 2023
This is just a POC to sync gpu and cpu time. The requirement is to
provide a solution that works with SRIOV as well.
The CS will block polling on a semaphore. The semaphore is signaled by
the CPU writing the CPU timestamp into the semaphore data (SAD) location.
As soon as the CS unblocks, it reads the RING_TIMESTAMP. This makes the
two values as close to each other as possible.
Accuracy is within a few us (1 to 2). Repeated runs get better accuracy.
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
tests/i915/cpu_gpu_time.c | 220 ++++++++++++++++++++++++++++++++++++++
tests/meson.build | 1 +
2 files changed, 221 insertions(+)
create mode 100644 tests/i915/cpu_gpu_time.c
diff --git a/tests/i915/cpu_gpu_time.c b/tests/i915/cpu_gpu_time.c
new file mode 100644
index 000000000..a87a3fa88
--- /dev/null
+++ b/tests/i915/cpu_gpu_time.c
@@ -0,0 +1,220 @@
+/*
+ * Copyright © 2023 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <errno.h>
+#include <signal.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/times.h>
+#include <sys/types.h>
+#include <dirent.h>
+#include <time.h>
+#include <poll.h>
+#include <math.h>
+
+#include "i915/gem.h"
+#include "i915/gem_create.h"
+#include "igt.h"
+#include "igt_core.h"
+#include "igt_device.h"
+#include "igt_kmod.h"
+#include "igt_perf.h"
+#include "igt_sysfs.h"
+#include "igt_pm.h"
+#include "intel_ctx.h"
+#include "sw_sync.h"
+
+/**
+ * TEST: cpu_gpu_time
+ * Description: Test correlated time
+ * Run type: FULL
+ *
+ * SUBTEST: cpu-gpu-time
+ * Description: Test time correlation
+ * Feature: i915 streaming interface, oa
+ * Test category: Perf
+ */
+
+/**
+ * test_cpu_gpu_time - sample a CPU clock and a GPU engine timestamp as
+ * close together as possible.
+ * @gem_fd: open i915 DRM fd
+ * @ctx: context to submit on
+ * @e: engine to submit to
+ * @cpu_ns: out: CLOCK_REALTIME sample, in nanoseconds
+ * @gpu_ns: out: RING_TIMESTAMP sample, converted to nanoseconds
+ *
+ * A batch is submitted that (1) writes a "started" flag into a shared bo,
+ * (2) blocks on MI_SEMAPHORE_WAIT polling a dword in that bo, and
+ * (3) stores the engine's RING_TIMESTAMP once unblocked. The CPU signals
+ * the semaphore by having clock_gettime() store the timestamp directly
+ * into the polled dword, so the CPU and GPU samples happen back to back.
+ *
+ * Shared bo layout (dword offsets into obj_ptr):
+ *   [0]      "batch started" flag, set to 1 by MI_STORE_DWORD_IMM
+ *   [1]-[4]  struct timespec written by clock_gettime(); [1] (tv_sec) is
+ *            also the dword the semaphore polls — any post-1970 time
+ *            satisfies the GTE-1 comparison
+ *   [5]-[6]  RING_TIMESTAMP low/high dwords from the two SRMs
+ */
+static void
+test_cpu_gpu_time(int gem_fd,
+ const intel_ctx_t *ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t *cpu_ns,
+ uint64_t *gpu_ns)
+{
+ struct drm_i915_gem_relocation_entry reloc[2] = {};
+ struct drm_i915_gem_exec_object2 obj[2] = {};
+ struct drm_i915_gem_execbuffer2 eb = {};
+ uint32_t bb_handle, obj_handle;
+ uint32_t *obj_ptr;
+ uint32_t batch[64];
+ uint32_t mmio_base;
+ uint64_t ahnd = get_reloc_ahnd(gem_fd, ctx->id);
+ uint64_t obj_offset, bb_offset, *gpu_ts;
+ struct timespec *ts;
+ int i = 0;
+
+ /* MI_STORE_DWORD_IMM / MI_SEMAPHORE_WAIT forms used below are gen8+ */
+ igt_require(intel_gen(intel_get_drm_devid(gem_fd)) >= 8);
+
+ mmio_base = gem_engine_mmio_base(gem_fd, e->name);
+
+ /**
+ * Set up a batchbuffer with a polling semaphore wait command which
+ * will wait on a value in a shared bo to change. This way we are able
+ * to control how much time we will spend in this bb.
+ */
+
+ bb_handle = gem_create(gem_fd, 4096);
+ obj_handle = gem_create(gem_fd, 4096);
+ bb_offset = get_offset(ahnd, bb_handle, 4096, 0);
+ obj_offset = get_offset(ahnd, obj_handle, 4096, 0);
+
+ obj_ptr = gem_mmap__device_coherent(gem_fd, obj_handle, 0, 4096, PROT_WRITE);
+
+#define obj(__o) (obj_offset + __o)
+ /* Poll from CPU to check the batch started */
+ batch[i++] = MI_STORE_DWORD_IMM_GEN4;
+ batch[i++] = obj(0);
+ batch[i++] = obj(0) >> 32;
+ batch[i++] = 1;
+
+ /* Block the batch until this offset has a value GTE than 1 */
+ batch[i++] = MI_SEMAPHORE_WAIT |
+ MI_SEMAPHORE_POLL |
+ MI_SEMAPHORE_SAD_GTE_SDD;
+ batch[i++] = 1;
+ batch[i++] = obj(4);
+ batch[i++] = obj(4) >> 32;
+
+ /*
+ * Once unblocked, capture RING timestamp.
+ * RING_TIMESTAMP LDW is at mmio_base + 0x358, UDW at + 0x35c.
+ */
+ batch[i++] = MI_STORE_REGISTER_MEM_GEN8;
+ batch[i++] = mmio_base + 0x358;
+ batch[i++] = obj(20);
+ batch[i++] = 0;
+
+ batch[i++] = MI_STORE_REGISTER_MEM_GEN8;
+ batch[i++] = mmio_base + 0x35c;
+ batch[i++] = obj(24);
+ batch[i++] = 0;
+
+ batch[i++] = MI_BATCH_BUFFER_END;
+
+ /*
+ * NOTE(review): only ~18 dwords of batch[64] are initialized, but the
+ * whole 256-byte array is copied in; the CS stops at BBE, but stack
+ * garbage lands in the bo — consider memset(batch, 0, sizeof(batch)).
+ */
+ gem_write(gem_fd, bb_handle, 0, batch, sizeof(batch));
+
+ /* Relocation for the MI_STORE_DWORD_IMM address at batch[1] */
+ reloc[0].target_handle = obj_handle;
+ reloc[0].offset = 1 * sizeof(uint32_t);
+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
+ reloc[0].delta = 4;
+
+ /*
+ * Relocation for the MI_SEMAPHORE_WAIT address at batch[6].
+ * NOTE(review): the two SRM target addresses (batch[10], batch[14])
+ * have no relocation entries, so in relocation (non-softpin) mode they
+ * would not be fixed up — confirm this path, or add relocs for them.
+ */
+ reloc[1].target_handle = obj_handle;
+ reloc[1].offset = 6 * sizeof(uint32_t);
+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
+
+ obj[0].handle = obj_handle;
+
+ obj[1].handle = bb_handle;
+ obj[1].relocation_count = !ahnd ? 2 : 0;
+ obj[1].relocs_ptr = to_user_pointer(reloc);
+
+ eb.buffer_count = 2;
+ eb.buffers_ptr = to_user_pointer(obj);
+ eb.flags = e->flags;
+ eb.rsvd1 = ctx->id;
+
+ /* Softpin mode: pin both objects at the offsets baked into the batch */
+ if (ahnd) {
+ obj[0].flags |= EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE;
+ obj[0].offset = obj_offset;
+ obj[1].flags |= EXEC_OBJECT_PINNED;
+ obj[1].offset = bb_offset;
+ }
+
+ gem_execbuf(gem_fd, &eb);
+
+ /* wait for the batch to start executing */
+ while (!obj_ptr[0])
+ usleep(5e3);
+
+ /*
+ * Writing the timestamp is also the semaphore signal: tv_sec lands in
+ * obj_ptr[1], the dword MI_SEMAPHORE_WAIT polls.
+ * NOTE(review): obj_ptr[1] is only 4-byte aligned; casting it to
+ * struct timespec * (8-byte alignment) and storing through it is
+ * technically misaligned access / UB — consider clock_gettime() into a
+ * local and memcpy() into the bo.
+ */
+ ts = (struct timespec *)&obj_ptr[1];
+ clock_gettime(CLOCK_REALTIME, ts);
+
+ gem_sync(gem_fd, bb_handle);
+
+ for (int j = 0; j < 16; j++)
+ igt_debug("[%d] %08x\n", j, obj_ptr[j]);
+
+ /*
+ * Assemble the 64-bit RING_TIMESTAMP from dwords [5]-[6].
+ * NOTE(review): &obj_ptr[5] is a misaligned uint64_t read (offset 20);
+ * and 19200 hard-codes a 19.2 MHz timestamp frequency — presumably this
+ * should be queried from the kernel (CS timestamp frequency) instead.
+ */
+ gpu_ts = (uint64_t *) &obj_ptr[5];
+ *gpu_ns = (*gpu_ts * 1000000) / 19200;
+ *cpu_ns = ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+
+ munmap(obj_ptr, 4096);
+ gem_close(gem_fd, obj_handle);
+ gem_close(gem_fd, bb_handle);
+ put_ahnd(ahnd);
+}
+
+/*
+ * Expand subtest T into one dynamic subtest per engine of ctx, named
+ * after the engine; e iterates over the engines in the body.
+ */
+#define test_each_engine(T, i915, ctx, e) \
+ igt_subtest_with_dynamic(T) for_each_ctx_engine(i915, ctx, e) \
+ igt_dynamic_f("%s", e->name)
+
+/*
+ * Per engine: sample correlated CPU/GPU timestamps, and across repeated
+ * runs report how far apart the CPU and GPU deltas drift (d_d); a small
+ * d_d means the two clocks were captured close together and tick in sync.
+ */
+igt_main
+{
+ const struct intel_execution_engine2 *e;
+ uint64_t prev_cpu = 0, prev_gpu = 0;
+ uint64_t cpu_ns, gpu_ns;
+ const intel_ctx_t *ctx;
+ int device;
+
+ igt_fixture {
+ drm_load_module(DRIVER_INTEL);
+ device = drm_open_driver(DRIVER_INTEL);
+ igt_require_gem(device);
+ ctx = intel_ctx_create_all_physical(device);
+ }
+
+ igt_describe("Capture cpu and gpu time close to each other");
+ test_each_engine("cpu-gpu-time", device, ctx, e) {
+ test_cpu_gpu_time(device, ctx, e, &cpu_ns, &gpu_ns);
+ /* uint64_t needs PRIu64; %ld is wrong where long is 32-bit */
+ igt_debug("CPU = %" PRIu64 ", GPU = %" PRIu64 "\n", cpu_ns, gpu_ns);
+ if (prev_cpu && prev_gpu) {
+ int64_t cpu_delta = cpu_ns - prev_cpu;
+ int64_t gpu_delta = gpu_ns - prev_gpu;
+
+ igt_debug("d_CPU = %" PRId64 ", d_GPU = %" PRId64 "\n", cpu_delta, gpu_delta);
+ /* llabs, not labs: the argument is int64_t and labs truncates on 32-bit */
+ igt_info("d_d = %lld\n", llabs(gpu_delta - cpu_delta));
+ }
+ prev_cpu = cpu_ns;
+ prev_gpu = gpu_ns;
+ }
+
+ igt_fixture {
+ intel_ctx_destroy(device, ctx);
+ drm_close_driver(device);
+ }
+}
diff --git a/tests/meson.build b/tests/meson.build
index 58061dbc2..c18dae125 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -260,6 +260,7 @@ i915_progs = [
'sysfs_heartbeat_interval',
'sysfs_preempt_timeout',
'sysfs_timeslice_duration',
+ 'cpu_gpu_time',
]
xe_progs = [
--
2.34.1
More information about the igt-dev
mailing list