[igt-dev] [PATCH v4 02/14] tests/intel/xe_query: Add a test for querying engine cycles
Francois Dugast
francois.dugast at intel.com
Thu Sep 28 11:05:04 UTC 2023
From: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
The DRM_XE_DEVICE_QUERY_ENGINE_CYCLES query provides a way for the user to obtain
CPU and GPU timestamps as close to each other as possible.
Add a test to query engine cycles and GPU/CPU time correlation as well as
validate the parameters.
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
Signed-off-by: Francois Dugast <francois.dugast at intel.com>
Signed-off-by: Rodrigo Vivi <rodrigo.vivi at intel.com>
[Rodrigo rebased after s/cs/engine]
---
include/drm-uapi/xe_drm.h | 104 +++++++++++++++-----
tests/intel/xe_query.c | 195 ++++++++++++++++++++++++++++++++++++++
2 files changed, 275 insertions(+), 24 deletions(-)
diff --git a/include/drm-uapi/xe_drm.h b/include/drm-uapi/xe_drm.h
index 13cd6a73d..8a702e6f4 100644
--- a/include/drm-uapi/xe_drm.h
+++ b/include/drm-uapi/xe_drm.h
@@ -128,6 +128,25 @@ struct xe_user_extension {
#define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence)
#define DRM_IOCTL_XE_VM_MADVISE DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
+/** struct drm_xe_engine_class_instance - instance of an engine class */
+struct drm_xe_engine_class_instance {
+#define DRM_XE_ENGINE_CLASS_RENDER 0
+#define DRM_XE_ENGINE_CLASS_COPY 1
+#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2
+#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3
+#define DRM_XE_ENGINE_CLASS_COMPUTE 4
+ /*
+ * Kernel only class (not actual hardware engine class). Used for
+ * creating ordered queues of VM bind operations.
+ */
+#define DRM_XE_ENGINE_CLASS_VM_BIND 5
+ /** @engine_class: One of the DRM_XE_ENGINE_CLASS_* values defined above. */
+ __u16 engine_class;
+
+ /*
+ * @engine_instance: presumably the index of the engine within its
+ * class - TODO confirm against the kernel documentation.
+ */
+ __u16 engine_instance;
+ /* @gt_id: presumably the id of the GT the engine belongs to - TODO confirm. */
+ __u16 gt_id;
+ /*
+ * @rsvd: NOTE(review): looks like reserved padding that brings the
+ * struct to four __u16 members (8 bytes); confirm whether the kernel
+ * requires it to be zero.
+ */
+ __u16 rsvd;
+};
+
/**
* enum drm_xe_memory_class - Supported memory classes.
*/
@@ -219,6 +238,60 @@ struct drm_xe_query_mem_region {
__u64 reserved[6];
};
+/**
+ * struct drm_xe_query_engine_cycles - correlate CPU and GPU timestamps
+ *
+ * If a query is made with a struct drm_xe_device_query where .query is equal to
+ * DRM_XE_DEVICE_QUERY_ENGINE_CYCLES, then the reply uses struct drm_xe_query_engine_cycles
+ * in .data. struct drm_xe_query_engine_cycles is allocated by the user and
+ * .data points to this allocated structure.
+ *
+ * The query returns the engine cycles and the frequency that can
+ * be used to calculate the engine timestamp. In addition, the
+ * query returns a set of CPU timestamps that indicate when the command
+ * streamer cycle count was captured.
+ *
+ * @eci and @clockid are inputs set by the user; the remaining fields are
+ * filled in by the query.
+ */
+struct drm_xe_query_engine_cycles {
+ /**
+ * @eci: Input from the user: the engine whose command streamer
+ * cycles are queried.
+ */
+ struct drm_xe_engine_class_instance eci;
+
+ /**
+ * @clockid: Input from the user: the reference clock id for the
+ * CPU timestamp. For definition, see clock_gettime(2) and
+ * perf_event_open(2). Supported clock ids are CLOCK_MONOTONIC,
+ * CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME, CLOCK_TAI.
+ */
+ __s32 clockid;
+
+ /** @width: Width of the engine cycle counter in bits. */
+ __u32 width;
+
+ /**
+ * @engine_cycles: Engine cycles as read from its register
+ * at 0x358 offset.
+ */
+ __u64 engine_cycles;
+
+ /**
+ * @engine_frequency: Frequency of the engine cycles in Hz. A cycle
+ * count converts to nanoseconds as cycles * NSEC_PER_SEC / frequency.
+ */
+ __u64 engine_frequency;
+
+ /**
+ * @cpu_timestamp: CPU timestamp in ns. The timestamp is captured before
+ * reading the engine_cycles register using the reference clockid set by the
+ * user.
+ */
+ __u64 cpu_timestamp;
+
+ /**
+ * @cpu_delta: Time delta in ns captured around reading the lower dword
+ * of the engine_cycles register, i.e. how long that register read took.
+ */
+ __u64 cpu_delta;
+};
+
/**
* struct drm_xe_query_mem_usage - describe memory regions and usage
*
@@ -385,12 +458,13 @@ struct drm_xe_device_query {
/** @extensions: Pointer to the first extension struct, if any */
__u64 extensions;
-#define DRM_XE_DEVICE_QUERY_ENGINES 0
-#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1
-#define DRM_XE_DEVICE_QUERY_CONFIG 2
-#define DRM_XE_DEVICE_QUERY_GTS 3
-#define DRM_XE_DEVICE_QUERY_HWCONFIG 4
-#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5
+#define DRM_XE_DEVICE_QUERY_ENGINES 0
+#define DRM_XE_DEVICE_QUERY_MEM_USAGE 1
+#define DRM_XE_DEVICE_QUERY_CONFIG 2
+#define DRM_XE_DEVICE_QUERY_GTS 3
+#define DRM_XE_DEVICE_QUERY_HWCONFIG 4
+#define DRM_XE_DEVICE_QUERY_GT_TOPOLOGY 5
+#define DRM_XE_DEVICE_QUERY_ENGINE_CYCLES 6
/** @query: The type of data to query */
__u32 query;
@@ -732,24 +806,6 @@ struct drm_xe_exec_queue_set_property {
__u64 reserved[2];
};
-/** struct drm_xe_engine_class_instance - instance of an engine class */
-struct drm_xe_engine_class_instance {
-#define DRM_XE_ENGINE_CLASS_RENDER 0
-#define DRM_XE_ENGINE_CLASS_COPY 1
-#define DRM_XE_ENGINE_CLASS_VIDEO_DECODE 2
-#define DRM_XE_ENGINE_CLASS_VIDEO_ENHANCE 3
-#define DRM_XE_ENGINE_CLASS_COMPUTE 4
- /*
- * Kernel only class (not actual hardware engine class). Used for
- * creating ordered queues of VM bind operations.
- */
-#define DRM_XE_ENGINE_CLASS_VM_BIND 5
- __u16 engine_class;
-
- __u16 engine_instance;
- __u16 gt_id;
-};
-
struct drm_xe_exec_queue_create {
#define XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
/** @extensions: Pointer to the first extension struct, if any */
diff --git a/tests/intel/xe_query.c b/tests/intel/xe_query.c
index 5966968d3..3e7460ff4 100644
--- a/tests/intel/xe_query.c
+++ b/tests/intel/xe_query.c
@@ -476,6 +476,195 @@ test_query_invalid_extension(int fd)
do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
}
+/*
+ * Probe for DRM_XE_DEVICE_QUERY_ENGINE_CYCLES by issuing the query with a
+ * zero size and no data pointer. Returns true when the ioctl succeeds.
+ */
+static bool
+query_engine_cycles_supported(int fd)
+{
+	/* Members not named here (extensions, size, data) are zero-initialized. */
+	struct drm_xe_device_query probe = {
+		.query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
+	};
+	int ret;
+
+	ret = igt_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &probe);
+
+	return !ret;
+}
+
+/*
+ * Run the DRM_XE_DEVICE_QUERY_ENGINE_CYCLES query with @resp as the data
+ * buffer. The caller fills in the input members of @resp (eci, clockid)
+ * beforehand. The ioctl must succeed and leave a non-zero size in the query.
+ */
+static void
+query_engine_cycles(int fd, struct drm_xe_query_engine_cycles *resp)
+{
+	/* .extensions is left out on purpose: it is zero-initialized. */
+	struct drm_xe_device_query query = {
+		.data = to_user_pointer(resp),
+		.size = sizeof(struct drm_xe_query_engine_cycles),
+		.query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
+	};
+
+	do_ioctl(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query);
+	igt_assert(query.size);
+}
+
+/*
+ * Cycles elapsed between two reads (@start taken first, @end second) of a
+ * free-running counter that is @width bits wide, tolerating one wrap-around
+ * of the counter between the two reads.
+ */
+static uint64_t
+engine_cycles_elapsed(uint64_t start, uint64_t end, uint32_t width)
+{
+	/* Shifting a 64-bit value by 64 or more is undefined; use all ones then. */
+	uint64_t mask = width < 64 ? (1ULL << width) - 1 : ~0ULL;
+
+	/*
+	 * Unsigned subtraction is modular, so after masking this is
+	 * end - start normally and 2^width - start + end after a wrap.
+	 */
+	return (end - start) & mask;
+}
+
+/*
+ * For every supported clock id, take NUM_SNAPSHOTS back-to-back pairs of
+ * engine cycles queries on @hwe and compare the CPU time elapsed between the
+ * two queries of a pair with the engine time elapsed between them. A pair is
+ * counted as usable when the two agree to within 5000 ns, and more than two
+ * usable pairs are required for each clock id.
+ *
+ * A spinner is submitted on an exec queue created for @hwe for the duration
+ * of the sampling and released afterwards.
+ */
+static void
+__engine_cycles(int fd, struct drm_xe_engine_class_instance *hwe)
+{
+	struct drm_xe_query_engine_cycles ts1 = {};
+	struct drm_xe_query_engine_cycles ts2 = {};
+	uint64_t delta_cpu, delta_cs, delta_delta;
+	unsigned int exec_queue;
+	int i, usable = 0;
+	igt_spin_t *spin;
+	uint64_t ahnd;
+	uint32_t vm;
+	struct {
+		int32_t id;
+		const char *name;
+	} clock[] = {
+		{ CLOCK_MONOTONIC, "CLOCK_MONOTONIC" },
+		{ CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW" },
+		{ CLOCK_REALTIME, "CLOCK_REALTIME" },
+		{ CLOCK_BOOTTIME, "CLOCK_BOOTTIME" },
+		{ CLOCK_TAI, "CLOCK_TAI" },
+	};
+
+	igt_debug("engine[%u:%u]\n",
+		  hwe->engine_class,
+		  hwe->engine_instance);
+
+	vm = xe_vm_create(fd, 0, 0);
+	exec_queue = xe_exec_queue_create(fd, vm, hwe, 0);
+	ahnd = intel_allocator_open(fd, 0, INTEL_ALLOCATOR_RELOC);
+	spin = igt_spin_new(fd, .ahnd = ahnd, .engine = exec_queue, .vm = vm);
+
+	/* Try a new clock every 10 iterations. */
+#define NUM_SNAPSHOTS 10
+	for (i = 0; i < NUM_SNAPSHOTS * ARRAY_SIZE(clock); i++) {
+		int index = i / NUM_SNAPSHOTS;
+
+		ts1.eci = *hwe;
+		ts1.clockid = clock[index].id;
+
+		ts2.eci = *hwe;
+		ts2.clockid = clock[index].id;
+
+		query_engine_cycles(fd, &ts1);
+		query_engine_cycles(fd, &ts2);
+
+		igt_debug("[1] cpu_ts before %llu, reg read time %llu\n",
+			  ts1.cpu_timestamp,
+			  ts1.cpu_delta);
+		igt_debug("[1] engine_ts %llu, freq %llu Hz, width %u\n",
+			  ts1.engine_cycles, ts1.engine_frequency, ts1.width);
+
+		igt_debug("[2] cpu_ts before %llu, reg read time %llu\n",
+			  ts2.cpu_timestamp,
+			  ts2.cpu_delta);
+		igt_debug("[2] engine_ts %llu, freq %llu Hz, width %u\n",
+			  ts2.engine_cycles, ts2.engine_frequency, ts2.width);
+
+		delta_cpu = ts2.cpu_timestamp - ts1.cpu_timestamp;
+
+		/* The frequency is a divisor below; a zero would fault. */
+		igt_assert(ts1.engine_frequency);
+
+		/* Engine time between the two snapshots, converted to ns. */
+		delta_cs = engine_cycles_elapsed(ts1.engine_cycles,
+						 ts2.engine_cycles,
+						 ts2.width) *
+			   NSEC_PER_SEC / ts1.engine_frequency;
+
+		/* Cast so the format matches on 32-bit and 64-bit builds. */
+		igt_debug("delta_cpu[%llu], delta_cs[%llu]\n",
+			  (unsigned long long)delta_cpu,
+			  (unsigned long long)delta_cs);
+
+		delta_delta = delta_cpu > delta_cs ?
+			      delta_cpu - delta_cs :
+			      delta_cs - delta_cpu;
+		igt_debug("delta_delta %llu\n",
+			  (unsigned long long)delta_delta);
+
+		/* CPU and engine elapsed times agree to within 5000 ns. */
+		if (delta_delta < 5000)
+			usable++;
+
+		/*
+		 * User needs few good snapshots of the timestamps to
+		 * synchronize cpu time with cs time. Check if we have enough
+		 * usable values before moving to the next clockid.
+		 */
+		if (!((i + 1) % NUM_SNAPSHOTS)) {
+			igt_debug("clock %s\n", clock[index].name);
+			igt_debug("usable %d\n", usable);
+			igt_assert(usable > 2);
+			usable = 0;
+		}
+	}
+
+	igt_spin_free(fd, spin);
+	xe_exec_queue_destroy(fd, exec_queue);
+	xe_vm_destroy(fd, vm);
+	put_ahnd(ahnd);
+}
+
+/**
+ * SUBTEST: query-cs-cycles
+ * Description: Query CPU-GPU timestamp correlation
+ */
+static void test_query_engine_cycles(int fd)
+{
+ struct drm_xe_engine_class_instance *hwe;
+
+ /* Only run when the probe ioctl for the engine cycles query succeeds. */
+ igt_require(query_engine_cycles_supported(fd));
+
+ /* Run the CPU/engine timestamp correlation check on each engine in turn. */
+ xe_for_each_hw_engine(fd, hwe) {
+ igt_assert(hwe);
+ __engine_cycles(fd, hwe);
+ }
+}
+
+/**
+ * SUBTEST: query-invalid-cs-cycles
+ * Description: Check query with invalid arguments returns expected error code.
+ */
+static void test_engine_cycles_invalid(int fd)
+{
+	/* NULL so that "no engine found" is detectable after the loop below. */
+	struct drm_xe_engine_class_instance *hwe = NULL;
+	struct drm_xe_query_engine_cycles ts = {};
+	/* query.data points at ts, so every change to ts is seen by the ioctl. */
+	struct drm_xe_device_query query = {
+		.extensions = 0,
+		.query = DRM_XE_DEVICE_QUERY_ENGINE_CYCLES,
+		.size = sizeof(ts),
+		.data = to_user_pointer(&ts),
+	};
+
+	igt_require(query_engine_cycles_supported(fd));
+
+	/* get one engine */
+	xe_for_each_hw_engine(fd, hwe)
+		break;
+
+	/*
+	 * If the iteration body never ran, hwe may still be NULL; fail here
+	 * rather than dereference it below.
+	 */
+	igt_assert(hwe);
+
+	/* sanity check engine selection is valid */
+	ts.eci = *hwe;
+	query_engine_cycles(fd, &ts);
+
+	/*
+	 * Each case below corrupts exactly one input, expects EINVAL, and
+	 * then restores the valid value before the next case.
+	 */
+
+	/* bad instance */
+	ts.eci = *hwe;
+	ts.eci.engine_instance = 0xffff;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.eci = *hwe;
+
+	/* bad class */
+	ts.eci.engine_class = 0xffff;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.eci = *hwe;
+
+	/* bad gt */
+	ts.eci.gt_id = 0xffff;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.eci = *hwe;
+
+	/* bad clockid */
+	ts.clockid = -1;
+	do_ioctl_err(fd, DRM_IOCTL_XE_DEVICE_QUERY, &query, EINVAL);
+	ts.clockid = 0;
+
+	/* sanity check: the restored arguments are accepted again */
+	query_engine_cycles(fd, &ts);
+}
+
igt_main
{
int xe;
@@ -501,6 +690,12 @@ igt_main
igt_subtest("query-topology")
test_query_gt_topology(xe);
+ igt_subtest("query-cs-cycles")
+ test_query_engine_cycles(xe);
+
+ igt_subtest("query-invalid-cs-cycles")
+ test_engine_cycles_invalid(xe);
+
igt_subtest("query-invalid-query")
test_query_invalid_query(xe);
--
2.34.1
More information about the igt-dev
mailing list