[PATCH i-g-t] HAX: i915/perf: Add test to query CS timestamp
Umesh Nerlige Ramappa
umesh.nerlige.ramappa at intel.com
Fri Feb 19 17:50:34 UTC 2021
Add tests to query CS timestamps for different engines.
v2:
- remove flag parameter
- assert for minimum usable values rather than maximum
v3:
- use clock id for cpu timestamps (Lionel)
- check if query is supported (Ashutosh)
- test bad queries
v4: (Chris, Tvrtko)
- cs_timestamp is a misnomer, use cs_cycles instead
- use cs cycle frequency returned in the query
- omit size parameter
v5:
- use __for_each_physical_engine (Lionel)
Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
---
include/drm-uapi/i915_drm.h | 43 +++++++
tests/i915/i915_query.c | 162 ++++++++++++++++++++++++
tests/intel-ci/fast-feedback.testlist | 173 +-------------------------
3 files changed, 208 insertions(+), 170 deletions(-)
diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
index bf9ea471..a29f8578 100644
--- a/include/drm-uapi/i915_drm.h
+++ b/include/drm-uapi/i915_drm.h
@@ -2176,6 +2176,10 @@ struct drm_i915_query_item {
#define DRM_I915_QUERY_TOPOLOGY_INFO 1
#define DRM_I915_QUERY_ENGINE_INFO 2
#define DRM_I915_QUERY_PERF_CONFIG 3
+ /**
+ * Query Command Streamer timestamp register.
+ */
+#define DRM_I915_QUERY_CS_CYCLES 4
/* Must be kept compact -- no holes and well documented */
/*
@@ -2309,6 +2313,45 @@ struct drm_i915_engine_info {
__u64 rsvd1[4];
};
+/**
+ * struct drm_i915_query_cs_cycles
+ *
+ * The query returns the command streamer cycles and the frequency that can be
+ * used to calculate the command streamer timestamp. In addition the query
+ * returns the cpu timestamp that indicates when the command streamer cycle
+ * count was captured.
+ *
+ * Userspace fills in @engine, @flags, @clockid and @rsvd; @cs_cycles,
+ * @cs_frequency and @cpu_timestamp are the returned values.
+ *
+ * A difference between two @cs_cycles samples converts to nanoseconds as
+ * delta_cycles * NSEC_PER_SEC / cs_frequency, which makes it directly
+ * comparable with a difference between two @cpu_timestamp samples.
+ */
+struct drm_i915_query_cs_cycles {
+ /** Engine for which command streamer cycles is queried. */
+ struct i915_engine_class_instance engine;
+
+ /** Must be zero. */
+ __u32 flags;
+
+ /**
+ * Command streamer cycles as read from the command streamer
+ * register at 0x358 offset.
+ *
+ * This is a raw cycle count, not a time; scale it with @cs_frequency.
+ */
+ __u64 cs_cycles;
+
+ /** Frequency of the cs cycles in Hz. */
+ __u64 cs_frequency;
+
+ /** CPU timestamp in nanoseconds. */
+ __u64 cpu_timestamp;
+
+ /**
+ * Reference clock id for CPU timestamp. For definition, see
+ * clock_gettime(2) and perf_event_open(2). Supported clock ids are
+ * CLOCK_MONOTONIC, CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME,
+ * CLOCK_TAI.
+ *
+ * Selects the clock that @cpu_timestamp is expressed in.
+ */
+ __s32 clockid;
+
+ /** Must be zero. */
+ __u32 rsvd;
+};
+
/**
* struct drm_i915_query_engine_info
*
diff --git a/tests/i915/i915_query.c b/tests/i915/i915_query.c
index 29b938e9..a3fcade5 100644
--- a/tests/i915/i915_query.c
+++ b/tests/i915/i915_query.c
@@ -267,6 +267,156 @@ eu_available(const struct drm_i915_query_topology_info *topo_info,
eu / 8] >> (eu % 8)) & 1;
}
+/*
+ * Probe for DRM_I915_QUERY_CS_CYCLES by submitting the query item with no
+ * data buffer and a zero length.
+ *
+ * Returns true when the query call itself returns 0 and the item's length
+ * comes back positive, false otherwise.
+ */
+static bool query_cs_cycles_supported(int fd)
+{
+	struct drm_i915_query_item probe = {
+		.query_id = DRM_I915_QUERY_CS_CYCLES,
+	};
+
+	if (__i915_query_items(fd, &probe, 1) != 0)
+		return false;
+
+	return probe.length > 0;
+}
+
+/*
+ * Issue a single DRM_I915_QUERY_CS_CYCLES query with @data as its payload.
+ *
+ * @i915: fd handed to i915_query_items()
+ * @data: struct drm_i915_query_cs_cycles used as the query item's data
+ * @err: 0 when the query is expected to succeed, otherwise the positive
+ *       errno value the item is expected to report as -errno in its length
+ */
+static void __query_cs_cycles(int i915, void *data, int err)
+{
+	struct drm_i915_query_item item = {
+		.query_id = DRM_I915_QUERY_CS_CYCLES,
+		.data_ptr = to_user_pointer(data),
+		.length = sizeof(struct drm_i915_query_cs_cycles),
+	};
+
+	i915_query_items(i915, &item, 1);
+
+	/*
+	 * A failing item is reported through item.length, so the success
+	 * path has to look at it too; otherwise callers would carry on with
+	 * a result structure that was never filled in.
+	 */
+	if (err)
+		igt_assert(item.length == -err);
+	else
+		igt_assert(item.length > 0);
+}
+
+/*
+ * Check that CS cycle and CPU timestamp samples taken on @engine track each
+ * other closely enough to be usable for clock correlation.
+ *
+ * For every supported clock id the query is issued in back-to-back pairs,
+ * NUM_SNAPSHOTS times. For each pair the elapsed CPU time and the elapsed CS
+ * time (cycles scaled by the reported frequency) are compared; a pair counts
+ * as usable when the two differ by less than 5000ns. More than two usable
+ * pairs are required per clock id.
+ *
+ * @i915: fd used for the queries
+ * @engine: class/instance of the engine to sample
+ */
+static void
+__cs_cycles(int i915, struct i915_engine_class_instance *engine)
+{
+	struct drm_i915_query_cs_cycles ts1 = {};
+	struct drm_i915_query_cs_cycles ts2 = {};
+	uint64_t delta_cpu, delta_cs, delta_delta;
+	int i, usable = 0;
+	struct {
+		int32_t id;
+		const char *name;
+	} clock[] = {
+		{ CLOCK_MONOTONIC, "CLOCK_MONOTONIC" },
+		{ CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW" },
+		{ CLOCK_REALTIME, "CLOCK_REALTIME" },
+		{ CLOCK_BOOTTIME, "CLOCK_BOOTTIME" },
+		{ CLOCK_TAI, "CLOCK_TAI" },
+	};
+
+	igt_debug("engine[%u:%u]\n",
+		  engine->engine_class,
+		  engine->engine_instance);
+
+	/* Try a new clock every 10 iterations. */
+#define NUM_SNAPSHOTS 10
+	for (i = 0; i < NUM_SNAPSHOTS * ARRAY_SIZE(clock); i++) {
+		int index = i / NUM_SNAPSHOTS;
+
+		ts1.engine = *engine;
+		ts1.clockid = clock[index].id;
+
+		ts2.engine = *engine;
+		ts2.clockid = clock[index].id;
+
+		__query_cs_cycles(i915, &ts1, 0);
+		__query_cs_cycles(i915, &ts2, 0);
+
+		igt_debug("cpu_ts1[%llu], cs_ts1[%llu]\n",
+			  ts1.cpu_timestamp,
+			  ts1.cs_cycles);
+		igt_debug("cpu_ts2[%llu], cs_ts2[%llu]\n",
+			  ts2.cpu_timestamp,
+			  ts2.cs_cycles);
+
+		/* Fail cleanly instead of dividing by zero below. */
+		igt_assert(ts1.cs_frequency != 0);
+
+		delta_cpu = ts2.cpu_timestamp - ts1.cpu_timestamp;
+		delta_cs = (ts2.cs_cycles - ts1.cs_cycles) *
+			   NSEC_PER_SEC / ts1.cs_frequency;
+
+		/* uint64_t is not unsigned long everywhere: cast for printf. */
+		igt_debug("delta_cpu[%llu], delta_cs[%llu]\n",
+			  (unsigned long long)delta_cpu,
+			  (unsigned long long)delta_cs);
+
+		/*
+		 * Absolute difference computed in 64 bits; labs() takes a
+		 * long and would truncate where long is 32 bits wide.
+		 */
+		delta_delta = delta_cpu > delta_cs ?
+			      delta_cpu - delta_cs : delta_cs - delta_cpu;
+		igt_debug("delta_delta %llu\n",
+			  (unsigned long long)delta_delta);
+
+		/* Both deltas are in nanoseconds at this point. */
+		if (delta_delta < 5000)
+			usable++;
+
+		/*
+		 * User needs few good snapshots of the timestamps to
+		 * synchronize cpu time with cs time. Check if we have enough
+		 * usable values before moving to the next clockid.
+		 */
+		if (!((i + 1) % NUM_SNAPSHOTS)) {
+			igt_debug("clock %s\n", clock[index].name);
+			igt_debug("usable %d\n", usable);
+			igt_assert(usable > 2);
+			usable = 0;
+		}
+	}
+}
+
+/* Run the CS cycles check on every physical engine, one after another. */
+static void test_cs_cycles(int i915)
+{
+	const struct intel_execution_engine2 *e;
+
+	__for_each_physical_engine(i915, e) {
+		struct i915_engine_class_instance ci = {
+			.engine_class = e->class,
+			.engine_instance = e->instance,
+		};
+
+		__cs_cycles(i915, &ci);
+	}
+}
+
+/*
+ * Feed the CS cycles query a series of malformed requests, each of which is
+ * expected to be rejected with EINVAL, then confirm that a well-formed
+ * request on the same engine still passes the regular check.
+ */
+static void test_cs_cycles_invalid(int i915)
+{
+	const struct intel_execution_engine2 *e;
+	struct i915_engine_class_instance valid;
+	struct drm_i915_query_cs_cycles ts = {};
+
+	/* Any engine will do: stop at the first one the iterator yields. */
+	__for_each_physical_engine(i915, e)
+		break;
+
+	valid.engine_class = e->class;
+	valid.engine_instance = e->instance;
+
+	/* Valid class, bad instance. */
+	ts.engine = valid;
+	ts.engine.engine_instance = -1;
+	__query_cs_cycles(i915, &ts, EINVAL);
+
+	/* Bad class, valid instance. */
+	ts.engine = valid;
+	ts.engine.engine_class = -1;
+	__query_cs_cycles(i915, &ts, EINVAL);
+
+	/* Bad class and bad instance. */
+	ts.engine.engine_instance = -1;
+	__query_cs_cycles(i915, &ts, EINVAL);
+
+	/* Valid engine from here on; non-zero flags. */
+	ts.engine = valid;
+	ts.flags = 1;
+	__query_cs_cycles(i915, &ts, EINVAL);
+
+	/* Flags back to zero, non-zero reserved field instead. */
+	ts.flags = 0;
+	ts.rsvd = 1;
+	__query_cs_cycles(i915, &ts, EINVAL);
+
+	/* Reserved field back to zero, bad clock id instead. */
+	ts.rsvd = 0;
+	ts.clockid = -1;
+	__query_cs_cycles(i915, &ts, EINVAL);
+
+	/* Sanity check: the same engine works with well-formed input. */
+	__cs_cycles(i915, &valid);
+}
+
/*
* Verify that we get coherent values between the legacy getparam slice/subslice
* masks and the new topology query.
@@ -783,6 +933,18 @@ igt_main
engines(fd);
}
+ igt_subtest_group {
+ igt_fixture {
+ igt_require(query_cs_cycles_supported(fd));
+ }
+
+ igt_subtest("cs-cycles")
+ test_cs_cycles(fd);
+
+ igt_subtest("cs-cycles-invalid")
+ test_cs_cycles_invalid(fd);
+ }
+
igt_fixture {
close(fd);
}
diff --git a/tests/intel-ci/fast-feedback.testlist b/tests/intel-ci/fast-feedback.testlist
index eaa904fa..bd734620 100644
--- a/tests/intel-ci/fast-feedback.testlist
+++ b/tests/intel-ci/fast-feedback.testlist
@@ -1,173 +1,6 @@
# Keep alphabetically sorted by default
-igt at core_auth@basic-auth
-igt at debugfs_test@read_all_entries
-igt at fbdev@eof
-igt at fbdev@info
-igt at fbdev@nullptr
-igt at fbdev@read
-igt at fbdev@write
-igt at gem_basic@bad-close
-igt at gem_basic@create-close
-igt at gem_basic@create-fd-close
-igt at gem_busy@busy at all
-igt at gem_close_race@basic-process
-igt at gem_close_race@basic-threads
-igt at gem_ctx_create@basic
-igt at gem_ctx_create@basic-files
-igt at gem_ctx_exec@basic
-igt at gem_exec_basic@basic
-igt at gem_exec_create@basic
-igt at gem_exec_fence@basic-busy
-igt at gem_exec_fence@basic-wait
-igt at gem_exec_fence@basic-await
-igt at gem_exec_fence@nb-await
-igt at gem_exec_gttfill@basic
-igt at gem_exec_parallel@engines
-igt at gem_exec_store@basic
-igt at gem_exec_suspend@basic-s0
-igt at gem_exec_suspend@basic-s3
-igt at gem_flink_basic@bad-flink
-igt at gem_flink_basic@bad-open
-igt at gem_flink_basic@basic
-igt at gem_flink_basic@double-flink
-igt at gem_flink_basic@flink-lifetime
-igt at gem_huc_copy@huc-copy
-igt at gem_linear_blits@basic
-igt at gem_mmap@basic
-igt at gem_mmap_gtt@basic
-igt at gem_render_linear_blits@basic
-igt at gem_render_tiled_blits@basic
-igt at gem_ringfill@basic-all
-igt at gem_sync@basic-all
-igt at gem_sync@basic-each
-igt at gem_tiled_blits@basic
-igt at gem_tiled_fence_blits@basic
-igt at gem_tiled_pread_basic
-igt at gem_wait@busy at all
-igt at gem_wait@wait at all
-igt at i915_getparams_basic@basic-eu-total
-igt at i915_getparams_basic@basic-subslice-total
-igt at i915_hangman@error-state-basic
-igt at kms_addfb_basic@addfb25-bad-modifier
-igt at kms_addfb_basic@addfb25-framebuffer-vs-set-tiling
-igt at kms_addfb_basic@addfb25-modifier-no-flag
-igt at kms_addfb_basic@addfb25-x-tiled-legacy
-igt at kms_addfb_basic@addfb25-x-tiled-mismatch-legacy
-igt at kms_addfb_basic@addfb25-yf-tiled-legacy
-igt at kms_addfb_basic@addfb25-y-tiled-legacy
-igt at kms_addfb_basic@addfb25-y-tiled-small-legacy
-igt at kms_addfb_basic@bad-pitch-0
-igt at kms_addfb_basic@bad-pitch-1024
-igt at kms_addfb_basic@bad-pitch-128
-igt at kms_addfb_basic@bad-pitch-256
-igt at kms_addfb_basic@bad-pitch-32
-igt at kms_addfb_basic@bad-pitch-63
-igt at kms_addfb_basic@bad-pitch-65536
-igt at kms_addfb_basic@bad-pitch-999
-igt at kms_addfb_basic@basic
-igt at kms_addfb_basic@basic-x-tiled-legacy
-igt at kms_addfb_basic@basic-y-tiled-legacy
-igt at kms_addfb_basic@bo-too-small
-igt at kms_addfb_basic@bo-too-small-due-to-tiling
-igt at kms_addfb_basic@clobberred-modifier
-igt at kms_addfb_basic@framebuffer-vs-set-tiling
-igt at kms_addfb_basic@invalid-get-prop
-igt at kms_addfb_basic@invalid-get-prop-any
-igt at kms_addfb_basic@invalid-set-prop
-igt at kms_addfb_basic@invalid-set-prop-any
-igt at kms_addfb_basic@no-handle
-igt at kms_addfb_basic@size-max
-igt at kms_addfb_basic@small-bo
-igt at kms_addfb_basic@tile-pitch-mismatch
-igt at kms_addfb_basic@too-high
-igt at kms_addfb_basic@too-wide
-igt at kms_addfb_basic@unused-handle
-igt at kms_addfb_basic@unused-modifier
-igt at kms_addfb_basic@unused-offsets
-igt at kms_addfb_basic@unused-pitches
-igt at kms_busy@basic
-igt at kms_chamelium@dp-hpd-fast
-igt at kms_chamelium@dp-edid-read
-igt at kms_chamelium@dp-crc-fast
-igt at kms_chamelium@hdmi-hpd-fast
-igt at kms_chamelium@hdmi-edid-read
-igt at kms_chamelium@hdmi-crc-fast
-igt at kms_chamelium@vga-hpd-fast
-igt at kms_chamelium@vga-edid-read
-igt at kms_chamelium@common-hpd-after-suspend
-igt at kms_prop_blob@basic
-igt at kms_cursor_legacy@basic-busy-flip-before-cursor-atomic
-igt at kms_cursor_legacy@basic-busy-flip-before-cursor-legacy
-igt at kms_cursor_legacy@basic-flip-after-cursor-atomic
-igt at kms_cursor_legacy@basic-flip-after-cursor-legacy
-igt at kms_cursor_legacy@basic-flip-after-cursor-varying-size
-igt at kms_cursor_legacy@basic-flip-before-cursor-atomic
-igt at kms_cursor_legacy@basic-flip-before-cursor-legacy
-igt at kms_cursor_legacy@basic-flip-before-cursor-varying-size
-igt at kms_flip@basic-flip-vs-dpms
-igt at kms_flip@basic-flip-vs-modeset
-igt at kms_flip@basic-flip-vs-wf_vblank
-igt at kms_flip@basic-plain-flip
-igt at kms_force_connector_basic@force-connector-state
-igt at kms_force_connector_basic@force-edid
-igt at kms_force_connector_basic@force-load-detect
-igt at kms_force_connector_basic@prune-stale-modes
-igt at kms_frontbuffer_tracking@basic
-igt at kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-a
-igt at kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-b
-igt at kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-c
-igt at kms_pipe_crc_basic@compare-crc-sanitycheck-pipe-d
-igt at kms_pipe_crc_basic@hang-read-crc-pipe-a
-igt at kms_pipe_crc_basic@nonblocking-crc-pipe-a
-igt at kms_pipe_crc_basic@nonblocking-crc-pipe-a-frame-sequence
-igt at kms_pipe_crc_basic@read-crc-pipe-a
-igt at kms_pipe_crc_basic@read-crc-pipe-b
-igt at kms_pipe_crc_basic@read-crc-pipe-c
-igt at kms_pipe_crc_basic@read-crc-pipe-a-frame-sequence
-igt at kms_pipe_crc_basic@suspend-read-crc-pipe-a
-igt at kms_psr@primary_page_flip
-igt at kms_psr@cursor_plane_move
-igt at kms_psr@sprite_plane_onoff
-igt at kms_psr@primary_mmap_gtt
-igt at kms_setmode@basic-clone-single-crtc
-igt at i915_pm_backlight@basic-brightness
-igt at i915_pm_rpm@basic-pci-d3-state
-igt at i915_pm_rpm@basic-rte
-igt at i915_pm_rps@basic-api
-igt at prime_self_import@basic-llseek-bad
-igt at prime_self_import@basic-llseek-size
-igt at prime_self_import@basic-with_fd_dup
-igt at prime_self_import@basic-with_one_bo
-igt at prime_self_import@basic-with_one_bo_two_files
-igt at prime_self_import@basic-with_two_bos
-igt at prime_vgem@basic-fence-flip
-igt at prime_vgem@basic-fence-mmap
-igt at prime_vgem@basic-fence-read
-igt at prime_vgem@basic-gtt
-igt at prime_vgem@basic-read
-igt at prime_vgem@basic-write
-igt at prime_vgem@basic-userptr
-igt at vgem_basic@setversion
-igt at vgem_basic@create
-igt at vgem_basic@debugfs
-igt at vgem_basic@dmabuf-export
-igt at vgem_basic@dmabuf-fence
-igt at vgem_basic@dmabuf-fence-before
-igt at vgem_basic@dmabuf-mmap
-igt at vgem_basic@mmap
-igt at vgem_basic@second-client
-igt at vgem_basic@sysfs
-
-# All tests that do module unloading and reloading are executed last.
-# They will sometimes reveal issues of earlier tests leaving the
-# driver in a broken state that is not otherwise noticed in that test.
+igt@i915_query@cs-cycles
+igt@i915_query@cs-cycles-invalid
-igt at core_hotunplug@unbind-rebind
-igt at vgem_basic@unload
-igt at i915_module_load@reload
-igt at i915_pm_rpm@module-reload
-
-# Kernel selftests
-igt at i915_selftest@live
-igt at dmabuf@all
+igt@gem_exec_basic@basic
--
2.20.1
More information about the Intel-gfx-trybot
mailing list