[igt-dev] [PATCH i-g-t] i915/perf: Add test to query CS timestamp

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Mar 5 11:55:10 UTC 2021


On 03/03/2021 23:28, Umesh Nerlige Ramappa wrote:
> Add tests to query CS timestamps for different engines.
>
> v2:
> - remove flag parameter
> - assert for minimum usable values rather than maximum
>
> v3:
> - use clock id for cpu timestamps (Lionel)
> - check if query is supported (Ashutosh)
> - test bad queries
>
> v4: (Chris, Tvrtko)
> - cs_timestamp is a misnomer, use cs_cycles instead
> - use cs cycle frequency returned in the query
> - omit size parameter
>
> v5:
> - use __for_each_physical_engine (Lionel)
> - check for ENODEV (Umesh)
>
> v6: Use 2 cpu timestamps to calculate reg read time (Lionel)
>
> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>


Looks good:


Reviewed-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>


Thanks!
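
As an aside, for anyone who wants to poke at the new query outside of IGT, the ioctl plumbing boils down to something like the sketch below. It assumes the uapi additions from the patch below are in the i915_drm.h being compiled against; the render node path, the choice of the render engine and the error handling are only illustrative:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <time.h>
#include <unistd.h>
#include <drm/i915_drm.h>	/* must carry the DRM_I915_QUERY_CS_CYCLES additions below */

int main(void)
{
	struct drm_i915_query_cs_cycles cycles = {
		.engine = {
			.engine_class = I915_ENGINE_CLASS_RENDER,
			.engine_instance = 0,
		},
		.clockid = CLOCK_MONOTONIC,
	};
	struct drm_i915_query_item item = {
		.query_id = DRM_I915_QUERY_CS_CYCLES,
		.length = sizeof(cycles),
		.data_ptr = (uint64_t)(uintptr_t)&cycles,
	};
	struct drm_i915_query query = {
		.num_items = 1,
		.items_ptr = (uint64_t)(uintptr_t)&item,
	};
	int fd = open("/dev/dri/renderD128", O_RDWR); /* illustrative node */

	if (fd < 0 || ioctl(fd, DRM_IOCTL_I915_QUERY, &query) || item.length < 0) {
		perror("DRM_I915_QUERY_CS_CYCLES");
		return 1;
	}

	printf("cs_cycles=%llu @ %llu Hz, cpu=[%llu, %llu] ns\n",
	       (unsigned long long)cycles.cs_cycles,
	       (unsigned long long)cycles.cs_frequency,
	       (unsigned long long)cycles.cpu_timestamp[0],
	       (unsigned long long)cycles.cpu_timestamp[1]);

	close(fd);
	return 0;
}
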


> ---
>   include/drm-uapi/i915_drm.h |  47 +++++++++
>   tests/i915/i915_query.c     | 191 ++++++++++++++++++++++++++++++++++++
>   2 files changed, 238 insertions(+)
>
> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
> index bf9ea471..e95e17f9 100644
> --- a/include/drm-uapi/i915_drm.h
> +++ b/include/drm-uapi/i915_drm.h
> @@ -2176,6 +2176,10 @@ struct drm_i915_query_item {
>   #define DRM_I915_QUERY_TOPOLOGY_INFO    1
>   #define DRM_I915_QUERY_ENGINE_INFO	2
>   #define DRM_I915_QUERY_PERF_CONFIG      3
> +	/**
> +	 * Query Command Streamer timestamp register.
> +	 */
> +#define DRM_I915_QUERY_CS_CYCLES	4
>   /* Must be kept compact -- no holes and well documented */
>   
>   	/*
> @@ -2309,6 +2313,49 @@ struct drm_i915_engine_info {
>   	__u64 rsvd1[4];
>   };
>   
> +/**
> + * struct drm_i915_query_cs_cycles
> + *
> + * The query returns the command streamer cycles and the frequency that can be
> + * used to calculate the command streamer timestamp. In addition the query
> + * returns two cpu timestamps that bracket the read of the command streamer
> + * cycle count.
> + */
> +struct drm_i915_query_cs_cycles {
> +	/** Engine for which command streamer cycles are queried. */
> +	struct i915_engine_class_instance engine;
> +
> +	/** Must be zero. */
> +	__u32 flags;
> +
> +	/**
> +	 * Command streamer cycles as read from the command streamer
> +	 * register at 0x358 offset.
> +	 */
> +	__u64 cs_cycles;
> +
> +	/** Frequency of the cs cycles in Hz. */
> +	__u64 cs_frequency;
> +
> +	/**
> +	 * CPU timestamp in nanoseconds. cpu_timestamp[0] is captured before
> +	 * reading the cs_cycles register and cpu_timestamp[1] is captured after
> +	 * reading the register.
> +	 */
> +	__u64 cpu_timestamp[2];
> +
> +	/**
> +	 * Reference clock id for CPU timestamp. For definition, see
> +	 * clock_gettime(2) and perf_event_open(2). Supported clock ids are
> +	 * CLOCK_MONOTONIC, CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME,
> +	 * CLOCK_TAI.
> +	 */
> +	__s32 clockid;
> +
> +	/** Must be zero. */
> +	__u32 rsvd;
> +};
> +
>   /**
>    * struct drm_i915_query_engine_info
>    *
> diff --git a/tests/i915/i915_query.c b/tests/i915/i915_query.c
> index 29b938e9..e1c5f93d 100644
> --- a/tests/i915/i915_query.c
> +++ b/tests/i915/i915_query.c
> @@ -267,6 +267,184 @@ eu_available(const struct drm_i915_query_topology_info *topo_info,
>   				eu / 8] >> (eu % 8)) & 1;
>   }
>   
> +static bool query_cs_cycles_supported(int fd)
> +{
> +	struct drm_i915_query_item item = {
> +		.query_id = DRM_I915_QUERY_CS_CYCLES,
> +	};
> +
> +	return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
> +}
> +
> +static void __query_cs_cycles(int i915, void *data, int err)
> +{
> +	struct drm_i915_query_item item = {
> +		.query_id = DRM_I915_QUERY_CS_CYCLES,
> +		.data_ptr = to_user_pointer(data),
> +		.length = sizeof(struct drm_i915_query_cs_cycles),
> +	};
> +
> +	i915_query_items(i915, &item, 1);
> +
> +	if (err)
> +		igt_assert(item.length == -err);
> +}
> +
> +static bool engine_has_cs_cycles(int i915, uint16_t class, uint16_t instance)
> +{
> +	struct drm_i915_query_cs_cycles ts = {};
> +	struct drm_i915_query_item item = {
> +		.query_id = DRM_I915_QUERY_CS_CYCLES,
> +		.data_ptr = to_user_pointer(&ts),
> +		.length = sizeof(struct drm_i915_query_cs_cycles),
> +	};
> +
> +	ts.engine.engine_class = class;
> +	ts.engine.engine_instance = instance;
> +
> +	i915_query_items(i915, &item, 1);
> +
> +	return item.length != -ENODEV;
> +}
> +
> +static void
> +__cs_cycles(int i915, struct i915_engine_class_instance *engine)
> +{
> +	struct drm_i915_query_cs_cycles ts1 = {};
> +	struct drm_i915_query_cs_cycles ts2 = {};
> +	uint64_t delta_cpu, delta_cs, delta_delta;
> +	int i, usable = 0;
> +	struct {
> +		int32_t id;
> +		const char *name;
> +	} clock[] = {
> +		{ CLOCK_MONOTONIC, "CLOCK_MONOTONIC" },
> +		{ CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW" },
> +		{ CLOCK_REALTIME, "CLOCK_REALTIME" },
> +		{ CLOCK_BOOTTIME, "CLOCK_BOOTTIME" },
> +		{ CLOCK_TAI, "CLOCK_TAI" },
> +	};
> +
> +	igt_debug("engine[%u:%u]\n",
> +		  engine->engine_class,
> +		  engine->engine_instance);
> +
> +	/* Try a new clock every 10 iterations. */
> +#define NUM_SNAPSHOTS 10
> +	for (i = 0; i < NUM_SNAPSHOTS * ARRAY_SIZE(clock); i++) {
> +		int index = i / NUM_SNAPSHOTS;
> +
> +		ts1.engine = *engine;
> +		ts1.clockid = clock[index].id;
> +
> +		ts2.engine = *engine;
> +		ts2.clockid = clock[index].id;
> +
> +		__query_cs_cycles(i915, &ts1, 0);
> +		__query_cs_cycles(i915, &ts2, 0);
> +
> +		igt_debug("[1] cpu_ts before %llu, cpu_ts after %llu, reg read time %llu\n",
> +			  ts1.cpu_timestamp[0],
> +			  ts1.cpu_timestamp[1],
> +			  ts1.cpu_timestamp[1] - ts1.cpu_timestamp[0]);
> +		igt_debug("[1] cs_ts %llu, freq %llu Hz\n",
> +			  ts1.cs_cycles, ts1.cs_frequency);
> +
> +		igt_debug("[2] cpu_ts before %llu, cpu_ts after %llu, reg read time %llu\n",
> +			  ts2.cpu_timestamp[0],
> +			  ts2.cpu_timestamp[1],
> +			  ts2.cpu_timestamp[1] - ts2.cpu_timestamp[0]);
> +		igt_debug("[2] cs_ts %llu, freq %llu Hz\n",
> +			  ts2.cs_cycles, ts2.cs_frequency);
> +
> +		delta_cpu = ts2.cpu_timestamp[0] - ts1.cpu_timestamp[0];
> +		delta_cs = (ts2.cs_cycles - ts1.cs_cycles) *
> +			   NSEC_PER_SEC / ts1.cs_frequency;
> +
> +		igt_debug("delta_cpu[%lu], delta_cs[%lu]\n",
> +			  delta_cpu, delta_cs);
> +
> +		delta_delta = delta_cpu > delta_cs ?
> +			       delta_cpu - delta_cs :
> +			       delta_cs - delta_cpu;
> +		igt_debug("delta_delta %lu\n", delta_delta);
> +
> +		if (delta_delta < 5000)
> +			usable++;
> +
> +		/*
> +		 * The user needs a few good snapshots of the timestamps to
> +		 * synchronize cpu time with cs time. Check if we have enough
> +		 * usable values before moving to the next clockid.
> +		 */
> +		if (!((i + 1) % NUM_SNAPSHOTS)) {
> +			igt_debug("clock %s\n", clock[index].name);
> +			igt_debug("usable %d\n", usable);
> +			igt_assert(usable > 2);
> +			usable = 0;
> +		}
> +	}
> +}
> +
> +static void test_cs_cycles(int i915)
> +{
> +	const struct intel_execution_engine2 *e;
> +	struct i915_engine_class_instance engine;
> +
> +	__for_each_physical_engine(i915, e) {
> +		if (engine_has_cs_cycles(i915, e->class, e->instance)) {
> +			engine.engine_class = e->class;
> +			engine.engine_instance = e->instance;
> +			__cs_cycles(i915, &engine);
> +		}
> +	}
> +}
> +
> +static void test_cs_cycles_invalid(int i915)
> +{
> +	struct i915_engine_class_instance engine;
> +	const struct intel_execution_engine2 *e;
> +	struct drm_i915_query_cs_cycles ts = {};
> +
> +	/* get one engine */
> +	__for_each_physical_engine(i915, e)
> +		break;
> +
> +	/* bad engines */
> +	ts.engine.engine_class = e->class;
> +	ts.engine.engine_instance = -1;
> +	__query_cs_cycles(i915, &ts, EINVAL);
> +
> +	ts.engine.engine_class = -1;
> +	ts.engine.engine_instance = e->instance;
> +	__query_cs_cycles(i915, &ts, EINVAL);
> +
> +	ts.engine.engine_class = -1;
> +	ts.engine.engine_instance = -1;
> +	__query_cs_cycles(i915, &ts, EINVAL);
> +
> +	/* non zero flags */
> +	ts.flags = 1;
> +	ts.engine.engine_class = e->class;
> +	ts.engine.engine_instance = e->instance;
> +	__query_cs_cycles(i915, &ts, EINVAL);
> +
> +	/* non zero rsvd field */
> +	ts.flags = 0;
> +	ts.rsvd = 1;
> +	__query_cs_cycles(i915, &ts, EINVAL);
> +
> +	/* bad clockid */
> +	ts.rsvd = 0;
> +	ts.clockid = -1;
> +	__query_cs_cycles(i915, &ts, EINVAL);
> +
> +	/* sanity check */
> +	engine.engine_class = e->class;
> +	engine.engine_instance = e->instance;
> +	__cs_cycles(i915, &engine);
> +}
> +
>   /*
>    * Verify that we get coherent values between the legacy getparam slice/subslice
>    * masks and the new topology query.
> @@ -783,6 +961,19 @@ igt_main
>   			engines(fd);
>   	}
>   
> +	igt_subtest_group {
> +		igt_fixture {
> +			igt_require(intel_gen(devid) >= 6);
> +			igt_require(query_cs_cycles_supported(fd));
> +		}
> +
> +		igt_subtest("cs-cycles")
> +			test_cs_cycles(fd);
> +
> +		igt_subtest("cs-cycles-invalid")
> +			test_cs_cycles_invalid(fd);
> +	}
> +
>   	igt_fixture {
>   		close(fd);
>   	}
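
And just to spell out the correlation the new subtest is exercising: the test only checks that the CPU and CS deltas between two snapshots agree to within 5us, but a user wanting to map a GPU timestamp onto the CPU clock could do something along the lines of the helper below. The function name and the midpoint approximation for the register read are made up here, not part of the uapi:

#include <stdint.h>
#include <drm/i915_drm.h>	/* struct drm_i915_query_cs_cycles from this patch */

#ifndef NSEC_PER_SEC
#define NSEC_PER_SEC 1000000000ull
#endif

/*
 * Map a raw CS cycle value onto the CPU clock selected in the snapshot.
 * The CPU time of the register read is approximated by the midpoint of
 * the two bracketing CPU timestamps; the cycle delta is then scaled to
 * nanoseconds with the frequency reported by the query, mirroring the
 * arithmetic in __cs_cycles() above.
 */
static uint64_t cs_cycles_to_cpu_ns(const struct drm_i915_query_cs_cycles *snap,
				    uint64_t cs_cycles)
{
	uint64_t reg_read_ns = (snap->cpu_timestamp[0] +
				snap->cpu_timestamp[1]) / 2;
	uint64_t delta_ns = (cs_cycles - snap->cs_cycles) * NSEC_PER_SEC /
			    snap->cs_frequency;

	return reg_read_ns + delta_ns;
}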



