[igt-dev] [PATCH i-g-t] i915/perf: Add test to query CS timestamp

Umesh Nerlige Ramappa umesh.nerlige.ramappa at intel.com
Sat Mar 13 06:04:00 UTC 2021


On Fri, Mar 05, 2021 at 08:33:25PM +0000, Chris Wilson wrote:
>Quoting Umesh Nerlige Ramappa (2021-03-05 19:37:08)
>> Add tests to query CS timestamps for different engines.
>>
>> v2:
>> - remove flag parameter
>> - assert for minimum usable values rather than maximum
>>
>> v3:
>> - use clock id for cpu timestamps (Lionel)
>> - check if query is supported (Ashutosh)
>> - test bad queries
>>
>> v4: (Chris, Tvrtko)
>> - cs_timestamp is a misnomer, use cs_cycles instead
>> - use cs cycle frequency returned in the query
>> - omit size parameter
>>
>> v5:
>> - use __for_each_physical_engine (Lionel)
>> - check for ENODEV (Umesh)
>>
>> v6: Use 2 cpu timestamps to calculate reg read time (Lionel)
>>
>> Signed-off-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa at intel.com>
>> ---
>>  include/drm-uapi/i915_drm.h |  48 +++++++++
>>  tests/i915/i915_query.c     | 189 ++++++++++++++++++++++++++++++++++++
>>  2 files changed, 237 insertions(+)
>>
>> diff --git a/include/drm-uapi/i915_drm.h b/include/drm-uapi/i915_drm.h
>> index bf9ea471..0e50302f 100644
>> --- a/include/drm-uapi/i915_drm.h
>> +++ b/include/drm-uapi/i915_drm.h
>> @@ -2176,6 +2176,10 @@ struct drm_i915_query_item {
>>  #define DRM_I915_QUERY_TOPOLOGY_INFO    1
>>  #define DRM_I915_QUERY_ENGINE_INFO     2
>>  #define DRM_I915_QUERY_PERF_CONFIG      3
>> +       /**
>> +        * Query Command Streamer timestamp register.
>> +        */
>> +#define DRM_I915_QUERY_CS_CYCLES       4
>>  /* Must be kept compact -- no holes and well documented */
>>
>>         /*
>> @@ -2309,6 +2313,50 @@ struct drm_i915_engine_info {
>>         __u64 rsvd1[4];
>>  };
>>
>> +/**
>> + * struct drm_i915_query_cs_cycles
>> + *
>> + * The query returns the command streamer cycles and the frequency that can be
>> + * used to calculate the command streamer timestamp. In addition the query
>> + * returns a set of cpu timestamps that indicate when the command streamer cycle
>> + * count was captured.
>> + */
>> +struct drm_i915_query_cs_cycles {
>> +       /** Engine for which command streamer cycles is queried. */
>> +       struct i915_engine_class_instance engine;
>> +
>> +       /** Must be zero. */
>> +       __u32 flags;
>> +
>> +       /**
>> +        * Command streamer cycles as read from the command streamer
>> +        * register at 0x358 offset.
>> +        */
>> +       __u64 cs_cycles;
>> +
>> +       /** Frequency of the cs cycles in Hz. */
>> +       __u64 cs_frequency;
>> +
>> +       /**
>> +        * CPU timestamps in ns. cpu_timestamp[0] is captured before reading the
>> +        * cs_cycles register using the reference clockid set by the user.
>> +        * cpu_timestamp[1] is the time taken in ns to read the lower dword of
>> +        * the cs_cycles register.
>> +        */
>> +       __u64 cpu_timestamp[2];
>> +
>> +       /**
>> +        * Reference clock id for CPU timestamp. For definition, see
>> +        * clock_gettime(2) and perf_event_open(2). Supported clock ids are
>> +        * CLOCK_MONOTONIC, CLOCK_MONOTONIC_RAW, CLOCK_REALTIME, CLOCK_BOOTTIME,
>> +        * CLOCK_TAI.
>> +        */
>> +       __s32 clockid;
>> +
>> +       /** Must be zero. */
>> +       __u32 rsvd;
>> +};
>> +
>>  /**
>>   * struct drm_i915_query_engine_info
>>   *
>> diff --git a/tests/i915/i915_query.c b/tests/i915/i915_query.c
>> index 29b938e9..2ab3905b 100644
>> --- a/tests/i915/i915_query.c
>> +++ b/tests/i915/i915_query.c
>> @@ -267,6 +267,182 @@ eu_available(const struct drm_i915_query_topology_info *topo_info,
>>                                 eu / 8] >> (eu % 8)) & 1;
>>  }
>>
>> +static bool query_cs_cycles_supported(int fd)
>> +{
>> +       struct drm_i915_query_item item = {
>> +               .query_id = DRM_I915_QUERY_CS_CYCLES,
>> +       };
>> +
>> +       return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
>> +}
>> +
>> +static void __query_cs_cycles(int i915, void *data, int err)
>> +{
>> +       struct drm_i915_query_item item = {
>> +               .query_id = DRM_I915_QUERY_CS_CYCLES,
>> +               .data_ptr = to_user_pointer(data),
>> +               .length = sizeof(struct drm_i915_query_cs_cycles),
>> +       };
>> +
>> +       i915_query_items(i915, &item, 1);
>> +
>> +       if (err)
>> +               igt_assert(item.length == -err);
>
>igt_assert_eq(item.length, -err);
>
>s/err/expect/
>
>I was looking for how err was being returned from i915_query_items....
>
>And might as well pass in -EINVAL.
>
>
>> +}
>> +
>> +static bool engine_has_cs_cycles(int i915, uint16_t class, uint16_t instance)
>> +{
>> +       struct drm_i915_query_cs_cycles ts = {};
>> +       struct drm_i915_query_item item = {
>> +               .query_id = DRM_I915_QUERY_CS_CYCLES,
>> +               .data_ptr = to_user_pointer(&ts),
>> +               .length = sizeof(struct drm_i915_query_cs_cycles),
>> +       };
>> +
>> +       ts.engine.engine_class = class;
>> +       ts.engine.engine_instance = instance;
>> +
>> +       i915_query_items(i915, &item, 1);
>> +
>> +       return item.length != -ENODEV;
>> +}
>
>
>Let's refactor the duplicated code
>
>static void __query_cs_cycles(int i915, void *data)
>{
>       struct drm_i915_query_item item = {
>               .query_id = DRM_I915_QUERY_CS_CYCLES,
>               .data_ptr = to_user_pointer(data),
>               .length = sizeof(struct drm_i915_query_cs_cycles),
>       };
>
>       i915_query_items(i915, &item, 1);
>       return item.length;
>}
>
>static bool query_cs_cycles_supported(int fd)
>{

Thanks for breaking this down. I sent out a new version with these 
changes. The only thing different is that I need to pass .length = 0 to 
query for support. Otherwise I see EFAULT due to NULL data on platforms 
that support this query.

Regards,
Umesh

>	return __query_cs_cycles(fd, NULL) > 0;
>}
>
>static bool engine_has_cs_cycles(int i915, uint16_t class, uint16_t instance)
>{
>       struct drm_i915_query_cs_cycles ts = {
>	       .engine = { class, instance }
>       };
>
>       return __query_cs_cycles(i915, &ts) != -ENODEV;
>}
>
>> +static void
>> +__cs_cycles(int i915, struct i915_engine_class_instance *engine)
>> +{
>> +       struct drm_i915_query_cs_cycles ts1 = {};
>> +       struct drm_i915_query_cs_cycles ts2 = {};
>> +       uint64_t delta_cpu, delta_cs, delta_delta;
>> +       int i, usable = 0;
>> +       struct {
>> +               int32_t id;
>> +               const char *name;
>> +       } clock[] = {
>> +               { CLOCK_MONOTONIC, "CLOCK_MONOTONIC" },
>> +               { CLOCK_MONOTONIC_RAW, "CLOCK_MONOTONIC_RAW" },
>> +               { CLOCK_REALTIME, "CLOCK_REALTIME" },
>> +               { CLOCK_BOOTTIME, "CLOCK_BOOTTIME" },
>> +               { CLOCK_TAI, "CLOCK_TAI" },
>> +       };
>> +
>> +       igt_debug("engine[%u:%u]\n",
>> +                 engine->engine_class,
>> +                 engine->engine_instance);
>> +
>> +       /* Try a new clock every 10 iterations. */
>> +#define NUM_SNAPSHOTS 10
>> +       for (i = 0; i < NUM_SNAPSHOTS * ARRAY_SIZE(clock); i++) {
>> +               int index = i / NUM_SNAPSHOTS;
>> +
>> +               ts1.engine = *engine;
>> +               ts1.clockid = clock[index].id;
>> +
>> +               ts2.engine = *engine;
>> +               ts2.clockid = clock[index].id;
>> +
>> +               __query_cs_cycles(i915, &ts1, 0);
>> +               __query_cs_cycles(i915, &ts2, 0);
>
>igt_assert_eq(__query_cs_cycles(i915, &ts1), 0);
>igt_assert_eq(__query_cs_cycles(i915, &ts2), 0);
>
>> +
>> +               igt_debug("[1] cpu_ts before %llu, reg read time %llu\n",
>> +                         ts1.cpu_timestamp[0],
>> +                         ts1.cpu_timestamp[1]);
>> +               igt_debug("[1] cs_ts %llu, freq %llu Hz\n",
>> +                         ts1.cs_cycles, ts1.cs_frequency);
>> +
>> +               igt_debug("[2] cpu_ts before %llu, reg read time %llu\n",
>> +                         ts2.cpu_timestamp[0],
>> +                         ts2.cpu_timestamp[1]);
>> +               igt_debug("[2] cs_ts %llu, freq %llu Hz\n",
>> +                         ts2.cs_cycles, ts2.cs_frequency);
>> +
>> +               delta_cpu = ts2.cpu_timestamp[0] - ts1.cpu_timestamp[0];
>> +               delta_cs = (ts2.cs_cycles - ts1.cs_cycles) *
>> +                          NSEC_PER_SEC / ts1.cs_frequency;
>> +
>> +               igt_debug("delta_cpu[%lu], delta_cs[%lu]\n",
>> +                         delta_cpu, delta_cs);
>> +
>> +               delta_delta = delta_cpu > delta_cs ?
>> +                              delta_cpu - delta_cs :
>> +                              delta_cs - delta_cpu;
>> +               igt_debug("delta_delta %lu\n", delta_delta);
>> +
>> +               if (delta_delta < 5000)
>> +                       usable++;
>
>Nothing is keeping the CS awake, its counter is allowed to switch off
>between reads.
>
>I would run this test with a spinner in the background
>
>uint32_t ctx = gem_context_create_for_engine(i915, engine.class, engine.instance);
>igt_spin_t *spin = igt_spin_new(i915, ctx);
>gem_context_destroy(i915, ctx);
>
>...
>
>igt_spin_free(i915, spin);
>
>And even repeat the test to see if the counter does switch off. Although
>we can hypothesis that one day it may be replaced by an always running
>counter.
>
>> +
>> +               /*
>> +                * User needs few good snapshots of the timestamps to
>> +                * synchronize cpu time with cs time. Check if we have enough
>> +                * usable values before moving to the next clockid.
>> +                */
>> +               if (!((i + 1) % NUM_SNAPSHOTS)) {
>> +                       igt_debug("clock %s\n", clock[index].name);
>> +                       igt_debug("usable %d\n", usable);
>> +                       igt_assert(usable > 2);
>> +                       usable = 0;
>> +               }
>> +       }
>> +}
>> +
>> +static void test_cs_cycles(int i915)
>> +{
>> +       const struct intel_execution_engine2 *e;
>> +       struct i915_engine_class_instance engine;
>> +
>> +       __for_each_physical_engine(i915, e) {
>> +               if (engine_has_cs_cycles(i915, e->class, e->instance)) {
>> +                       engine.engine_class = e->class;
>> +                       engine.engine_instance = e->instance;
>> +                       __cs_cycles(i915, &engine);
>> +               }
>> +       }
>> +}
>> +
>> +static void test_cs_cycles_invalid(int i915)
>> +{
>> +       struct i915_engine_class_instance engine;
>> +       const struct intel_execution_engine2 *e;
>> +       struct drm_i915_query_cs_cycles ts = {};
>> +
>> +       /* get one engine */
>> +       __for_each_physical_engine(i915, e)
>> +               break;
>
>	/* sanity check engine selection is valid */
>	ts.engine.engine_class = e->class;
>	ts.engine.engine_instance = e->instance;
>	igt_assert_eq(__query_cs_cycles(i915, &ts), 0);
>
>Otherwise you won't know that the EINVALs are because it didn't like the
>flags or class or instance in a moment.
>
>> +       /* bad engines */
>> +       ts.engine.engine_class = e->class;
>> +       ts.engine.engine_instance = -1;
>> +       __query_cs_cycles(i915, &ts, EINVAL);
>
>	igt_assert_eq(__query_cs_cycles(i915, &ts), -EINVAL);
>	...
>
>> +
>> +       ts.engine.engine_class = -1;
>> +       ts.engine.engine_instance = e->instance;
>> +       __query_cs_cycles(i915, &ts, EINVAL);
>> +
>> +       ts.engine.engine_class = -1;
>> +       ts.engine.engine_instance = -1;
>> +       __query_cs_cycles(i915, &ts, EINVAL);
>> +
>> +       /* non zero flags */
>> +       ts.flags = 1;
>> +       ts.engine.engine_class = e->class;
>> +       ts.engine.engine_instance = e->instance;
>> +       __query_cs_cycles(i915, &ts, EINVAL);
>> +
>> +       /* non zero rsvd field */
>> +       ts.flags = 0;
>> +       ts.rsvd = 1;
>> +       __query_cs_cycles(i915, &ts, EINVAL);
>> +
>> +       /* bad clockid */
>> +       ts.rsvd = 0;
>> +       ts.clockid = -1;
>> +       __query_cs_cycles(i915, &ts, EINVAL);
>> +
>> +       /* sanity check */
>> +       engine.engine_class = e->class;
>> +       engine.engine_instance = e->instance;
>> +       __cs_cycles(i915, &engine);
>> +}
>> +
>>  /*
>>   * Verify that we get coherent values between the legacy getparam slice/subslice
>>   * masks and the new topology query.
>> @@ -783,6 +959,19 @@ igt_main
>>                         engines(fd);
>>         }
>>
>> +       igt_subtest_group {
>> +               igt_fixture {
>> +                       igt_require(intel_gen(devid) >= 6);
>> +                       igt_require(query_cs_cycles_supported(fd));
>
>query_cs_cycles_supported() should cover gen >= 6, and the test itself
>is not gen specific (i.e no instructions that require a later gen).
>
>> +               }
>> +
>> +               igt_subtest("cs-cycles")
>> +                       test_cs_cycles(fd);
>> +
>> +               igt_subtest("cs-cycles-invalid")
>> +                       test_cs_cycles_invalid(fd);
>
>Do the invalid check first, it's just neater flow when using --run cs-cycles*
>
>> +       }
>> +
>>         igt_fixture {
>>                 close(fd);
>>         }
>> --
>> 2.20.1
>>


More information about the igt-dev mailing list