[igt-dev] [Intel-gfx] [PATCH i-g-t 6/6] tests/i915_query: Engine queues tests

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Fri Jun 8 10:02:20 UTC 2018


On 06/06/2018 15:33, Lionel Landwerlin wrote:
> Just a few suggestions below. Otherwise looks good to me.
> 
> On 06/06/18 13:49, Tvrtko Ursulin wrote:
>> From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>
>> Basic tests to cover engine queued/runnable/running metric as reported
>> by the DRM_I915_QUERY_ENGINE_QUEUES query.
>>
>> v2:
>>   * Update ABI for i915 changes.
>>   * Use igt_spin_busywait_until_running.
>>   * Support no hardware contexts.
>>   * More comments. (Lionel Landwerlin)
>>   Chris Wilson:
>>   * Check for sw sync support.
>>   * Multiple contexts queued test.
>>   * Simplify context and bb allocation.
>>   * Fix asserts in the running subtest.
>>
>> v3:
>>   * Update for uAPI change.
>>
>> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> ---
>>   tests/i915_query.c | 442 +++++++++++++++++++++++++++++++++++++++++++++
>>   1 file changed, 442 insertions(+)
>>
>> diff --git a/tests/i915_query.c b/tests/i915_query.c
>> index c7de8cbd8371..588428749c50 100644
>> --- a/tests/i915_query.c
>> +++ b/tests/i915_query.c
>> @@ -22,6 +22,7 @@
>>    */
>>   #include "igt.h"
>> +#include "sw_sync.h"
>>   #include <limits.h>
>> @@ -477,8 +478,414 @@ test_query_topology_known_pci_ids(int fd, int devid)
>>       free(topo_info);
>>   }
>> +#define DRM_I915_QUERY_ENGINE_QUEUES    2
>> +
>> +struct drm_i915_query_engine_queues {
>> +    /** Engine class as in enum drm_i915_gem_engine_class. */
>> +    __u16 class;
>> +
>> +    /** Engine instance number. */
>> +    __u16 instance;
>> +
>> +    /** Number of requests with unresolved fences and dependencies. */
>> +    __u32 queued;
>> +
>> +    /** Number of ready requests waiting on a slot on GPU. */
>> +    __u32 runnable;
>> +
>> +    /** Number of requests executing on the GPU. */
>> +    __u32 running;
>> +
>> +    __u32 rsvd[6];
>> +};
> 
> I suppose this above would be in the updated i915_drm.h.
> I thought you would have put it there as part of your first commit.

Yes, or I should prefix the copy with local_ as we usually do.
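
Something like this as a sketch for the respin (fields copied verbatim
from the patch, naming per the usual IGT convention, untested):

    #define LOCAL_DRM_I915_QUERY_ENGINE_QUEUES 2

    struct local_drm_i915_query_engine_queues {
        /** Engine class as in enum drm_i915_gem_engine_class. */
        __u16 class;

        /** Engine instance number. */
        __u16 instance;

        /** Number of requests with unresolved fences and dependencies. */
        __u32 queued;

        /** Number of ready requests waiting on a slot on GPU. */
        __u32 runnable;

        /** Number of requests executing on the GPU. */
        __u32 running;

        __u32 rsvd[6];
    };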

>> +
>> +static bool query_engine_queues_supported(int fd)
>> +{
>> +    struct drm_i915_query_item item = {
>> +        .query_id = DRM_I915_QUERY_ENGINE_QUEUES,
>> +    };
>> +
>> +    return __i915_query_items(fd, &item, 1) == 0 && item.length > 0;
>> +}
>> +
>> +static void engine_queues_invalid(int fd)
>> +{
>> +    struct drm_i915_query_engine_queues queues;
>> +    struct drm_i915_query_item item;
>> +    unsigned int len;
>> +    unsigned int i;
>> +
>> +    /* Flags is MBZ. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.flags = 1;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -EINVAL);
>> +
>> +    /* Length not zero and not greater or equal required size. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = 1;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -EINVAL);
>> +
>> +    /* Query correct length. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert(item.length >= 0);
> 
> Could be item.length > 0?

True, it should even be:
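
    igt_assert(item.length > 0);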

> 
>> +    len = item.length;
>> +
>> +    /* Invalid pointer. */
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = len;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -EFAULT);
> 
> Maybe verify ULONG_MAX too?

item.data_ptr = ULONG_MAX? Can do.
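
Sketch for the respin (assuming the kernel rejects the unmapped top of
the address space with -EFAULT, same as the NULL pointer case above):

    /* Invalid pointer at the top of the address space. */
    memset(&item, 0, sizeof(item));
    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
    item.length = len;
    item.data_ptr = ULONG_MAX;
    i915_query_items(fd, &item, 1);
    igt_assert_eq(item.length, -EFAULT);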

> 
>> +
>> +    /* Reserved fields are MBZ. */
>> +
>> +    for (i = 0; i < ARRAY_SIZE(queues.rsvd); i++) {
>> +        memset(&queues, 0, sizeof(queues));
>> +        queues.rsvd[i] = 1;
>> +        memset(&item, 0, sizeof(item));
>> +        item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +        item.length = len;
>> +        item.data_ptr = to_user_pointer(&queues);
>> +        i915_query_items(fd, &item, 1);
>> +        igt_assert_eq(item.length, -EINVAL);
>> +    }
>> +
>> +    memset(&queues, 0, sizeof(queues));
>> +    queues.class = -1;
> 
> class = I915_ENGINE_CLASS_INVALID ?

Yes thanks!
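
Will change it to (I915_ENGINE_CLASS_INVALID is -1 in i915_drm.h, so
the behaviour stays the same):

    memset(&queues, 0, sizeof(queues));
    queues.class = I915_ENGINE_CLASS_INVALID;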

> 
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = len;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -ENOENT);
>> +
>> +    memset(&queues, 0, sizeof(queues));
>> +    queues.instance = -1;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.length = len;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, -ENOENT);
>> +}
>> +
>> +static void engine_queues(int fd, const struct intel_execution_engine2 *e)
>> +{
>> +    struct drm_i915_query_engine_queues queues;
>> +    struct drm_i915_query_item item;
>> +    unsigned int len;
>> +
>> +    /* Query required buffer length. */
>> +    memset(&queues, 0, sizeof(queues));
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert(item.length >= 0);
> 
> item.length > 0?

Marking it on the TODO list.

> 
>> +    igt_assert(item.length <= sizeof(queues));
> 
> I guess we can expect item.length == sizeof(queues) here.

Hmm yes, no idea what I was thinking there. :I
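
So the length query at the top of engine_queues() can become simply
something like:

    /* Query required buffer length. */
    memset(&queues, 0, sizeof(queues));
    memset(&item, 0, sizeof(item));
    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
    item.data_ptr = to_user_pointer(&queues);
    i915_query_items(fd, &item, 1);
    igt_assert_eq(item.length, sizeof(queues));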

> 
> 
>> +    len = item.length;
>> +
>> +    /* Check length larger than required works and reports same length. */
>> +    memset(&queues, 0, sizeof(queues));
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    item.length = len + 1;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, len);
>> +
>> +    /* Actual query. */
>> +    memset(&queues, 0, sizeof(queues));
>> +    queues.class = e->class;
>> +    queues.instance = e->instance;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(&queues);
>> +    item.length = len;
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, len);
>> +}
>> +
>> +static unsigned int e2ring(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +    return gem_class_instance_to_eb_flags(gem_fd, e->class, e->instance);
>> +}
>> +
>> +static void
>> +__query_queues(int fd, const struct intel_execution_engine2 *e,
>> +           struct drm_i915_query_engine_queues *queues)
>> +{
>> +    struct drm_i915_query_item item;
>> +
>> +    memset(queues, 0, sizeof(*queues));
>> +    queues->class = e->class;
>> +    queues->instance = e->instance;
>> +    memset(&item, 0, sizeof(item));
>> +    item.query_id = DRM_I915_QUERY_ENGINE_QUEUES;
>> +    item.data_ptr = to_user_pointer(queues);
>> +    item.length = sizeof(*queues);
>> +    i915_query_items(fd, &item, 1);
>> +    igt_assert_eq(item.length, sizeof(*queues));
>> +}
>> +
>> +#define TEST_CONTEXTS (1 << 0)
>> +
>> +/*
>> + * Test that the reported number of queued (not ready for execution
>> + * due to fences or dependencies) requests on an engine is correct.
>> + */
>> +static void
>> +engine_queued(int gem_fd, const struct intel_execution_engine2 *e,
>> +          unsigned int flags)
>> +{
>> +    const unsigned long engine = e2ring(gem_fd, e);
>> +    struct drm_i915_query_engine_queues queues;
>> +    const uint32_t bbe = MI_BATCH_BUFFER_END;
>> +    const unsigned int max_rq = 10;
>> +    uint32_t queued[max_rq + 1];
>> +    unsigned int n, i;
>> +    uint32_t bo;
>> +
>> +    igt_require_sw_sync();
>> +    if (flags & TEST_CONTEXTS)
>> +        gem_require_contexts(gem_fd);
>> +
>> +    memset(queued, 0, sizeof(queued));
>> +
>> +    bo = gem_create(gem_fd, 4096);
>> +    gem_write(gem_fd, bo, 4092, &bbe, sizeof(bbe));
>> +
>> +    /* Create a specific queue depth of unready requests. */
>> +    for (n = 0; n <= max_rq; n++) {
>> +        int fence = -1;
>> +        IGT_CORK_FENCE(cork);
>> +
>> +        gem_quiescent_gpu(gem_fd);
>> +
>> +        /* Create a cork so we can create a dependency chain. */
>> +        if (n)
>> +            fence = igt_cork_plug(&cork, -1);
>> +
>> +        /* Submit n unready requests depending on the cork. */
>> +        for (i = 0; i < n; i++) {
>> +            struct drm_i915_gem_exec_object2 obj = { };
>> +            struct drm_i915_gem_execbuffer2 eb = { };
>> +
>> +            obj.handle = bo;
>> +
>> +            eb.buffer_count = 1;
>> +            eb.buffers_ptr = to_user_pointer(&obj);
>> +
>> +            eb.flags = engine | I915_EXEC_FENCE_IN;
>> +
>> +            /*
>> +             * In context mode each submission is on a separate
>> +             * context.
>> +             */
>> +            if (flags & TEST_CONTEXTS)
>> +                eb.rsvd1 = gem_context_create(gem_fd);
>> +
>> +            eb.rsvd2 = fence;
>> +
>> +            gem_execbuf(gem_fd, &eb);
>> +
>> +            if (flags & TEST_CONTEXTS)
>> +                gem_context_destroy(gem_fd, eb.rsvd1);
>> +        }
>> +
>> +        /* Store reported queue depth to assert against later. */
>> +        __query_queues(gem_fd, e, &queues);
>> +        queued[n] = queues.queued;
>> +        igt_info("n=%u queued=%u\n", n, queued[n]);
>> +
>> +        /* Unplug the queue and proceed to the next queue depth. */
>> +        if (fence >= 0)
>> +            igt_cork_unplug(&cork);
>> +
>> +        gem_sync(gem_fd, bo);
>> +    }
>> +
>> +    gem_close(gem_fd, bo);
>> +
>> +    for (i = 0; i <= max_rq; i++)
>> +        igt_assert_eq(queued[i], i);
>> +}
>> +
>> +static igt_spin_t * __spin_poll(int fd, uint32_t ctx, unsigned long flags)
>> +{
>> +    if (gem_can_store_dword(fd, flags))
>> +        return __igt_spin_batch_new_poll(fd, ctx, flags);
>> +    else
>> +        return __igt_spin_batch_new(fd, ctx, flags, 0);
>> +}
>> +
>> +static unsigned long __spin_wait(igt_spin_t *spin, unsigned int n)
>> +{
>> +    struct timespec ts = { };
>> +    unsigned long t;
>> +
>> +    igt_nsec_elapsed(&ts);
>> +
>> +    if (spin->running) {
>> +        igt_spin_busywait_until_running(spin);
>> +    } else {
>> +        igt_debug("__spin_wait - usleep mode\n");
>> +        usleep(500e3); /* Better than nothing! */
>> +    }
>> +
>> +    t = igt_nsec_elapsed(&ts);
>> +
>> +    return spin->running ? t : 500e6 / n;
>> +}
>> +
>> +/*
>> + * Test that the number of requests ready for execution but waiting
>> + * for space on the GPU is correctly reported.
>> + */
>> +static void
>> +engine_runnable(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +    const unsigned long engine = e2ring(gem_fd, e);
>> +    struct drm_i915_query_engine_queues queues;
>> +    bool contexts = gem_has_contexts(gem_fd);
>> +    const unsigned int max_rq = 10;
>> +    igt_spin_t *spin[max_rq + 1];
>> +    uint32_t runnable[max_rq + 1];
>> +    uint32_t ctx[max_rq];
>> +    unsigned int n, i;
>> +
>> +    memset(runnable, 0, sizeof(runnable));
>> +
>> +    if (contexts) {
>> +        for (i = 0; i < max_rq; i++)
>> +            ctx[i] = gem_context_create(gem_fd);
>> +    }
>> +
>> +    /*
>> +     * Submit different numbers of requests, potentially against
>> +     * different contexts, in order to provoke the engine runnable
>> +     * metric returning different numbers.
>> +     */
>> +    for (n = 0; n <= max_rq; n++) {
>> +        gem_quiescent_gpu(gem_fd);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            uint32_t ctx_ = contexts ? ctx[i] : 0;
>> +
>> +            if (i == 0)
>> +                spin[i] = __spin_poll(gem_fd, ctx_, engine);
>> +            else
>> +                spin[i] = __igt_spin_batch_new(gem_fd, ctx_,
>> +                                   engine, 0);
>> +        }
>> +
>> +        if (n)
>> +            usleep(__spin_wait(spin[0], n) * n);
>> +
>> +        /* Query and store for later checking. */
>> +        __query_queues(gem_fd, e, &queues);
>> +        runnable[n] = queues.runnable;
>> +        igt_info("n=%u runnable=%u\n", n, runnable[n]);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            igt_spin_batch_end(spin[i]);
>> +            gem_sync(gem_fd, spin[i]->handle);
>> +            igt_spin_batch_free(gem_fd, spin[i]);
>> +        }
>> +    }
>> +
>> +    if (contexts) {
>> +        for (i = 0; i < max_rq; i++)
>> +            gem_context_destroy(gem_fd, ctx[i]);
>> +    }
>> +
>> +    /*
>> +     * Check that the runnable metric is zero when nothing is submitted,
>> +     * and that it is greater than zero on the maximum queue depth.
>> +     *
>> +     * We cannot assert the exact value since we do not know how many
>> +     * requests the submission backend can consume.
>> +     */
>> +    igt_assert_eq(runnable[0], 0);
>> +    igt_assert(runnable[max_rq] > 0);
>> +
>> +    /*
>> +     * We can only test that the runnable metric grows by one if we
>> +     * have context support.
>> +     */
>> +    if (contexts)
>> +        igt_assert_eq(runnable[max_rq] - runnable[max_rq - 1], 1);
>> +}
>> +
>> +/*
>> + * Test that the number of requests currently executing on the GPU is
>> + * correctly reported.
>> + */
>> +static void
>> +engine_running(int gem_fd, const struct intel_execution_engine2 *e)
>> +{
>> +    const unsigned long engine = e2ring(gem_fd, e);
>> +    struct drm_i915_query_engine_queues queues;
>> +    const unsigned int max_rq = 10;
>> +    igt_spin_t *spin[max_rq + 1];
>> +    uint32_t running[max_rq + 1];
>> +    unsigned int n, i;
>> +
>> +    memset(running, 0, sizeof(running));
>> +    memset(spin, 0, sizeof(spin));
>> +
>> +    /*
>> +     * Create various queue depths of requests against the same context
>> +     * to try and get the submission backend to execute one or more on
>> +     * the GPU.
>> +     */
>> +    for (n = 0; n <= max_rq; n++) {
>> +        gem_quiescent_gpu(gem_fd);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            if (i == 0)
>> +                spin[i] = __spin_poll(gem_fd, 0, engine);
>> +            else
>> +                spin[i] = __igt_spin_batch_new(gem_fd, 0,
>> +                                   engine, 0);
>> +        }
>> +
>> +        if (n)
>> +            usleep(__spin_wait(spin[0], n) * n);
>> +
>> +        /* Query and store for later checking. */
>> +        __query_queues(gem_fd, e, &queues);
>> +        running[n] = queues.running;
>> +        igt_info("n=%u running=%u\n", n, running[n]);
>> +
>> +        for (i = 0; i < n; i++) {
>> +            igt_spin_batch_end(spin[i]);
>> +            gem_sync(gem_fd, spin[i]->handle);
>> +            igt_spin_batch_free(gem_fd, spin[i]);
>> +        }
>> +    }
>> +
>> +    /*
>> +     * Check that the running metric is zero when nothing is submitted,
>> +     * one when one request is submitted, and at least one for any
>> +     * greater queue depth.
>> +     *
>> +     * We cannot assert the exact value since we do not know how many
>> +     * requests the submission backend can consume.
>> +     */
>> +    igt_assert_eq(running[0], 0);
>> +    for (i = 1; i <= max_rq; i++)
>> +        igt_assert(running[i] > 0);
>> +}
>> +
>>   igt_main
>>   {
>> +    const struct intel_execution_engine2 *e;
>>       int fd = -1;
>>       int devid;
>> @@ -524,6 +931,41 @@ igt_main
>>           test_query_topology_known_pci_ids(fd, devid);
>>       }
>> +    igt_subtest_group {
>> +        igt_fixture {
>> +            igt_require(query_engine_queues_supported(fd));
>> +        }
>> +
>> +        igt_subtest("engine-queues-invalid")
>> +            engine_queues_invalid(fd);
>> +
>> +        __for_each_engine_class_instance(fd, e) {
>> +            igt_subtest_group {
>> +                igt_fixture {
>> +                    gem_require_engine(fd,
>> +                               e->class,
>> +                               e->instance);
>> +                }
>> +
>> +                igt_subtest_f("engine-queues-%s", e->name)
>> +                    engine_queues(fd, e);
>> +
>> +                igt_subtest_f("engine-queued-%s", e->name)
>> +                    engine_queued(fd, e, 0);
>> +
>> +                igt_subtest_f("engine-queued-contexts-%s",
>> +                          e->name)
>> +                    engine_queued(fd, e, TEST_CONTEXTS);
>> +
>> +                igt_subtest_f("engine-runnable-%s", e->name)
>> +                    engine_runnable(fd, e);
>> +
>> +                igt_subtest_f("engine-running-%s", e->name)
>> +                    engine_running(fd, e);
>> +            }
>> +        }
>> +    }
>> +
>>       igt_fixture {
>>           close(fd);
>>       }

Thanks for reading it through. I have marked the tweaks for the next
resend. The problem with this query is that userspace is AWOL, so until
it materializes this will be just for reference.

Regards,

Tvrtko

