[Intel-gfx] [PATCH i-g-t] i915/gem_exec_balancer: Throw a few hangs into the virtual pipelines
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Nov 15 15:26:48 UTC 2019
On 15/11/2019 14:59, Chris Wilson wrote:
> Quoting Chris Wilson (2019-11-15 14:58:00)
>> Quoting Tvrtko Ursulin (2019-11-15 14:52:16)
>>>
>>> On 15/11/2019 13:09, Chris Wilson wrote:
>>>> Quoting Tvrtko Ursulin (2019-11-15 13:02:24)
>>>>>
>>>>> On 14/11/2019 19:15, Chris Wilson wrote:
>>>>>> Although a virtual engine itself has no hang detection — that is
>>>>>> handled by the underlying physical engines — it does provide a
>>>>>> unique means for clients to try and break the system. Try and
>>>>>> break it before they do.
>>>>>>
>>>>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>>>>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>>>>>> ---
>>>>>> tests/i915/gem_exec_balancer.c | 105 +++++++++++++++++++++++++++++++++
>>>>>> 1 file changed, 105 insertions(+)
>>>>>>
>>>>>> diff --git a/tests/i915/gem_exec_balancer.c b/tests/i915/gem_exec_balancer.c
>>>>>> index 70c4529b4..86028cfdd 100644
>>>>>> --- a/tests/i915/gem_exec_balancer.c
>>>>>> +++ b/tests/i915/gem_exec_balancer.c
>>>>>> @@ -24,6 +24,7 @@
>>>>>> #include <sched.h>
>>>>>>
>>>>>> #include "igt.h"
>>>>>> +#include "igt_gt.h"
>>>>>> #include "igt_perf.h"
>>>>>> #include "i915/gem_ring.h"
>>>>>> #include "sw_sync.h"
>>>>>> @@ -1314,6 +1315,102 @@ static void semaphore(int i915)
>>>>>> gem_quiescent_gpu(i915);
>>>>>> }
>>>>>>
>>>>>> +static void set_unbannable(int i915, uint32_t ctx)
>>>>>> +{
>>>>>> + struct drm_i915_gem_context_param p = {
>>>>>> + .ctx_id = ctx,
>>>>>> + .param = I915_CONTEXT_PARAM_BANNABLE,
>>>>>> + };
>>>>>> +
>>>>>> + igt_assert_eq(__gem_context_set_param(i915, &p), 0);
>>>>>> +}
>>>>>> +
>>>>>> +static void hangme(int i915)
>>>>>> +{
>>>>>> + struct drm_i915_gem_exec_object2 batch = {
>>>>>> + .handle = batch_create(i915),
>>>>>> + };
>>>>>> +
>>>>>> + /*
>>>>>> + * Fill the available engines with hanging virtual engines and verify
>>>>>> + * that execution continues onto the second batch.
>>>>>> + */
>>>>>> +
>>>>>> + for (int class = 1; class < 32; class++) {
>>>>>> + struct i915_engine_class_instance *ci;
>>>>>> + struct client {
>>>>>> + igt_spin_t *spin[2];
>>>>>> + } *client;
>>>>>> + unsigned int count;
>>>>>> + uint32_t bg;
>>>>>> +
>>>>>> + ci = list_engines(i915, 1u << class, &count);
>>>>>> + if (!ci)
>>>>>> + continue;
>>>>>> +
>>>>>> + if (count < 2) {
>>>>>> + free(ci);
>>>>>> + continue;
>>>>>> + }
>>>>>> +
>>>>>> + client = malloc(sizeof(*client) * count);
>>>>>> + igt_assert(client);
>>>>>> +
>>>>>> + for (int i = 0; i < count; i++) {
>>>>>> + uint32_t ctx = gem_context_create(i915);
>>>>>> + struct client *c = &client[i];
>>>>>> + unsigned int flags;
>>>>>> +
>>>>>> + set_unbannable(i915, ctx);
>>>>>> + set_load_balancer(i915, ctx, ci, count, NULL);
>>>>>> +
>>>>>> + flags = IGT_SPIN_FENCE_OUT | IGT_SPIN_NO_PREEMPTION;
>>>>>> + for (int j = 0; j < ARRAY_SIZE(c->spin); j++) {
>>>>>> + c->spin[j] = igt_spin_new(i915, ctx,
>>>>>> + .flags = flags);
>>>>>> + flags = IGT_SPIN_FENCE_OUT;
>>>>>> + }
>>>>>> +
>>>>>> + gem_context_destroy(i915, ctx);
>>>>>> + }
>>>>>> +
>>>>>> + /* Apply some background context to speed up hang detection */
>>>>>> + bg = gem_context_create(i915);
>>>>>> + set_engines(i915, bg, ci, count);
>>>>>> + for (int i = 0; i < count; i++) {
>>>>>> + struct drm_i915_gem_execbuffer2 execbuf = {
>>>>>> + .buffers_ptr = to_user_pointer(&batch),
>>>>>> + .buffer_count = 1,
>>>>>> + .flags = i,
>>>>>> + .rsvd1 = bg,
>>>>>> + };
>>>>>> + gem_execbuf(i915, &execbuf);
>>>>>> + }
>>>>>> + gem_context_destroy(i915, bg);
>>>>>> +
>>>>>> + for (int i = 0; i < count; i++) {
>>>>>> + struct client *c = &client[i];
>>>>>> +
>>>>>> + igt_debug("Waiting for client[%d].spin[%d]\n", i, 0);
>>>>>> + gem_sync(i915, c->spin[0]->handle);
>>>>>> + igt_assert_eq(sync_fence_status(c->spin[0]->out_fence),
>>>>>> + -EIO);
>>>>>> +
>>>>>> + igt_debug("Waiting for client[%d].spin[%d]\n", i, 1);
>>>>>> + gem_sync(i915, c->spin[1]->handle);
>>>>>> + igt_assert_eq(sync_fence_status(c->spin[1]->out_fence),
>>>>>> + -EIO);
>>>>>> +
>>>>>> + igt_spin_free(i915, c->spin[0]);
>>>>>> + igt_spin_free(i915, c->spin[1]);
>>>>>> + }
>>>>>> + free(client);
>>>>>> + }
>>>>>> +
>>>>>> + gem_close(i915, batch.handle);
>>>>>> + gem_quiescent_gpu(i915);
>>>>>> +}
>>>>>> +
>>>>>> static void smoketest(int i915, int timeout)
>>>>>> {
>>>>>> struct drm_i915_gem_exec_object2 batch[2] = {
>>>>>> @@ -1486,4 +1583,12 @@ igt_main
>>>>>> igt_fixture {
>>>>>> igt_stop_hang_detector();
>>>>>> }
>>>>>> +
>>>>>> + igt_subtest("hang") {
>>>>>> + igt_hang_t hang = igt_allow_hang(i915, 0, 0);
>>>>>> +
>>>>>> + hangme(i915);
>>>>>> +
>>>>>> + igt_disallow_hang(i915, hang);
>>>>>> + }
>>>>>> }
>>>>>>
>>>>>
>>>>> Looks good. But do we need some core helpers to figure out when preempt
>>>>> timeout is compiled out?
>>>>
>>>> It should still work the same, but slower; 10s hang detection rather
>>>> than ~200ms.
>>>
>>> Are you talking about the old hangcheck? I was thinking about all the
>>> new Kconfigs compiled out. No heartbeats, no preemption timeout. Still works?
>>
>> Works even faster. :)
>>
>> The spinners then get killed when the contexts are closed (default is
>> non-persistent contexts if you disable heartbeats entirely). The
>> challenge is really on the per-engine heartbeat controls to make sure we
>> kick off the dead contexts, but that's for the future.
>
> And for the other kconfig, with no preemption timeout, you just get
> regular heartbeats, so roughly the 10s hangcheck timeout.
Good then. No other opens:
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko
More information about the Intel-gfx
mailing list