[igt-dev] [Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle

Fri Jun 29 15:43:54 UTC 2018

On 29/06/2018 16:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 16:15:04)
>>
>> On 28/06/2018 22:35, Chris Wilson wrote:
>>> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
>>> pressure (by trying to execute more objects than may fit into the GTT).
>>> We spread the same set of handles across different processes, with the
>>> result that each would occasionally stall waiting for execution of an
>>> unrelated batch, limiting the pressure we were applying. If we use a
>>> streaming write via a WC pointer, we can avoid the serialisation penalty
>>> and so submit faster.
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> ---
>>>    tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
>>>    1 file changed, 42 insertions(+), 24 deletions(-)
>>>
>>> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
>>> index 4097e4077..efd612bb6 100644
>>> --- a/tests/gem_exec_gttfill.c
>>> +++ b/tests/gem_exec_gttfill.c
>>> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>>>    
>>>    #define BATCH_SIZE (4096<<10)
>>>    
>>> -static void xchg_u32(void *array, unsigned i, unsigned j)
>>> +struct batch {
>>> +     uint32_t handle;
>>> +     void *ptr;
>>> +};
>>> +
>>> +static void xchg_batch(void *array, unsigned int i, unsigned int j)
>>>    {
>>> -     uint32_t *u32 = array;
>>> -     uint32_t tmp = u32[i];
>>> -     u32[i] = u32[j];
>>> -     u32[j] = tmp;
>>> +     struct batch *batches = array;
>>> +     struct batch tmp;
>>> +
>>> +     tmp = batches[i];
>>> +     batches[i] = batches[j];
>>> +     batches[j] = tmp;
>>>    }
>>>    
>>>    static void submit(int fd, int gen,
>>>                   struct drm_i915_gem_execbuffer2 *eb,
>>>                   struct drm_i915_gem_relocation_entry *reloc,
>>> -                uint32_t *handles, unsigned count)
>>> +                struct batch *batches, unsigned int count)
>>>    {
>>>        struct drm_i915_gem_exec_object2 obj;
>>>        uint32_t batch[16];
>>> @@ -80,7 +87,7 @@ static void submit(int fd, int gen,
>>>    
>>>        eb->buffers_ptr = to_user_pointer(&obj);
>>>        for (unsigned i = 0; i < count; i++) {
>>> -             obj.handle = handles[i];
>>> +             obj.handle = batches[i].handle;
>>>                reloc[0].target_handle = obj.handle;
>>>                reloc[1].target_handle = obj.handle;
>>>    
>>> @@ -88,8 +95,8 @@ static void submit(int fd, int gen,
>>>                reloc[0].presumed_offset = obj.offset;
>>>                reloc[1].presumed_offset = obj.offset;
>>>    
>>> -             gem_write(fd, obj.handle, eb->batch_start_offset,
>>> -                       batch, sizeof(batch));
>>> +             memcpy(batches[i].ptr + eb->batch_start_offset,
>>> +                    batch, sizeof(batch));
>>>    
>>>                gem_execbuf(fd, eb);
>>>        }
>>> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>>>        struct drm_i915_gem_execbuffer2 execbuf;
>>>        struct drm_i915_gem_relocation_entry reloc[2];
>>>        volatile uint64_t *shared;
>>> -     unsigned *handles;
>>> +     struct batch *batches;
>>>        unsigned engines[16];
>>>        unsigned nengine;
>>>        unsigned engine;
>>> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>>>        if (gen < 6)
>>>                execbuf.flags |= I915_EXEC_SECURE;
>>>    
>>> -     handles = calloc(count, sizeof(handles));
>>> -     igt_assert(handles);
>>> -     for (unsigned i = 0; i < count; i++)
>>> -             handles[i] = gem_create(fd, BATCH_SIZE);
>>> +     batches = calloc(count, sizeof(*batches));
>>> +     igt_assert(batches);
>>> +     for (unsigned i = 0; i < count; i++) {
>>> +             batches[i].handle = gem_create(fd, BATCH_SIZE);
>>> +             batches[i].ptr =
>>> +                     __gem_mmap__wc(fd, batches[i].handle,
>>> +                                    0, BATCH_SIZE, PROT_WRITE);
>>> +             if (!batches[i].ptr) {
>>> +                     batches[i].ptr =
>>> +                             __gem_mmap__gtt(fd, batches[i].handle,
>>> +                                             BATCH_SIZE, PROT_WRITE);
>>> +             }
>>> +             igt_require(batches[i].ptr);
>>
>> Not assert?
> 
> If we fall back to using gtt, we are likely to run out of mappable space,
> in which case we can't run the test. We should only fall back to gtt
> because we can't support WC (the likelihood of it being ENOMEM is
> small). So skip since a failure is expected on old kernels.
> 
>>> +     }
>>>    
>>>        /* Flush all memory before we start the timer */
>>> -     submit(fd, gen, &execbuf, reloc, handles, count);
>>> +     submit(fd, gen, &execbuf, reloc, batches, count);
>>>    
>>>        igt_fork(child, nengine) {
>>>                uint64_t cycles = 0;
>>>                hars_petruska_f54_1_random_perturb(child);
>>> -             igt_permute_array(handles, count, xchg_u32);
>>> +             igt_permute_array(batches, count, xchg_batch);
>>>                execbuf.batch_start_offset = child*64;
>>>                execbuf.flags |= engines[child];
>>>                igt_until_timeout(timeout) {
>>> -                     submit(fd, gen, &execbuf, reloc, handles, count);
>>> +                     submit(fd, gen, &execbuf, reloc, batches, count);
>>>                        for (unsigned i = 0; i < count; i++) {
>>> -                             uint32_t handle = handles[i];
>>> -                             uint64_t buf[2];
>>> +                             uint64_t offset, delta;
>>>    
>>> -                             gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
>>> -                             gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
>>> -                             igt_assert_eq_u64(buf[0], buf[1]);
>>
>> No flushing or domain management needed, especially since it can be
>> either wc or gtt mmap?
> 
> It's a UC read of a buffer known to be already flushed from the CPU caches
> with a prior gem_sync, so no, not required. Considering that asynchronous
> access is the whole point of the patch...

True.

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Regards,

Tvrtko