[Intel-gfx] [PATCH i-g-t] igt/gem_exec_gttfill: Avoid pwrite into busy handle
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Fri Jun 29 15:43:54 UTC 2018
On 29/06/2018 16:22, Chris Wilson wrote:
> Quoting Tvrtko Ursulin (2018-06-29 16:15:04)
>>
>> On 28/06/2018 22:35, Chris Wilson wrote:
>>> The goal of gem_exec_gttfill is to exercise execbuf under heavy GTT
>>> pressure (by trying to execute more objects than may fit into the GTT).
>>> We spread the same set of handles across different processes, with the
>>> result that each would occasionally stall waiting for execution of an
>>> unrelated batch, limiting the pressure we were applying. If we use a
>>> streaming write via a WC pointer, we can avoid the serialisation penalty
>>> and so submit faster.
>>>
>>> Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
>>> ---
>>> tests/gem_exec_gttfill.c | 66 +++++++++++++++++++++++++---------------
>>> 1 file changed, 42 insertions(+), 24 deletions(-)
>>>
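The core of the change below: each submit's gem_write() (a pwrite ioctl, which the kernel may serialise against outstanding GPU work on the handle) is replaced with a streaming write through a CPU mapping, which never waits for the GPU. A condensed, illustrative contrast (see the diff for the real hunk):

	/* Before: pwrite can stall if the object is still busy on the GPU,
	 * e.g. behind an unrelated batch submitted by another process. */
	gem_write(fd, obj.handle, eb->batch_start_offset, batch, sizeof(batch));

	/* After: a streaming write through a WC (or GTT) mmap does not wait
	 * for the GPU, so each child keeps the GTT under pressure. */
	memcpy(batches[i].ptr + eb->batch_start_offset, batch, sizeof(batch));
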
>>> diff --git a/tests/gem_exec_gttfill.c b/tests/gem_exec_gttfill.c
>>> index 4097e4077..efd612bb6 100644
>>> --- a/tests/gem_exec_gttfill.c
>>> +++ b/tests/gem_exec_gttfill.c
>>> @@ -28,18 +28,25 @@ IGT_TEST_DESCRIPTION("Fill the GTT with batches.");
>>>
>>> #define BATCH_SIZE (4096<<10)
>>>
>>> -static void xchg_u32(void *array, unsigned i, unsigned j)
>>> +struct batch {
>>> + uint32_t handle;
>>> + void *ptr;
>>> +};
>>> +
>>> +static void xchg_batch(void *array, unsigned int i, unsigned int j)
>>> {
>>> - uint32_t *u32 = array;
>>> - uint32_t tmp = u32[i];
>>> - u32[i] = u32[j];
>>> - u32[j] = tmp;
>>> + struct batch *batches = array;
>>> + struct batch tmp;
>>> +
>>> + tmp = batches[i];
>>> + batches[i] = batches[j];
>>> + batches[j] = tmp;
>>> }
>>>
>>> static void submit(int fd, int gen,
>>> struct drm_i915_gem_execbuffer2 *eb,
>>> struct drm_i915_gem_relocation_entry *reloc,
>>> - uint32_t *handles, unsigned count)
>>> + struct batch *batches, unsigned int count)
>>> {
>>> struct drm_i915_gem_exec_object2 obj;
>>> uint32_t batch[16];
>>> @@ -80,7 +87,7 @@ static void submit(int fd, int gen,
>>>
>>> eb->buffers_ptr = to_user_pointer(&obj);
>>> for (unsigned i = 0; i < count; i++) {
>>> - obj.handle = handles[i];
>>> + obj.handle = batches[i].handle;
>>> reloc[0].target_handle = obj.handle;
>>> reloc[1].target_handle = obj.handle;
>>>
>>> @@ -88,8 +95,8 @@ static void submit(int fd, int gen,
>>> reloc[0].presumed_offset = obj.offset;
>>> reloc[1].presumed_offset = obj.offset;
>>>
>>> - gem_write(fd, obj.handle, eb->batch_start_offset,
>>> - batch, sizeof(batch));
>>> + memcpy(batches[i].ptr + eb->batch_start_offset,
>>> + batch, sizeof(batch));
>>>
>>> gem_execbuf(fd, eb);
>>> }
>>> @@ -103,7 +110,7 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>>> struct drm_i915_gem_execbuffer2 execbuf;
>>> struct drm_i915_gem_relocation_entry reloc[2];
>>> volatile uint64_t *shared;
>>> - unsigned *handles;
>>> + struct batch *batches;
>>> unsigned engines[16];
>>> unsigned nengine;
>>> unsigned engine;
>>> @@ -145,29 +152,38 @@ static void fillgtt(int fd, unsigned ring, int timeout)
>>> if (gen < 6)
>>> execbuf.flags |= I915_EXEC_SECURE;
>>>
>>> - handles = calloc(count, sizeof(handles));
>>> - igt_assert(handles);
>>> - for (unsigned i = 0; i < count; i++)
>>> - handles[i] = gem_create(fd, BATCH_SIZE);
>>> + batches = calloc(count, sizeof(*batches));
>>> + igt_assert(batches);
>>> + for (unsigned i = 0; i < count; i++) {
>>> + batches[i].handle = gem_create(fd, BATCH_SIZE);
>>> + batches[i].ptr =
>>> + __gem_mmap__wc(fd, batches[i].handle,
>>> + 0, BATCH_SIZE, PROT_WRITE);
>>> + if (!batches[i].ptr) {
>>> + batches[i].ptr =
>>> + __gem_mmap__gtt(fd, batches[i].handle,
>>> + BATCH_SIZE, PROT_WRITE);
>>> + }
>>> + igt_require(batches[i].ptr);
>>
>> Not assert?
>
> If we fall back to using gtt, we are likely to run out of mappable space,
> in which case we can't run the test. We should only fall back to gtt
> because we can't support WC (the likelihood of it being ENOMEM is
> small). So skip since a failure is expected on old kernels.
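For reference, the mapping strategy under discussion boils down to this minimal sketch (the map_batch() helper name is invented here; the test open-codes the same logic per object):

	static void *map_batch(int fd, uint32_t handle, uint64_t size)
	{
		void *ptr;

		/* Prefer a WC mmap for streaming CPU writes. */
		ptr = __gem_mmap__wc(fd, handle, 0, size, PROT_WRITE);
		if (!ptr) {
			/* No WC mmap support (old kernel): fall back to a GTT
			 * mmap, which may exhaust the mappable aperture. */
			ptr = __gem_mmap__gtt(fd, handle, size, PROT_WRITE);
		}

		/* Skip rather than assert: failing to map is expected on old
		 * kernels, not a bug in the kernel under test. */
		igt_require(ptr);
		return ptr;
	}
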
>
>>> + }
>>>
>>> /* Flush all memory before we start the timer */
>>> - submit(fd, gen, &execbuf, reloc, handles, count);
>>> + submit(fd, gen, &execbuf, reloc, batches, count);
>>>
>>> igt_fork(child, nengine) {
>>> uint64_t cycles = 0;
>>> hars_petruska_f54_1_random_perturb(child);
>>> - igt_permute_array(handles, count, xchg_u32);
>>> + igt_permute_array(batches, count, xchg_batch);
>>> execbuf.batch_start_offset = child*64;
>>> execbuf.flags |= engines[child];
>>> igt_until_timeout(timeout) {
>>> - submit(fd, gen, &execbuf, reloc, handles, count);
>>> + submit(fd, gen, &execbuf, reloc, batches, count);
>>> for (unsigned i = 0; i < count; i++) {
>>> - uint32_t handle = handles[i];
>>> - uint64_t buf[2];
>>> + uint64_t offset, delta;
>>>
>>> - gem_read(fd, handle, reloc[1].offset, &buf[0], sizeof(buf[0]));
>>> - gem_read(fd, handle, reloc[0].delta, &buf[1], sizeof(buf[1]));
>>> - igt_assert_eq_u64(buf[0], buf[1]);
>>
>> No flushing or domain management needed, especially since it can be
>> either wc or gtt mmap?
>
> It's a UC read of a buffer known to have already been flushed from the CPU
> caches by a prior gem_sync, so no, not required. Considering that asynchronous
> access is the whole point of the patch...
True.
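For completeness, the readback this covers amounts to something like the sketch below (illustrative, not necessarily the exact lines of the patch), reading the result words back through the same mapping:

	/* The object is idle (prior gem_sync) and the read goes through an
	 * uncached WC/GTT mapping, so no set-domain or extra flushing is
	 * needed before checking where the write landed. */
	uint64_t offset, delta;

	offset = *(volatile uint64_t *)(batches[i].ptr + reloc[1].offset);
	delta = *(volatile uint64_t *)(batches[i].ptr + reloc[0].delta);
	igt_assert_eq_u64(offset, delta);
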
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Regards,
Tvrtko