[igt-dev] [RFC PATCH] i915/gem_tiled_blits: Switch from libdrm methods to igt lib

Vanshidhar Konda vanshidhar.r.konda at intel.com
Wed Nov 13 18:15:55 UTC 2019


On Wed, Nov 13, 2019 at 09:51:01AM +0000, Chris Wilson wrote:
>Quoting Vanshidhar Konda (2019-11-13 04:31:50)
>> Switch the test from using libdrm methods to using methods provided by
>> the igt library. Like some of the other gem_tiled* tests, this test also
>> creates the batch buffer used to do the blitter copies. Also, the test
>> avoids calling GET/SET_TILING IOCTLs - something that will not be
>> supported on Gen12+.
>>
>> Signed-off-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
>> ---
>>  tests/i915/gem_tiled_blits.c | 176 +++++++++++++++++++++++++----------
>>  1 file changed, 126 insertions(+), 50 deletions(-)
>>
>> diff --git a/tests/i915/gem_tiled_blits.c b/tests/i915/gem_tiled_blits.c
>> index df0699f3..b72ec600 100644
>> --- a/tests/i915/gem_tiled_blits.c
>> +++ b/tests/i915/gem_tiled_blits.c
>> @@ -57,52 +57,135 @@
>>  IGT_TEST_DESCRIPTION("Test doing many tiled blits, with a working set larger"
>>                      " than the aperture size.");
>>
>> -static drm_intel_bufmgr *bufmgr;
>> -struct intel_batchbuffer *batch;
>>  static int width = 512, height = 512;
>> +int device_gen;
>>
>> -static drm_intel_bo *
>> -create_bo(uint32_t start_val)
>> +static void
>> +copy(int fd, uint32_t dst, uint32_t dst_tiling,
>> +     uint32_t src, uint32_t src_tiling)
>>  {
>> -       drm_intel_bo *bo, *linear_bo;
>> +       uint32_t batch[12];
>
>Be considerate and think in cachelines.

Thanks for pointing that out. I'll keep it in mind.

>
>> +       struct drm_i915_gem_relocation_entry reloc[2];
>> +       struct drm_i915_gem_exec_object2 obj[3];
>> +       struct drm_i915_gem_execbuffer2 exec;
>> +       int src_pitch, dst_pitch;
>> +       int tile_height, tile_width;
>> +       int i = 0;
>> +
>> +       src_pitch = 4096;
>> +       dst_pitch = 4096;
>> +       tile_width = 1024;
>> +       tile_height = width*height*4/4096;
>> +
>> +       batch[i++] = XY_SRC_COPY_BLT_CMD |
>> +                 XY_SRC_COPY_BLT_WRITE_ALPHA |
>> +                 XY_SRC_COPY_BLT_WRITE_RGB;
>> +       if (device_gen >= 8)
>> +               batch[i - 1] |= 8;
>> +       else
>> +               batch[i - 1] |= 6;
>> +
>> +       if (device_gen >= 4 && src_tiling != I915_TILING_NONE) {
>> +               src_pitch /= 4;
>> +               batch[i - 1] |= XY_SRC_COPY_BLT_SRC_TILED;
>> +       }
>> +       if (device_gen >= 4 && dst_tiling != I915_TILING_NONE) {
>> +               dst_pitch /= 4;
>> +               batch[i - 1] |= XY_SRC_COPY_BLT_DST_TILED;
>> +       }
>> +
>> +       batch[i++] = (3 << 24) | /* 32 bits */
>> +                 (0xcc << 16) | /* copy ROP */
>> +                 dst_pitch;
>> +       batch[i++] = 0; /* dst x1,y1 */
>> +       batch[i++] = (tile_height << 16) | tile_width; /* dst x2,y2 */
>> +       batch[i++] = 0; /* dst reloc */
>> +       if (device_gen >= 8)
>> +               batch[i++] = 0;
>> +       batch[i++] = 0; /* src x1,y1 */
>> +       batch[i++] = src_pitch;
>> +       batch[i++] = 0; /* src reloc */
>> +       if (device_gen >= 8)
>> +               batch[i++] = 0;
>> +       batch[i++] = MI_BATCH_BUFFER_END;
>> +       batch[i++] = MI_NOOP;
>> +
>> +       memset(reloc, 0, sizeof(reloc));
>> +       reloc[0].target_handle = dst;
>> +       reloc[0].delta = 0;
>> +       reloc[0].offset = 4 * sizeof(batch[0]);
>> +       reloc[0].presumed_offset = 0;
>> +       reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>> +       reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>> +
>> +       reloc[1].target_handle = src;
>> +       reloc[1].delta = 0;
>> +       reloc[1].offset = 7 * sizeof(batch[0]);
>> +       if (device_gen >= 8)
>> +               reloc[1].offset += sizeof(batch[0]);
>> +       reloc[1].presumed_offset = 0;
>> +       reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>> +       reloc[1].write_domain = 0;
>
>Hmm. Since the batch is fresh every time, relocations are not a gpu
>stall. So horrible but not a test breaker. (Stalling for relocations
>would very effectively nerf the test.)

I'm not sure I follow the comment here. Are you saying it would be
better to keep track of the buffer offset after it has been "relocated"
and then reuse that? Also, would it be better to reuse the batch buffer?
I saw the same approach being taken in gem_linear_blits and thought it
would suffice for testing.

>
>> +       memset(obj, 0, sizeof(obj));
>> +       obj[0].handle = dst;
>> +       obj[1].handle = src;
>> +       obj[2].handle = gem_create(fd, 4096);
>> +       gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
>> +       obj[2].relocation_count = 2;
>> +       obj[2].relocs_ptr = to_user_pointer(reloc);
>> +
>> +       memset(&exec, 0, sizeof(exec));
>> +       exec.buffers_ptr = to_user_pointer(obj);
>> +       exec.buffer_count = 3;
>> +       exec.batch_len = i * sizeof(batch[0]);
>> +       exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
>> +
>> +       gem_execbuf(fd, &exec);
>> +       if (dst_tiling == I915_TILING_NONE)
>> +               gem_sync(fd, obj[2].handle);
>
>Why is there a sync here? What bug are you trying to hide?

This is what I've been trying to figure out as well. I tried using
gem_mmap__cpu() in check_bo() method like you mentioned. Without this
gem_sync() the test fails regardless of gem_mmap__cpu() or
gem_mmap__wc() in the check_bo() method. The data doesn't match the
expected values and the assert in check_bo() method is triggered.

>
>> +       gem_close(fd, obj[2].handle);
>> +}
>> +
>> +static uint32_t
>> +create_bo(int fd, uint32_t start_val)
>> +{
>> +       uint32_t bo, linear_bo;
>>         uint32_t *linear;
>> -       uint32_t tiling = I915_TILING_X;
>>         int i;
>> +       const uint32_t buf_size = 1024 * 1024;
>>
>> -       bo = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
>> -       do_or_die(drm_intel_bo_set_tiling(bo, &tiling, width * 4));
>> -       igt_assert(tiling == I915_TILING_X);
>> -
>> -       linear_bo = drm_intel_bo_alloc(bufmgr, "linear src", 1024 * 1024, 4096);
>> +       bo = gem_create(fd, buf_size);
>> +       linear_bo = gem_create(fd, buf_size);
>>
>> +       linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
>> +                                         PROT_WRITE);
>
>gem_mmap__wc() is not universal. So you need a fallback to __gtt. Or a
>fallback from __gtt.
>
>>         /* Fill the BO with dwords starting at start_val */
>> -       do_or_die(drm_intel_bo_map(linear_bo, 1));
>> -       linear = linear_bo->virtual;
>> -       for (i = 0; i < 1024 * 1024 / 4; i++)
>> +       for (i = 0; i < buf_size / 4; i++)
>>                 linear[i] = start_val++;
>> -       drm_intel_bo_unmap(linear_bo);
>> -
>> -       intel_copy_bo (batch, bo, linear_bo, width*height*4);
>>
>> -       drm_intel_bo_unreference(linear_bo);
>> +       munmap(linear, buf_size);
>> +       copy(fd, bo, I915_TILING_X, linear_bo, I915_TILING_NONE);
>>
>> +       gem_close(fd, linear_bo);
>>         return bo;
>>  }
>>
>>  static void
>> -check_bo(drm_intel_bo *bo, uint32_t val)
>> +check_bo(int fd, uint32_t bo, uint32_t val)
>>  {
>> -       drm_intel_bo *linear_bo;
>> +       uint32_t linear_bo;
>>         uint32_t *linear;
>>         int num_errors;
>>         int i;
>> +       const uint32_t buf_size = 1024 * 1024;
>>
>> -       linear_bo = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
>> +       linear_bo = gem_create(fd, buf_size);
>>
>> -       intel_copy_bo(batch, linear_bo, bo, width*height*4);
>> +       copy(fd, linear_bo, I915_TILING_NONE, bo, I915_TILING_X);
>>
>> -       do_or_die(drm_intel_bo_map(linear_bo, 0));
>> -       linear = linear_bo->virtual;
>> +       linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
>> +                                         PROT_READ);
>
>And this is the error above you were trying to hide.
>
>Please do not even think about reading a few megabytes from WC.
>linear = mmap__wb(); with a fallback to copy to wb.

Thank you for pointing this out. I'll keep this in mind.

Thanks,
Vanshi
>-Chris


More information about the igt-dev mailing list