[igt-dev] [RFC PATCH] i915/gem_tiled_blits: Switch from libdrm methods to igt lib

Wed Nov 13 09:51:01 UTC 2019

Quoting Vanshidhar Konda (2019-11-13 04:31:50)
> Switch the test from using libdrm methods to using methods provided by
> the igt library. Like some of the other gem_tiled* tests, this test also
> creates the batch buffer used to do the blitter copies. Also, the test
> avoids calling GET/SET_TILING IOCTLs - something that will not be
> supported on Gen12+.
> 
> Signed-off-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
> ---
>  tests/i915/gem_tiled_blits.c | 176 +++++++++++++++++++++++++----------
>  1 file changed, 126 insertions(+), 50 deletions(-)
> 
> diff --git a/tests/i915/gem_tiled_blits.c b/tests/i915/gem_tiled_blits.c
> index df0699f3..b72ec600 100644
> --- a/tests/i915/gem_tiled_blits.c
> +++ b/tests/i915/gem_tiled_blits.c
> @@ -57,52 +57,135 @@
>  IGT_TEST_DESCRIPTION("Test doing many tiled blits, with a working set larger"
>                      " than the aperture size.");
>  
> -static drm_intel_bufmgr *bufmgr;
> -struct intel_batchbuffer *batch;
>  static int width = 512, height = 512;
> +int device_gen;
>  
> -static drm_intel_bo *
> -create_bo(uint32_t start_val)
> +static void
> +copy(int fd, uint32_t dst, uint32_t dst_tiling,
> +     uint32_t src, uint32_t src_tiling)
>  {
> -       drm_intel_bo *bo, *linear_bo;
> +       uint32_t batch[12];

Be considerate and think in cachelines.

> +       struct drm_i915_gem_relocation_entry reloc[2];
> +       struct drm_i915_gem_exec_object2 obj[3];
> +       struct drm_i915_gem_execbuffer2 exec;
> +       int src_pitch, dst_pitch;
> +       int tile_height, tile_width;
> +       int i = 0;
> +
> +       src_pitch = 4096;
> +       dst_pitch = 4096;
> +       tile_width = 1024;
> +       tile_height = width*height*4/4096;
> +
> +       batch[i++] = XY_SRC_COPY_BLT_CMD |
> +                 XY_SRC_COPY_BLT_WRITE_ALPHA |
> +                 XY_SRC_COPY_BLT_WRITE_RGB;
> +       if (device_gen >= 8)
> +               batch[i - 1] |= 8;
> +       else
> +               batch[i - 1] |= 6;
> +
> +       if (device_gen >= 4 && src_tiling != I915_TILING_NONE) {
> +               src_pitch /= 4;
> +               batch[i - 1] |= XY_SRC_COPY_BLT_SRC_TILED;
> +       }
> +       if (device_gen >= 4 && dst_tiling != I915_TILING_NONE) {
> +               dst_pitch /= 4;
> +               batch[i - 1] |= XY_SRC_COPY_BLT_DST_TILED;
> +       }
> +
> +       batch[i++] = (3 << 24) | /* 32 bits */
> +                 (0xcc << 16) | /* copy ROP */
> +                 dst_pitch;
> +       batch[i++] = 0; /* dst x1,y1 */
> +       batch[i++] = (tile_height << 16) | tile_width; /* dst x2,y2 */
> +       batch[i++] = 0; /* dst reloc */
> +       if (device_gen >= 8)
> +               batch[i++] = 0;
> +       batch[i++] = 0; /* src x1,y1 */
> +       batch[i++] = src_pitch;
> +       batch[i++] = 0; /* src reloc */
> +       if (device_gen >= 8)
> +               batch[i++] = 0;
> +       batch[i++] = MI_BATCH_BUFFER_END;
> +       batch[i++] = MI_NOOP;
> +
> +       memset(reloc, 0, sizeof(reloc));
> +       reloc[0].target_handle = dst;
> +       reloc[0].delta = 0;
> +       reloc[0].offset = 4 * sizeof(batch[0]);
> +       reloc[0].presumed_offset = 0;
> +       reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +       reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +
> +       reloc[1].target_handle = src;
> +       reloc[1].delta = 0;
> +       reloc[1].offset = 7 * sizeof(batch[0]);
> +       if (device_gen >= 8)
> +               reloc[1].offset += sizeof(batch[0]);
> +       reloc[1].presumed_offset = 0;
> +       reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +       reloc[1].write_domain = 0;

Hmm. Since the batch is fresh every time, relocations are not a gpu
stall. So horrible but not a test breaker. (Stalling for relocations
would very effectively nerf the test.)

> +       memset(obj, 0, sizeof(obj));
> +       obj[0].handle = dst;
> +       obj[1].handle = src;
> +       obj[2].handle = gem_create(fd, 4096);
> +       gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
> +       obj[2].relocation_count = 2;
> +       obj[2].relocs_ptr = to_user_pointer(reloc);
> +
> +       memset(&exec, 0, sizeof(exec));
> +       exec.buffers_ptr = to_user_pointer(obj);
> +       exec.buffer_count = 3;
> +       exec.batch_len = i * sizeof(batch[0]);
> +       exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
> +
> +       gem_execbuf(fd, &exec);
> +       if (dst_tiling == I915_TILING_NONE)
> +               gem_sync(fd, obj[2].handle);

Why is there a sync here? What bug are you trying to hide?

> +       gem_close(fd, obj[2].handle);
> +}
> +
> +static uint32_t
> +create_bo(int fd, uint32_t start_val)
> +{
> +       uint32_t bo, linear_bo;
>         uint32_t *linear;
> -       uint32_t tiling = I915_TILING_X;
>         int i;
> +       const uint32_t buf_size = 1024 * 1024;
>  
> -       bo = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
> -       do_or_die(drm_intel_bo_set_tiling(bo, &tiling, width * 4));
> -       igt_assert(tiling == I915_TILING_X);
> -
> -       linear_bo = drm_intel_bo_alloc(bufmgr, "linear src", 1024 * 1024, 4096);
> +       bo = gem_create(fd, buf_size);
> +       linear_bo = gem_create(fd, buf_size);
>  
> +       linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
> +                                         PROT_WRITE);

gem_mmap__wc() is not universal. So you need a fallback to __gtt. Or a
fallback from __gtt.

>         /* Fill the BO with dwords starting at start_val */
> -       do_or_die(drm_intel_bo_map(linear_bo, 1));
> -       linear = linear_bo->virtual;
> -       for (i = 0; i < 1024 * 1024 / 4; i++)
> +       for (i = 0; i < buf_size / 4; i++)
>                 linear[i] = start_val++;
> -       drm_intel_bo_unmap(linear_bo);
> -
> -       intel_copy_bo (batch, bo, linear_bo, width*height*4);
>  
> -       drm_intel_bo_unreference(linear_bo);
> +       munmap(linear, buf_size);
> +       copy(fd, bo, I915_TILING_X, linear_bo, I915_TILING_NONE);
>  
> +       gem_close(fd, linear_bo);
>         return bo;
>  }
>  
>  static void
> -check_bo(drm_intel_bo *bo, uint32_t val)
> +check_bo(int fd, uint32_t bo, uint32_t val)
>  {
> -       drm_intel_bo *linear_bo;
> +       uint32_t linear_bo;
>         uint32_t *linear;
>         int num_errors;
>         int i;
> +       const uint32_t buf_size = 1024 * 1024;
>  
> -       linear_bo = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
> +       linear_bo = gem_create(fd, buf_size);
>  
> -       intel_copy_bo(batch, linear_bo, bo, width*height*4);
> +       copy(fd, linear_bo, I915_TILING_NONE, bo, I915_TILING_X);
>  
> -       do_or_die(drm_intel_bo_map(linear_bo, 0));
> -       linear = linear_bo->virtual;
> +       linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
> +                                         PROT_READ);

And this is the error above you were trying to hide.

Please do not even think about reading a few megabytes from WC.
linear = mmap__wb(); with a fallback to copy to wb.
-Chris