[igt-dev] [RFC PATCH] i915/gem_tiled_blits: Switch from libdrm methods to igt lib
Chris Wilson
chris at chris-wilson.co.uk
Wed Nov 13 09:51:01 UTC 2019
Quoting Vanshidhar Konda (2019-11-13 04:31:50)
> Switch the test from using libdrm methods to using methods provided by
> the igt library. Like some of the other gem_tiled* tests, this test also
> creates the batch buffer used to do the blitter copies. Also, the test
> avoids calling GET/SET_TILING IOCTLs - something that will not be
> supported on Gen12+.
>
> Signed-off-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
> ---
> tests/i915/gem_tiled_blits.c | 176 +++++++++++++++++++++++++----------
> 1 file changed, 126 insertions(+), 50 deletions(-)
>
> diff --git a/tests/i915/gem_tiled_blits.c b/tests/i915/gem_tiled_blits.c
> index df0699f3..b72ec600 100644
> --- a/tests/i915/gem_tiled_blits.c
> +++ b/tests/i915/gem_tiled_blits.c
> @@ -57,52 +57,135 @@
> IGT_TEST_DESCRIPTION("Test doing many tiled blits, with a working set larger"
> " than the aperture size.");
>
> -static drm_intel_bufmgr *bufmgr;
> -struct intel_batchbuffer *batch;
> static int width = 512, height = 512;
> +int device_gen;
>
> -static drm_intel_bo *
> -create_bo(uint32_t start_val)
> +static void
> +copy(int fd, uint32_t dst, uint32_t dst_tiling,
> + uint32_t src, uint32_t src_tiling)
> {
> - drm_intel_bo *bo, *linear_bo;
> + uint32_t batch[12];
Be considerate and think in cachelines.
> + struct drm_i915_gem_relocation_entry reloc[2];
> + struct drm_i915_gem_exec_object2 obj[3];
> + struct drm_i915_gem_execbuffer2 exec;
> + int src_pitch, dst_pitch;
> + int tile_height, tile_width;
> + int i = 0;
> +
> + src_pitch = 4096;
> + dst_pitch = 4096;
> + tile_width = 1024;
> + tile_height = width*height*4/4096;
> +
> + batch[i++] = XY_SRC_COPY_BLT_CMD |
> + XY_SRC_COPY_BLT_WRITE_ALPHA |
> + XY_SRC_COPY_BLT_WRITE_RGB;
> + if (device_gen >= 8)
> + batch[i - 1] |= 8;
> + else
> + batch[i - 1] |= 6;
> +
> + if (device_gen >= 4 && src_tiling != I915_TILING_NONE) {
> + src_pitch /= 4;
> + batch[i - 1] |= XY_SRC_COPY_BLT_SRC_TILED;
> + }
> + if (device_gen >= 4 && dst_tiling != I915_TILING_NONE) {
> + dst_pitch /= 4;
> + batch[i - 1] |= XY_SRC_COPY_BLT_DST_TILED;
> + }
> +
> + batch[i++] = (3 << 24) | /* 32 bits */
> + (0xcc << 16) | /* copy ROP */
> + dst_pitch;
> + batch[i++] = 0; /* dst x1,y1 */
> + batch[i++] = (tile_height << 16) | tile_width; /* dst x2,y2 */
> + batch[i++] = 0; /* dst reloc */
> + if (device_gen >= 8)
> + batch[i++] = 0;
> + batch[i++] = 0; /* src x1,y1 */
> + batch[i++] = src_pitch;
> + batch[i++] = 0; /* src reloc */
> + if (device_gen >= 8)
> + batch[i++] = 0;
> + batch[i++] = MI_BATCH_BUFFER_END;
> + batch[i++] = MI_NOOP;
> +
> + memset(reloc, 0, sizeof(reloc));
> + reloc[0].target_handle = dst;
> + reloc[0].delta = 0;
> + reloc[0].offset = 4 * sizeof(batch[0]);
> + reloc[0].presumed_offset = 0;
> + reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> + reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +
> + reloc[1].target_handle = src;
> + reloc[1].delta = 0;
> + reloc[1].offset = 7 * sizeof(batch[0]);
> + if (device_gen >= 8)
> + reloc[1].offset += sizeof(batch[0]);
> + reloc[1].presumed_offset = 0;
> + reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> + reloc[1].write_domain = 0;
Hmm. Since the batch is fresh every time, relocations are not a gpu
stall. So horrible but not a test breaker. (Stalling for relocations
would very effectively nerf the test.)
> + memset(obj, 0, sizeof(obj));
> + obj[0].handle = dst;
> + obj[1].handle = src;
> + obj[2].handle = gem_create(fd, 4096);
> + gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
> + obj[2].relocation_count = 2;
> + obj[2].relocs_ptr = to_user_pointer(reloc);
> +
> + memset(&exec, 0, sizeof(exec));
> + exec.buffers_ptr = to_user_pointer(obj);
> + exec.buffer_count = 3;
> + exec.batch_len = i * sizeof(batch[0]);
> + exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
> +
> + gem_execbuf(fd, &exec);
> + if (dst_tiling == I915_TILING_NONE)
> + gem_sync(fd, obj[2].handle);
Why is there a sync here? What bug are you trying to hide?
> + gem_close(fd, obj[2].handle);
> +}
> +
> +static uint32_t
> +create_bo(int fd, uint32_t start_val)
> +{
> + uint32_t bo, linear_bo;
> uint32_t *linear;
> - uint32_t tiling = I915_TILING_X;
> int i;
> + const uint32_t buf_size = 1024 * 1024;
>
> - bo = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
> - do_or_die(drm_intel_bo_set_tiling(bo, &tiling, width * 4));
> - igt_assert(tiling == I915_TILING_X);
> -
> - linear_bo = drm_intel_bo_alloc(bufmgr, "linear src", 1024 * 1024, 4096);
> + bo = gem_create(fd, buf_size);
> + linear_bo = gem_create(fd, buf_size);
>
> + linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
> + PROT_WRITE);
gem_mmap__wc() is not universal. So you need a fallback to __gtt. Or a
fallback from __gtt.
> /* Fill the BO with dwords starting at start_val */
> - do_or_die(drm_intel_bo_map(linear_bo, 1));
> - linear = linear_bo->virtual;
> - for (i = 0; i < 1024 * 1024 / 4; i++)
> + for (i = 0; i < buf_size / 4; i++)
> linear[i] = start_val++;
> - drm_intel_bo_unmap(linear_bo);
> -
> - intel_copy_bo (batch, bo, linear_bo, width*height*4);
>
> - drm_intel_bo_unreference(linear_bo);
> + munmap(linear, buf_size);
> + copy(fd, bo, I915_TILING_X, linear_bo, I915_TILING_NONE);
>
> + gem_close(fd, linear_bo);
> return bo;
> }
>
> static void
> -check_bo(drm_intel_bo *bo, uint32_t val)
> +check_bo(int fd, uint32_t bo, uint32_t val)
> {
> - drm_intel_bo *linear_bo;
> + uint32_t linear_bo;
> uint32_t *linear;
> int num_errors;
> int i;
> + const uint32_t buf_size = 1024 * 1024;
>
> - linear_bo = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
> + linear_bo = gem_create(fd, buf_size);
>
> - intel_copy_bo(batch, linear_bo, bo, width*height*4);
> + copy(fd, linear_bo, I915_TILING_NONE, bo, I915_TILING_X);
>
> - do_or_die(drm_intel_bo_map(linear_bo, 0));
> - linear = linear_bo->virtual;
> + linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
> + PROT_READ);
And this is the error above you were trying to hide.
Please do not even think about reading a few megabytes from WC.
linear = mmap__wb(); with a fallback to copy to wb.
-Chris
More information about the igt-dev
mailing list