[igt-dev] [RFC PATCH] i915/gem_tiled_blits: Switch from libdrm methods to igt lib
Vanshidhar Konda
vanshidhar.r.konda at intel.com
Wed Nov 13 04:35:10 UTC 2019
On Tue, Nov 12, 2019 at 08:31:50PM -0800, Vanshidhar Konda wrote:
>Switch the test from using libdrm methods to using methods provided by
>the igt library. Like some of the other gem_tiled* tests, this test also
>creates the batch buffer used to do the blitter copies. Also, the test
>avoids calling GET/SET_TILING IOCTLs - something that will not be
>supported on Gen12+.
>
>Signed-off-by: Vanshidhar Konda <vanshidhar.r.konda at intel.com>
>---
> tests/i915/gem_tiled_blits.c | 176 +++++++++++++++++++++++++----------
> 1 file changed, 126 insertions(+), 50 deletions(-)
>
>diff --git a/tests/i915/gem_tiled_blits.c b/tests/i915/gem_tiled_blits.c
>index df0699f3..b72ec600 100644
>--- a/tests/i915/gem_tiled_blits.c
>+++ b/tests/i915/gem_tiled_blits.c
>@@ -57,52 +57,135 @@
> IGT_TEST_DESCRIPTION("Test doing many tiled blits, with a working set larger"
> " than the aperture size.");
>
>-static drm_intel_bufmgr *bufmgr;
>-struct intel_batchbuffer *batch;
> static int width = 512, height = 512;
>+int device_gen;
>
>-static drm_intel_bo *
>-create_bo(uint32_t start_val)
>+static void
>+copy(int fd, uint32_t dst, uint32_t dst_tiling,
>+ uint32_t src, uint32_t src_tiling)
> {
>- drm_intel_bo *bo, *linear_bo;
>+ uint32_t batch[12];
>+ struct drm_i915_gem_relocation_entry reloc[2];
>+ struct drm_i915_gem_exec_object2 obj[3];
>+ struct drm_i915_gem_execbuffer2 exec;
>+ int src_pitch, dst_pitch;
>+ int tile_height, tile_width;
>+ int i = 0;
>+
>+ src_pitch = 4096;
>+ dst_pitch = 4096;
>+ tile_width = 1024;
>+ tile_height = width*height*4/4096;
>+
>+ batch[i++] = XY_SRC_COPY_BLT_CMD |
>+ XY_SRC_COPY_BLT_WRITE_ALPHA |
>+ XY_SRC_COPY_BLT_WRITE_RGB;
>+ if (device_gen >= 8)
>+ batch[i - 1] |= 8;
>+ else
>+ batch[i - 1] |= 6;
>+
>+ if (device_gen >= 4 && src_tiling != I915_TILING_NONE) {
>+ src_pitch /= 4;
>+ batch[i - 1] |= XY_SRC_COPY_BLT_SRC_TILED;
>+ }
>+ if (device_gen >= 4 && dst_tiling != I915_TILING_NONE) {
>+ dst_pitch /= 4;
>+ batch[i - 1] |= XY_SRC_COPY_BLT_DST_TILED;
>+ }
>+
>+ batch[i++] = (3 << 24) | /* 32 bits */
>+ (0xcc << 16) | /* copy ROP */
>+ dst_pitch;
>+ batch[i++] = 0; /* dst x1,y1 */
>+ batch[i++] = (tile_height << 16) | tile_width; /* dst x2,y2 */
>+ batch[i++] = 0; /* dst reloc */
>+ if (device_gen >= 8)
>+ batch[i++] = 0;
>+ batch[i++] = 0; /* src x1,y1 */
>+ batch[i++] = src_pitch;
>+ batch[i++] = 0; /* src reloc */
>+ if (device_gen >= 8)
>+ batch[i++] = 0;
>+ batch[i++] = MI_BATCH_BUFFER_END;
>+ batch[i++] = MI_NOOP;
>+
>+ memset(reloc, 0, sizeof(reloc));
>+ reloc[0].target_handle = dst;
>+ reloc[0].delta = 0;
>+ reloc[0].offset = 4 * sizeof(batch[0]);
>+ reloc[0].presumed_offset = 0;
>+ reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
>+ reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
>+
>+ reloc[1].target_handle = src;
>+ reloc[1].delta = 0;
>+ reloc[1].offset = 7 * sizeof(batch[0]);
>+ if (device_gen >= 8)
>+ reloc[1].offset += sizeof(batch[0]);
>+ reloc[1].presumed_offset = 0;
>+ reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
>+ reloc[1].write_domain = 0;
>+
>+ memset(obj, 0, sizeof(obj));
>+ obj[0].handle = dst;
>+ obj[1].handle = src;
>+ obj[2].handle = gem_create(fd, 4096);
>+ gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
>+ obj[2].relocation_count = 2;
>+ obj[2].relocs_ptr = to_user_pointer(reloc);
>+
>+ memset(&exec, 0, sizeof(exec));
>+ exec.buffers_ptr = to_user_pointer(obj);
>+ exec.buffer_count = 3;
>+ exec.batch_len = i * sizeof(batch[0]);
>+ exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
>+
>+ gem_execbuf(fd, &exec);
>+ if (dst_tiling == I915_TILING_NONE)
>+ gem_sync(fd, obj[2].handle);
I noticed that the test doesn't work without adding this gem_sync() call.
Also, the execution time of the test has increased 20x with these changes.
Any suggestions on how to improve this would be greatly appreciated.
Vanshi
>+ gem_close(fd, obj[2].handle);
>+}
>+
>+static uint32_t
>+create_bo(int fd, uint32_t start_val)
>+{
>+ uint32_t bo, linear_bo;
> uint32_t *linear;
>- uint32_t tiling = I915_TILING_X;
> int i;
>+ const uint32_t buf_size = 1024 * 1024;
>
>- bo = drm_intel_bo_alloc(bufmgr, "tiled bo", 1024 * 1024, 4096);
>- do_or_die(drm_intel_bo_set_tiling(bo, &tiling, width * 4));
>- igt_assert(tiling == I915_TILING_X);
>-
>- linear_bo = drm_intel_bo_alloc(bufmgr, "linear src", 1024 * 1024, 4096);
>+ bo = gem_create(fd, buf_size);
>+ linear_bo = gem_create(fd, buf_size);
>
>+ linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
>+ PROT_WRITE);
> /* Fill the BO with dwords starting at start_val */
>- do_or_die(drm_intel_bo_map(linear_bo, 1));
>- linear = linear_bo->virtual;
>- for (i = 0; i < 1024 * 1024 / 4; i++)
>+ for (i = 0; i < buf_size / 4; i++)
> linear[i] = start_val++;
>- drm_intel_bo_unmap(linear_bo);
>-
>- intel_copy_bo (batch, bo, linear_bo, width*height*4);
>
>- drm_intel_bo_unreference(linear_bo);
>+ munmap(linear, buf_size);
>+ copy(fd, bo, I915_TILING_X, linear_bo, I915_TILING_NONE);
>
>+ gem_close(fd, linear_bo);
> return bo;
> }
>
> static void
>-check_bo(drm_intel_bo *bo, uint32_t val)
>+check_bo(int fd, uint32_t bo, uint32_t val)
> {
>- drm_intel_bo *linear_bo;
>+ uint32_t linear_bo;
> uint32_t *linear;
> int num_errors;
> int i;
>+ const uint32_t buf_size = 1024 * 1024;
>
>- linear_bo = drm_intel_bo_alloc(bufmgr, "linear dst", 1024 * 1024, 4096);
>+ linear_bo = gem_create(fd, buf_size);
>
>- intel_copy_bo(batch, linear_bo, bo, width*height*4);
>+ copy(fd, linear_bo, I915_TILING_NONE, bo, I915_TILING_X);
>
>- do_or_die(drm_intel_bo_map(linear_bo, 0));
>- linear = linear_bo->virtual;
>+ linear = (uint32_t *)gem_mmap__wc(fd, linear_bo, 0, buf_size,
>+ PROT_READ);
>
> num_errors = 0;
> for (i = 0; i < 1024 * 1024 / 4; i++) {
>@@ -112,31 +195,31 @@ check_bo(drm_intel_bo *bo, uint32_t val)
> val++;
> }
> igt_assert_eq(num_errors, 0);
>- drm_intel_bo_unmap(linear_bo);
>
>- drm_intel_bo_unreference(linear_bo);
>+ munmap(linear, buf_size);
>+ gem_close(fd, linear_bo);
> }
>
>-static void run_test(int count)
>+static void run_test(int fd, int count)
> {
>- drm_intel_bo **bo;
>+ uint32_t *bo;
> uint32_t *bo_start_val;
> uint32_t start = 0;
> int i;
>
> igt_debug("Using %d 1MiB buffers\n", count);
>
>- bo = malloc(sizeof(drm_intel_bo *)*count);
>+ bo = malloc(sizeof(uint32_t)*count);
> bo_start_val = malloc(sizeof(uint32_t)*count);
>
> for (i = 0; i < count; i++) {
>- bo[i] = create_bo(start);
>+ bo[i] = create_bo(fd, start);
> bo_start_val[i] = start;
> start += 1024 * 1024 / 4;
> }
> igt_info("Verifying initialisation...\n");
> for (i = 0; i < count; i++)
>- check_bo(bo[i], bo_start_val[i]);
>+ check_bo(fd, bo[i], bo_start_val[i]);
>
> igt_info("Cyclic blits, forward...\n");
> for (i = 0; i < count * 4; i++) {
>@@ -146,15 +229,15 @@ static void run_test(int count)
> if (src == dst)
> continue;
>
>- intel_copy_bo(batch, bo[dst], bo[src], width*height*4);
>+ copy(fd, bo[dst], I915_TILING_X, bo[src], I915_TILING_X);
> bo_start_val[dst] = bo_start_val[src];
> }
> for (i = 0; i < count; i++)
>- check_bo(bo[i], bo_start_val[i]);
>+ check_bo(fd, bo[i], bo_start_val[i]);
>
> if (igt_run_in_simulation()) {
> for (i = 0; i < count; i++)
>- drm_intel_bo_unreference(bo[i]);
>+ gem_close(fd, bo[i]);
> free(bo_start_val);
> free(bo);
> return;
>@@ -168,11 +251,11 @@ static void run_test(int count)
> if (src == dst)
> continue;
>
>- intel_copy_bo(batch, bo[dst], bo[src], width*height*4);
>+ copy(fd, bo[dst], I915_TILING_X, bo[src], I915_TILING_X);
> bo_start_val[dst] = bo_start_val[src];
> }
> for (i = 0; i < count; i++)
>- check_bo(bo[i], bo_start_val[i]);
>+ check_bo(fd, bo[i], bo_start_val[i]);
>
> igt_info("Random blits...\n");
> for (i = 0; i < count * 4; i++) {
>@@ -182,12 +265,12 @@ static void run_test(int count)
> if (src == dst)
> continue;
>
>- intel_copy_bo(batch, bo[dst], bo[src], width*height*4);
>+ copy(fd, bo[dst], I915_TILING_X, bo[src], I915_TILING_X);
> bo_start_val[dst] = bo_start_val[src];
> }
> for (i = 0; i < count; i++) {
>- check_bo(bo[i], bo_start_val[i]);
>- drm_intel_bo_unreference(bo[i]);
>+ check_bo(fd, bo[i], bo_start_val[i]);
>+ gem_close(fd, bo[i]);
> }
>
> free(bo_start_val);
>@@ -204,15 +287,11 @@ igt_main
> fd = drm_open_driver(DRIVER_INTEL);
> igt_require_gem(fd);
> gem_require_blitter(fd);
>-
>- bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
>- drm_intel_bufmgr_gem_enable_reuse(bufmgr);
>- drm_intel_bufmgr_gem_set_vma_cache_size(bufmgr, 32);
>- batch = intel_batchbuffer_alloc(bufmgr, intel_get_drm_devid(fd));
>+ device_gen = intel_gen(intel_get_drm_devid(fd));
> }
>
> igt_subtest("basic")
>- run_test(2);
>+ run_test(fd, 2);
>
> igt_subtest("normal") {
> uint64_t count;
>@@ -224,7 +303,7 @@ igt_main
> count += (count & 1) == 0;
> intel_require_memory(count, 1024*1024, CHECK_RAM);
>
>- run_test(count);
>+ run_test(fd, count);
> }
>
> igt_subtest("interruptible") {
>@@ -238,14 +317,11 @@ igt_main
> intel_require_memory(count, 1024*1024, CHECK_RAM);
>
> igt_fork_signal_helper();
>- run_test(count);
>+ run_test(fd, count);
> igt_stop_signal_helper();
> }
>
> igt_fixture {
>- intel_batchbuffer_free(batch);
>- drm_intel_bufmgr_destroy(bufmgr);
>-
> close(fd);
> }
> }
>--
>2.24.0
>
More information about the igt-dev mailing list