[igt-dev] [PATCH i-g-t v23 37/37] tests/gem_linear_blits: Use intel allocator

Chris Wilson chris at chris-wilson.co.uk
Tue Mar 16 11:23:25 UTC 2021


Quoting Zbigniew Kempczyński (2021-03-15 16:58:42)
> From: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> 
> Use intel allocator directly, without intel-bb infrastructure.
> 
> Signed-off-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Chris Wilson <chris at chris-wilson.co.uk>
> ---
>  tests/i915/gem_linear_blits.c | 120 ++++++++++++++++++++++++----------
>  1 file changed, 84 insertions(+), 36 deletions(-)
> 
> diff --git a/tests/i915/gem_linear_blits.c b/tests/i915/gem_linear_blits.c
> index cae42d52a..b85fd9fef 100644
> --- a/tests/i915/gem_linear_blits.c
> +++ b/tests/i915/gem_linear_blits.c
> @@ -53,10 +53,13 @@ IGT_TEST_DESCRIPTION("Test doing many blits with a working set larger than the"
>  #define WIDTH 512
>  #define HEIGHT 512
>  
> +/* We don't have alignment detection yet, so assume worst case scenario */
> +#define ALIGNMENT (2048*1024)
> +
>  static uint32_t linear[WIDTH*HEIGHT];
>  
> -static void
> -copy(int fd, uint32_t dst, uint32_t src)
> +static void copy(int fd, uint64_t ahnd, uint32_t dst, uint32_t src,
> +                uint64_t dst_offset, uint64_t src_offset, bool do_relocs)
>  {
>         uint32_t batch[12];
>         struct drm_i915_gem_relocation_entry reloc[2];
> @@ -64,6 +67,23 @@ copy(int fd, uint32_t dst, uint32_t src)
>         struct drm_i915_gem_execbuffer2 exec;
>         int i = 0;
>  
> +       memset(obj, 0, sizeof(obj));
> +       obj[0].handle = dst;
> +       obj[0].offset = CANONICAL(dst_offset);
> +       obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
> +                      EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +       obj[1].handle = src;
> +       obj[1].offset = CANONICAL(src_offset);
> +       obj[1].flags = EXEC_OBJECT_PINNED |
> +                      EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +
> +       obj[2].handle = gem_create(fd, 4096);
> +       obj[2].offset = intel_allocator_alloc(ahnd, obj[2].handle,
> +                       4096, ALIGNMENT);
> +       obj[2].offset = CANONICAL(obj[2].offset);
> +       obj[2].flags = EXEC_OBJECT_PINNED |
> +                      EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> +
>         batch[i++] = XY_SRC_COPY_BLT_CMD |
>                   XY_SRC_COPY_BLT_WRITE_ALPHA |
>                   XY_SRC_COPY_BLT_WRITE_RGB;
> @@ -77,49 +97,52 @@ copy(int fd, uint32_t dst, uint32_t src)
>                   WIDTH*4;
>         batch[i++] = 0; /* dst x1,y1 */
>         batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
> -       batch[i++] = 0; /* dst reloc */
> +       batch[i++] = obj[0].offset;
>         if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> -               batch[i++] = 0;
> +               batch[i++] = obj[0].offset >> 32;
>         batch[i++] = 0; /* src x1,y1 */
>         batch[i++] = WIDTH*4;
> -       batch[i++] = 0; /* src reloc */
> +       batch[i++] = obj[1].offset;
>         if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> -               batch[i++] = 0;
> +               batch[i++] = obj[1].offset >> 32;
>         batch[i++] = MI_BATCH_BUFFER_END;
>         batch[i++] = MI_NOOP;
>  
> -       memset(reloc, 0, sizeof(reloc));
> -       reloc[0].target_handle = dst;
> -       reloc[0].delta = 0;
> -       reloc[0].offset = 4 * sizeof(batch[0]);
> -       reloc[0].presumed_offset = 0;
> -       reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> -       reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> -
> -       reloc[1].target_handle = src;
> -       reloc[1].delta = 0;
> -       reloc[1].offset = 7 * sizeof(batch[0]);
> -       if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> -               reloc[1].offset += sizeof(batch[0]);
> -       reloc[1].presumed_offset = 0;
> -       reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> -       reloc[1].write_domain = 0;
> -
> -       memset(obj, 0, sizeof(obj));
> -       obj[0].handle = dst;
> -       obj[1].handle = src;
> -       obj[2].handle = gem_create(fd, 4096);
>         gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
> -       obj[2].relocation_count = 2;
> -       obj[2].relocs_ptr = to_user_pointer(reloc);
> +
> +       if (do_relocs) {
> +               memset(reloc, 0, sizeof(reloc));
> +               reloc[0].target_handle = dst;
> +               reloc[0].delta = 0;
> +               reloc[0].offset = 4 * sizeof(batch[0]);
> +               reloc[0].presumed_offset = obj[0].offset;
> +               reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
> +               reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
> +
> +               reloc[1].target_handle = src;
> +               reloc[1].delta = 0;
> +               reloc[1].offset = 7 * sizeof(batch[0]);
> +               if (intel_gen(intel_get_drm_devid(fd)) >= 8)
> +                       reloc[1].offset += sizeof(batch[0]);
> +               reloc[1].presumed_offset = obj[1].offset;
> +               reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
> +               reloc[1].write_domain = 0;
> +
> +               obj[0].flags &= ~EXEC_OBJECT_PINNED;
> +               obj[1].flags &= ~EXEC_OBJECT_PINNED;
> +               obj[2].flags &= ~EXEC_OBJECT_PINNED;
> +               obj[2].relocation_count = ARRAY_SIZE(reloc);
> +               obj[2].relocs_ptr = to_user_pointer(reloc);
> +       }
>  
>         memset(&exec, 0, sizeof(exec));
>         exec.buffers_ptr = to_user_pointer(obj);
> -       exec.buffer_count = 3;
> +       exec.buffer_count = ARRAY_SIZE(obj);
>         exec.batch_len = i * sizeof(batch[0]);
>         exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
> -
>         gem_execbuf(fd, &exec);
> +
> +       intel_allocator_free(ahnd, obj[2].handle);
>         gem_close(fd, obj[2].handle);
>  }


@@ -64,6 +67,20 @@ copy(int fd, uint32_t dst, uint32_t src)
 	struct drm_i915_gem_execbuffer2 exec;
 	int i = 0;
 
+	memset(obj, 0, sizeof(obj));
+	obj[0].handle = dst;
+	obj[0].offset = CANONICAL(dst_offset);
+	obj[0].flags = EXEC_OBJECT_WRITE | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+	obj[1].handle = src;
+	obj[1].offset = CANONICAL(src_offset);
+	obj[1].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
+	obj[2].handle = gem_create(fd, 4096);
+	obj[2].offset = intel_allocator_alloc(ahnd, obj[2].handle,
+			4096, ALIGNMENT);
+	obj[2].offset = CANONICAL(obj[2].offset);
+	obj[2].flags = EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+
 	batch[i++] = XY_SRC_COPY_BLT_CMD |
 		  XY_SRC_COPY_BLT_WRITE_ALPHA |
 		  XY_SRC_COPY_BLT_WRITE_RGB;
@@ -77,14 +94,14 @@ copy(int fd, uint32_t dst, uint32_t src)
 		  WIDTH*4;
 	batch[i++] = 0; /* dst x1,y1 */
 	batch[i++] = (HEIGHT << 16) | WIDTH; /* dst x2,y2 */
-	batch[i++] = 0; /* dst reloc */
+	batch[i++] = obj[0].offset;
 	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
-		batch[i++] = 0;
+		batch[i++] = obj[0].offset >> 32;
 	batch[i++] = 0; /* src x1,y1 */
 	batch[i++] = WIDTH*4;
-	batch[i++] = 0; /* src reloc */
+	batch[i++] = obj[1].offset;
 	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
-		batch[i++] = 0;
+		batch[i++] = obj[1].offset >> 32;
 	batch[i++] = MI_BATCH_BUFFER_END;
 	batch[i++] = MI_NOOP;
 
@@ -92,7 +109,7 @@ copy(int fd, uint32_t dst, uint32_t src)
 	reloc[0].target_handle = dst;
 	reloc[0].delta = 0;
 	reloc[0].offset = 4 * sizeof(batch[0]);
-	reloc[0].presumed_offset = 0;
+	reloc[0].presumed_offset = obj[0].offset;
 	reloc[0].read_domains = I915_GEM_DOMAIN_RENDER;
 	reloc[0].write_domain = I915_GEM_DOMAIN_RENDER;
 
@@ -101,25 +118,29 @@ copy(int fd, uint32_t dst, uint32_t src)
 	reloc[1].offset = 7 * sizeof(batch[0]);
 	if (intel_gen(intel_get_drm_devid(fd)) >= 8)
 		reloc[1].offset += sizeof(batch[0]);
-	reloc[1].presumed_offset = 0;
+	reloc[1].presumed_offset = obj[1].offset;
 	reloc[1].read_domains = I915_GEM_DOMAIN_RENDER;
 	reloc[1].write_domain = 0;
 
-	memset(obj, 0, sizeof(obj));
-	obj[0].handle = dst;
-	obj[1].handle = src;
-	obj[2].handle = gem_create(fd, 4096);
 	gem_write(fd, obj[2].handle, 0, batch, i * sizeof(batch[0]));
-	obj[2].relocation_count = 2;
-	obj[2].relocs_ptr = to_user_pointer(reloc);
+
+	if (do_relocs) {
+		obj[2].relocation_count = ARRAY_SIZE(reloc);
+		obj[2].relocs_ptr = to_user_pointer(reloc);
+	} else {
+		obj[0].flags |= EXEC_OBJECT_PINNED;
+		obj[1].flags |= EXEC_OBJECT_PINNED;
+		obj[2].flags |= EXEC_OBJECT_PINNED;
+	}
 
 	memset(&exec, 0, sizeof(exec));
 	exec.buffers_ptr = to_user_pointer(obj);
-	exec.buffer_count = 3;
+	exec.buffer_count = ARRAY_SIZE(obj);
 	exec.batch_len = i * sizeof(batch[0]);
 	exec.flags = gem_has_blt(fd) ? I915_EXEC_BLT : 0;
-
 	gem_execbuf(fd, &exec);
+
+	intel_allocator_free(ahnd, obj[2].handle);
 	gem_close(fd, obj[2].handle);
 }


This is more concise, both in the delta and in the test flow.

It also suggests something like

intel_allocator_apply_relocations(ahnd, &exec):
	foreach(exec.obj) {
		foreach(execobj.reloc) {
			if (reloc.write_domain)
				execobj.flags |= EXEC_OBJECT_WRITE;

			if (reloc.presumed_offset == reloc.target->offset)
				continue;

			reloc.presumed_offset = lookup_exec_offset(exec, reloc.target);
			u64 address = reloc.presumed_offset + (int)reloc.delta;
			memcpy(mmap(execobj) + reloc.offset, &address, ahnd->address_size);
		}
		if (!ahnd->use_relocs) {
			execobj.reloc = {};
			execobj.flags |= EXEC_OBJECT_PINNED;
		}
	}
	exec.flags |= I915_EXEC_NORELOC;


would be really useful for converting tests.


> @@ -157,17 +180,28 @@ check_bo(int fd, uint32_t handle, uint32_t val)
>         igt_assert_eq(num_errors, 0);
>  }
>  
> -static void run_test(int fd, int count)
> +static void run_test(int fd, int count, bool do_relocs)
>  {
>         uint32_t *handle, *start_val;
> +       uint64_t *offset, ahnd;
>         uint32_t start = 0;
>         int i;
>  
> +       ahnd = intel_allocator_open(fd, 0, do_relocs ?
> +                                           INTEL_ALLOCATOR_RELOC :
> +                                           INTEL_ALLOCATOR_SIMPLE);
> +
>         handle = malloc(sizeof(uint32_t) * count * 2);
> +       offset = calloc(1, sizeof(uint64_t) * count);

calloc(count, sizeof(uint64_t))

> +       igt_assert_f(handle && offset, "Allocation failed\n");
>         start_val = handle + count;
>  
>         for (i = 0; i < count; i++) {
>                 handle[i] = create_bo(fd, start);
> +
> +               offset[i] = intel_allocator_alloc(ahnd, handle[i],
> +                                                 sizeof(linear), ALIGNMENT);
> +
>                 start_val[i] = start;
>                 start += 1024 * 1024 / 4;
>         }
> @@ -178,17 +212,22 @@ static void run_test(int fd, int count)
>  
>                 if (src == dst)
>                         continue;
> +               copy(fd, ahnd, handle[dst], handle[src],
> +                    offset[dst], offset[src], do_relocs);
>  
> -               copy(fd, handle[dst], handle[src]);
>                 start_val[dst] = start_val[src];
>         }
>  
>         for (i = 0; i < count; i++) {
>                 check_bo(fd, handle[i], start_val[i]);
> +               intel_allocator_free(ahnd, handle[i]);
>                 gem_close(fd, handle[i]);
>         }
>  
>         free(handle);
> +       free(offset);
> +
> +       intel_allocator_close(ahnd);
>  }
>  
>  #define MAX_32b ((1ull << 32) - 4096)
> @@ -197,16 +236,21 @@ igt_main
>  {
>         const int ncpus = sysconf(_SC_NPROCESSORS_ONLN);
>         uint64_t count = 0;
> +       bool do_relocs;
>         int fd = -1;
>  
>         igt_fixture {
>                 fd = drm_open_driver(DRIVER_INTEL);
>                 igt_require_gem(fd);
>                 gem_require_blitter(fd);
> +               do_relocs = !gem_uses_ppgtt(fd);
>  
>                 count = gem_aperture_size(fd);
>                 if (count >> 32)
>                         count = MAX_32b;
> +               else
> +                       do_relocs = true;

So this catches Braswell (has a 32b full-ppgtt) and only Braswell.
Any reason why the simple allocator cannot handle less than 32b of
address space?

> +
>                 count = 3 + count / (1024*1024);
>                 igt_require(count > 1);
>                 intel_require_memory(count, sizeof(linear), CHECK_RAM);
> @@ -216,19 +260,23 @@ igt_main
>         }
>  
>         igt_subtest("basic")
> -               run_test(fd, 2);
> +               run_test(fd, 2, do_relocs);
>  
>         igt_subtest("normal") {
> +               intel_allocator_multiprocess_start();
>                 igt_fork(child, ncpus)
> -                       run_test(fd, count);
> +                       run_test(fd, count, do_relocs);
>                 igt_waitchildren();
> +               intel_allocator_multiprocess_stop();
>         }
>  
>         igt_subtest("interruptible") {
> +               intel_allocator_multiprocess_start();
>                 igt_fork_signal_helper();
>                 igt_fork(child, ncpus)
> -                       run_test(fd, count);
> +                       run_test(fd, count, do_relocs);
>                 igt_waitchildren();
>                 igt_stop_signal_helper();
> +               intel_allocator_multiprocess_stop();
>         }
>  }
> -- 
> 2.26.0
> 
> _______________________________________________
> igt-dev mailing list
> igt-dev at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/igt-dev


More information about the igt-dev mailing list