[PATCH i-g-t 12/12] tools/intel_tiling_detect: Introduce tiling detection tool

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Tue Jun 18 04:51:06 UTC 2024


On Mon, Jun 17, 2024 at 10:21:29PM +0300, Juha-Pekka Heikkila wrote:
> Just small issues below, on the actual engine stuff I didn't spot anything
> comment worthy.
> 
> On 27.5.2024 10.33, Zbigniew Kempczyński wrote:
> > In some tests we're incorrectly assumming we get requested tiling.
> > In blt library we defined available blitter and render tilings for
> > defined platforms to avoid being wrong and allowing iterators to walk
> > over proper tilings. Unfortunately we're not sure what's the real
> > tiling underneath as we're not validating its content.
> > 
> > Instead of blindly trusting the documentation as it might be wrong
> > or vague lets introduce tiling detection tool for Intel platforms.
> > Currently it supports detecting linear/X/Y/Yf/Ys/4 tilings (tile64
> > is queued to be added) in fast-copy, block-copy and render-copy
> > commands.
> > 
> > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > Cc: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> > ---
> >   tools/intel_tiling_detect.c | 432 ++++++++++++++++++++++++++++++++++++
> >   tools/meson.build           |   1 +
> >   2 files changed, 433 insertions(+)
> >   create mode 100644 tools/intel_tiling_detect.c
> > 
> > diff --git a/tools/intel_tiling_detect.c b/tools/intel_tiling_detect.c
> > new file mode 100644
> > index 0000000000..ea5ea5eae6
> > --- /dev/null
> > +++ b/tools/intel_tiling_detect.c
> > @@ -0,0 +1,432 @@
> > +// SPDX-License-Identifier: MIT
> > +/*
> > + * Copyright © 2024 Intel Corporation
> > + */
> > +
> > +#include <errno.h>
> > +#include <glib.h>
> > +#include <sys/ioctl.h>
> > +#include <sys/time.h>
> > +#include <malloc.h>
> > +#include "drm.h"
> > +#include "igt.h"
> > +#include "igt_syncobj.h"
> > +#include "i915/gem_create.h"
> > +#include "intel_blt.h"
> > +#include "intel_common.h"
> > +#include "intel_mocs.h"
> > +#include "intel_pat.h"
> > +#include "xe/xe_ioctl.h"
> > +#include "xe/xe_query.h"
> > +#include "xe/xe_util.h"
> > +
> > +IGT_TEST_DESCRIPTION("Exercise gen12 blitter with and without flatccs compression on Xe");
> 
> cut'n'paste description? :)

Indeed. I'll remove this.

> 
> > +
> > +static struct param {
> > +	int tiling;
> > +	bool write_png;
> > +	bool print_bb;
> > +	bool print_surface_info;
> > +	int width;
> > +	int height;
> > +	int incdim_width;
> > +} param = {
> > +	.tiling = -1,
> > +	.write_png = false,
> > +	.print_bb = false,
> > +	.print_surface_info = false,
> > +	.width = 256,
> > +	.height = 256,
> > +	.incdim_width = 1,
> > +};
> > +
> > +struct intel_buf refs[I915_TILING_64] = {};
> 
> I think this should be structured somehow differently as that I915_TILING_64
> already defined as MAX_TILING + n. Though, just now I have no good
> suggestion.

Good catch. I missed array size is too small to keep tiling64 as the
last element. I think adding

#define NUM_REFS (I915_TILING_64 + 1)

should clearly show my intention.

> 
> > +
> > +#define PRINT_SURFACE_INFO(name, obj) do { \
> > +	if (param.print_surface_info) \
> > +		blt_surface_info((name), (obj)); } while (0)
> > +
> > +#define WRITE_PNG(fd, id, name, obj, w, h, bpp) do { \
> > +	if (param.write_png) \
> > +		blt_surface_to_png((fd), (id), (name), (obj), (w), (h), (bpp)); } while (0)
> > +
> > +const char *help_str =
> > +	"  -b\tPrint bb\n"
> > +	"  -s\tPrint surface info\n"
> > +	"  -p\tWrite PNG\n"
> > +	"  -W\tWidth (default 256)\n"
> > +	"  -H\tHeight (default 256)\n"
> > +	"  -h\tHelp"
> > +	;
> > +
> > +enum copy_fn {
> > +	FAST_COPY,
> > +	BLOCK_COPY,
> > +	RENDER_COPY,
> > +};
> > +
> > +static const char * const copy_fn_name[] = {
> > +	[FAST_COPY] = "fast-copy",
> > +	[BLOCK_COPY] = "block-copy",
> > +	[RENDER_COPY] = "render-copy",
> > +};
> > +
> > +static void detect_blt_tiling(const struct blt_copy_object *buf)
> > +{
> > +	bool detected = false;
> > +	int i;
> > +
> > +	for (i = 0; i < ARRAY_SIZE(refs); i++) {
> > +		if (!refs[i].bops)
> > +			continue;
> > +
> > +		if (!memcmp(buf->ptr, refs[i].ptr, buf->size)) {
> > +			detected = true;
> > +			break;
> > +		}
> > +	}
> > +
> > +	igt_info("buffer tiling (claimed): %s, detected: %s\n",
> > +		 blt_tiling_name(buf->tiling),
> > +		 detected ? blt_tiling_name(i915_tile_to_blt_tile(i)) : "unknown");
> > +}
> > +
> > +static void blt_copy(int fd,
> > +		     intel_ctx_t *ctx,
> > +		     const struct intel_execution_engine2 *e,
> > +		     uint32_t width, uint32_t height,
> > +		     enum blt_tiling_type tiling,
> > +		     enum copy_fn fn)
> > +{
> > +	struct blt_copy_data blt = {};
> > +	struct blt_block_copy_data_ext ext = {}, *pext = &ext;
> > +	struct blt_copy_object *src, *dst;
> > +	const uint32_t bpp = 32;
> > +	uint64_t bb_size;
> > +	uint64_t ahnd = intel_allocator_open(fd, ctx->vm, INTEL_ALLOCATOR_RELOC);
> > +	uint32_t run_id = tiling;
> > +	uint32_t src_region, dst_region;
> > +	uint32_t bb;
> > +	uint8_t uc_mocs = intel_get_uc_mocs_index(fd);
> > +	bool is_xe = is_xe_device(fd);
> > +
> > +	if (is_xe) {
> > +		bb_size = xe_bb_size(fd, SZ_4K);
> > +		src_region = system_memory(fd);
> > +		dst_region = vram_if_possible(fd, 0);
> > +		bb = xe_bo_create(fd, 0, bb_size, src_region, 0);
> > +	} else {
> > +		bb_size = SZ_4K;
> > +		src_region = REGION_SMEM;
> > +		dst_region = gem_has_lmem(fd) ? REGION_LMEM(0) : REGION_SMEM;
> > +		igt_assert(__gem_create_in_memory_regions(fd, &bb, &bb_size, src_region) == 0);
> > +	}
> > +
> > +	if (!blt_uses_extended_block_copy(fd))
> > +		pext = NULL;
> > +
> > +	blt_copy_init(fd, &blt);
> > +
> > +	src = blt_create_object(&blt, src_region, width, height, bpp, uc_mocs,
> > +				T_LINEAR, COMPRESSION_DISABLED,
> > +				COMPRESSION_TYPE_3D, true);
> > +	dst = blt_create_object(&blt, dst_region, width, height, bpp, uc_mocs,
> > +				tiling, COMPRESSION_DISABLED,
> > +				COMPRESSION_TYPE_3D, true);
> > +	PRINT_SURFACE_INFO("src", src);
> > +	PRINT_SURFACE_INFO("dst", dst);
> > +
> > +	blt_surface_fill_rect(fd, src, width, height);
> > +
> > +	blt.color_depth = CD_32bit;
> > +	blt.print_bb = param.print_bb;
> > +	blt_set_copy_object(&blt.src, src);
> > +	blt_set_copy_object(&blt.dst, dst);
> > +	blt_set_object_ext(&ext.src, 0, width, height, SURFACE_TYPE_2D);
> > +	blt_set_object_ext(&ext.dst, 0, width, height, SURFACE_TYPE_2D);
> > +	blt_set_batch(&blt.bb, bb, bb_size, src_region);
> > +	if (fn == BLOCK_COPY)
> > +		blt_block_copy(fd, ctx, e, ahnd, &blt, pext);
> > +	else if (fn == FAST_COPY)
> > +		blt_fast_copy(fd, ctx, e, ahnd, &blt);
> > +	if (is_xe)
> > +		intel_ctx_xe_sync(ctx, true);
> > +	else
> > +		gem_sync(fd, dst->handle);
> > +
> > +	WRITE_PNG(fd, run_id, copy_fn_name[fn], &blt.dst, width, height, bpp);
> > +
> > +	detect_blt_tiling(dst);
> > +
> > +	/* Politely clean vm */
> > +	put_offset(ahnd, src->handle);
> > +	put_offset(ahnd, dst->handle);
> > +	put_offset(ahnd, bb);
> > +	intel_allocator_bind(ahnd, 0, 0);
> > +	blt_destroy_object(fd, src);
> > +	blt_destroy_object(fd, dst);
> > +	gem_close(fd, bb);
> > +	put_ahnd(ahnd);
> > +}
> > +
> > +static void detect_render_tiling(struct intel_buf *buf)
> > +{
> > +	bool detected = false;
> > +	int i;
> > +
> > +	intel_buf_device_map(buf, false);
> > +
> > +	for (i = 0; i < ARRAY_SIZE(refs); i++) {
> > +		if (!refs[i].bops)
> > +			continue;
> > +
> > +		if (!memcmp(buf->ptr, refs[i].ptr, buf->size)) {
> > +			detected = true;
> > +			break;
> > +		}
> > +	}
> > +
> > +	intel_buf_unmap(buf);
> > +
> > +	igt_info("buffer tiling (claimed): %s, detected: %s\n",
> > +		 blt_tiling_name(buf->tiling),
> > +		 detected ? blt_tiling_name(i915_tile_to_blt_tile(i)) : "unknown");
> > +}
> > +
> > +static void scratch_buf_init(struct buf_ops *bops,
> > +			     struct intel_buf *buf,
> > +			     int width, int height,
> > +			     uint32_t tiling,
> > +			     enum i915_compression compression)
> > +{
> > +	int fd = buf_ops_get_fd(bops);
> > +	int bpp = 32;
> > +
> > +	/*
> > +	 * We use system memory even if vram is possible because wc mapping
> > +	 * is extremely slow.
> > +	 */
> > +	intel_buf_init_in_region(bops, buf, width, height, bpp, 0,
> > +				 tiling, compression,
> > +				 is_xe_device(fd) ? system_memory(fd) : REGION_SMEM);
> > +
> > +	igt_assert(intel_buf_width(buf) == width);
> > +	igt_assert(intel_buf_height(buf) == height);
> > +}
> > +
> > +static void render(int fd, uint32_t width, uint32_t height, uint32_t tiling)
> > +{
> > +	struct buf_ops *bops;
> > +	struct intel_bb *ibb;
> > +	struct intel_buf src, dst;
> > +	uint32_t devid = intel_get_drm_devid(fd);
> > +	igt_render_copyfunc_t render_copy = NULL;
> > +
> > +	bops = buf_ops_create(fd);
> > +
> > +	igt_debug("%s() gen: %d\n", __func__, intel_gen(devid));
> > +
> > +	ibb = intel_bb_create(fd, SZ_4K);
> > +
> > +	scratch_buf_init(bops, &src, width, height, I915_TILING_NONE,
> > +			 I915_COMPRESSION_NONE);
> > +	scratch_buf_init(bops, &dst, width, height, tiling,
> > +			 I915_COMPRESSION_NONE);
> > +
> > +	/* Copy reference linear image */
> > +	intel_buf_device_map(&src, true);
> > +	memcpy(src.ptr, refs[0].ptr, src.bo_size);
> > +	intel_buf_unmap(&src);
> > +
> > +	render_copy = igt_get_render_copyfunc(devid);
> > +	igt_assert(render_copy);
> > +
> > +	render_copy(ibb,
> > +		    &src,
> > +		    0, 0, width, height,
> > +		    &dst,
> > +		    0, 0);
> > +
> > +	intel_bb_sync(ibb);
> > +	intel_bb_destroy(ibb);
> > +
> > +	detect_render_tiling(&dst);
> > +
> > +	if (param.write_png)
> > +		intel_buf_raw_write_to_png(&dst, "render-tile-%s.png",
> > +					   blt_tiling_name(i915_tile_to_blt_tile(tiling)));
> > +
> > +	intel_buf_close(bops, &src);
> > +	intel_buf_close(bops, &dst);
> > +
> > +	buf_ops_destroy(bops);
> > +}
> > +
> > +static void single_copy(int fd,
> > +			uint32_t width, uint32_t height,
> > +			int tiling, enum copy_fn fn)
> > +{
> > +	/* for potential hangs */
> > +	fd = drm_reopen_driver(fd);
> > +
> > +	switch (fn) {
> > +	case BLOCK_COPY:
> > +	case FAST_COPY:
> > +		if (is_xe_device(fd)) {
> > +			struct drm_xe_engine_class_instance inst = {
> > +				.engine_class = DRM_XE_ENGINE_CLASS_COPY,
> > +			};
> > +			uint32_t vm, exec_queue;
> > +			intel_ctx_t *ctx;
> > +
> > +			vm = xe_vm_create(fd, 0, 0);
> > +			exec_queue = xe_exec_queue_create(fd, vm, &inst, 0);
> > +			ctx = intel_ctx_xe(fd, vm, exec_queue, 0, 0, 0);
> > +
> > +			blt_copy(fd, ctx, NULL, width, height, tiling, fn);
> > +
> > +			xe_exec_queue_destroy(fd, exec_queue);
> > +			xe_vm_destroy(fd, vm);
> > +			free(ctx);
> > +		} else {
> > +			const struct intel_execution_engine2 *e;
> > +			const intel_ctx_t *ctx;
> > +
> > +			ctx = intel_ctx_create_all_physical(fd);
> > +			for_each_ctx_engine(fd, ctx, e) {
> > +				if (e->class != I915_ENGINE_CLASS_COPY)
> > +					continue;
> > +
> > +				if (fn == BLOCK_COPY && !gem_engine_can_block_copy(fd, e))
> > +					continue;
> > +
> > +				blt_copy(fd, (intel_ctx_t *)ctx, e,
> > +					 width, height, tiling, fn);
> > +				break;
> > +			}
> > +			intel_ctx_destroy(fd, ctx);
> > +		}
> > +		break;
> > +
> > +	case RENDER_COPY:
> > +		render(fd, width, height, blt_tile_to_i915_tile(tiling));
> > +		break;
> > +	}
> > +
> > +	drm_close_driver(fd);
> > +}
> > +
> > +static void soft_tile(struct buf_ops *bops, struct intel_buf *buf,
> > +		      uint32_t width, uint32_t height, uint32_t tiling)
> > +{
> > +	struct blt_copy_data blt = {};
> > +	struct blt_copy_object *src;
> > +	int fd = buf_ops_get_fd(bops);
> > +	uint8_t uc_mocs = intel_get_uc_mocs_index(fd);
> > +	enum blt_compression_type comp_type = COMPRESSION_TYPE_3D;
> > +	uint64_t sys_region;
> > +	const int bpp = 32;
> > +
> > +	sys_region = is_xe_device(fd) ? system_memory(fd) : REGION_SMEM;
> > +	blt_copy_init(fd, &blt);
> > +	src = blt_create_object(&blt, sys_region, width, height, bpp, uc_mocs,
> > +				T_LINEAR, COMPRESSION_DISABLED, comp_type, true);
> > +	blt_surface_fill_rect(fd, src, width, height);
> > +
> > +	intel_buf_init(bops, buf, width, height, bpp, 0, tiling, false);
> > +	buf_ops_set_software_tiling(bops, tiling, true);
> > +
> > +	linear_to_intel_buf(bops, buf, src->ptr);
> > +
> > +	if (param.write_png)
> > +		intel_buf_raw_write_to_png(buf, "reference-tile-%s.png",
> > +					   blt_tiling_name(i915_tile_to_blt_tile(tiling)));
> > +}
> > +
> > +static bool try_tile[] = {
> > +	[I915_TILING_NONE] = true,
> > +	[I915_TILING_X] = true,
> > +	[I915_TILING_Y] = true,
> > +	[I915_TILING_4] = true,
> > +	[I915_TILING_Yf] = true,
> > +	[I915_TILING_64] = false,
> > +};
> 
> I think you have hole in this list? You don't have Ys here when you'll be
> missing I915_TILING_LAST + 3 while you do have I915_TILING_LAST + 4
> 
> Maybe create struct to encapsulate these and the use list of those structs?
> Then there would be no holes.

Hole is not a problem as this array is filled with false (static)
on the compilation time (take a look Ys is in the iteration during
reference surface creation). I've added I915_TILING_64 to clearly show
my intention that tile64 is not supported in this version yet.
But setting explicit size to this array is a good idea as the compiler
will protect against using out of boundary access.

So if you don't mind I would keep this array. It has direct tile -> ref
mapping and it's easy to read imo.

Thank you for the time to review this. I'm going to address your and
Kamil need and respin.

--
Zbigniew

> 
> > +
> > +int main(int argc, char *argv[])
> > +{
> > +	struct buf_ops *bops;
> > +	int fd, i, fn, opt;
> > +
> > +	while ((opt = getopt(argc, argv, "bpsW:H:h")) != -1) {
> > +		switch (opt) {
> > +		case 'b':
> > +			param.print_bb = true;
> > +			break;
> > +
> > +		case 'p':
> > +			param.write_png = true;
> > +			break;
> > +
> > +		case 's':
> > +			param.print_surface_info = true;
> > +			break;
> > +
> > +		case 'W':
> > +			param.width = atoi(optarg);
> > +			break;
> > +
> > +		case 'H':
> > +			param.height = atoi(optarg);
> > +			break;
> > +
> > +		case 'h':
> > +			igt_info("%s\n", help_str);
> > +			exit(0);
> > +			break;
> > +
> > +		default:
> > +			break;
> > +		}
> > +	}
> > +
> > +	fd = drm_open_driver(DRIVER_INTEL | DRIVER_XE);
> > +	if (is_xe_device(fd))
> > +		xe_device_get(fd);
> > +
> > +	bops = buf_ops_create(fd);
> > +
> > +	for (i = 0; i <= I915_TILING_64; i++) {
> 
> why not ARRAY_SIZE() ?
> 
> > +		igt_info("Building reference tile[%-7s] = %s\n",
> > +			 blt_tiling_name(i915_tile_to_blt_tile(i)),
> > +			 try_tile[i] ? "yes" : "no");
> > +		if (try_tile[i]) {
> > +			soft_tile(bops, &refs[i],
> > +				  param.width, param.height, i);
> > +			intel_buf_device_map(&refs[i], false);
> > +		}
> > +	}
> > +
> > +	for (fn = FAST_COPY; fn <= RENDER_COPY; fn++) {
> > +		if (fn == FAST_COPY && !blt_has_fast_copy(fd))
> > +			continue;
> > +
> > +		if (fn == BLOCK_COPY && !blt_has_block_copy(fd))
> > +			continue;
> > +
> > +		igt_info("[%s]:\n", copy_fn_name[fn]);
> > +
> > +		for (i = 0; i <= I915_TILING_64; i++)
> > +			if (try_tile[i])
> > +				single_copy(fd, param.width, param.height,
> > +					    i915_tile_to_blt_tile(i), fn);
> > +	}
> > +
> > +	for (i = 0; i <= I915_TILING_64; i++) {
> > +		if (try_tile[i])
> > +			intel_buf_unmap(&refs[i]);
> > +	}
> > +
> > +	if (is_xe_device(fd))
> > +		xe_device_put(fd);
> > +	close(fd);
> > +}
> > diff --git a/tools/meson.build b/tools/meson.build
> > index ac79d8b584..1656355eef 100644
> > --- a/tools/meson.build
> > +++ b/tools/meson.build
> > @@ -36,6 +36,7 @@ tools_progs = [
> >   	'intel_reg_checker',
> >   	'intel_residency',
> >   	'intel_stepping',
> > +	'intel_tiling_detect',
> >   	'intel_vbt_decode',
> >   	'intel_watermark',
> >   	'intel_gem_info',
> 


More information about the igt-dev mailing list