[igt-dev] [PATCH i-g-t 1/2] lib/intel_blt: Add wrappers to prepare batch buffers and submit exec

Ch, Sai Gowtham sai.gowtham.ch at intel.com
Mon Oct 16 17:44:56 UTC 2023



>-----Original Message-----
>From: Kempczynski, Zbigniew <zbigniew.kempczynski at intel.com>
>Sent: Monday, October 16, 2023 2:25 PM
>To: Ch, Sai Gowtham <sai.gowtham.ch at intel.com>
>Cc: igt-dev at lists.freedesktop.org; Stolarek, Karolina
><karolina.stolarek at intel.com>
>Subject: Re: [PATCH i-g-t 1/2] lib/intel_blt: Add wrappers to prepare batch
>buffers and submit exec
>
>On Fri, Oct 13, 2023 at 04:07:27PM +0530, sai.gowtham.ch at intel.com wrote:
>> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>>
>> Add wrappers for the mem-set and mem-copy instructions that prepare
>> batch buffers and submit exec (blt_mem_copy, blt_mem_set,
>> emit_blt_mem_copy, emit_blt_mem_set).
>>
>> Cc: Karolina Stolarek <karolina.stolarek at intel.com>
>> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
>> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>> ---
>>  lib/intel_blt.c | 199
>> ++++++++++++++++++++++++++++++++++++++++++++++++
>>  lib/intel_blt.h |  39 ++++++++++
>>  lib/intel_reg.h |   4 +
>>  3 files changed, 242 insertions(+)
>>
>> diff --git a/lib/intel_blt.c b/lib/intel_blt.c index
>> a76c7a404..4e7357b6f 100644
>> --- a/lib/intel_blt.c
>> +++ b/lib/intel_blt.c
>> @@ -13,12 +13,14 @@
>>  #include "igt.h"
>>  #include "igt_syncobj.h"
>>  #include "intel_blt.h"
>> +#include "intel_mocs.h"
>>  #include "xe/xe_ioctl.h"
>>  #include "xe/xe_query.h"
>>  #include "xe/xe_util.h"
>>
>>  #define BITRANGE(start, end) (end - start + 1)  #define
>> GET_CMDS_INFO(__fd) intel_get_cmds_info(intel_get_drm_devid(__fd))
>> +#define MEM_COPY_MOCS_SHIFT                     25
>>
>>  /* Blitter tiling definitions sanity checks */
>> static_assert(T_LINEAR == I915_TILING_NONE, "Linear definitions have
>> to match"); @@ -1577,6 +1579,186 @@ int blt_fast_copy(int fd,
>>  	return ret;
>>  }
>>
>> +/**
>> + * blt_mem_init:
>> + * @fd: drm fd
>> + * @mem: structure for initialization
>> + *
>> + * Function is zeroing @mem and sets fd and driver fields
>> +(INTEL_DRIVER_I915 or
>> + * INTEL_DRIVER_XE).
>> + */
>> +void blt_mem_init(int fd, struct blt_mem_data *mem) {
>> +	memset(mem, 0, sizeof(*mem));
>> +
>> +	mem->fd = fd;
>> +	mem->driver = get_intel_driver(fd);
>> +}
>> +
>> +static void emit_blt_mem_copy(int fd, uint64_t ahnd, const struct
>> +blt_mem_data *mem) {
>> +	uint64_t dst_offset, src_offset, alignment;
>> +	int i;
>> +	uint32_t *batch;
>> +	uint32_t optype;
>> +
>> +	alignment = get_default_alignment(fd, mem->driver);
>> +	src_offset = get_offset(ahnd, mem->src.handle, mem->src.size,
>alignment);
>> +	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size,
>> +alignment);
>> +
>> +	batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
>> +	optype = mem->src.type == M_MATRIX ? 1 << 17 : 0;
>> +
>> +	i = 0;
>> +	batch[i++] = MEM_COPY_CMD | (1 << 19) | optype;
>> +	batch[i++] = mem->src.width - 1;
>> +	batch[i++] = mem->src.height - 1;
>> +	batch[i++] = mem->src.pitch - 1;
>> +	batch[i++] = mem->dst.pitch - 1;
>> +	batch[i++] = src_offset;
>> +	batch[i++] = src_offset << 32;
>> +	batch[i++] = dst_offset;
>> +	batch[i++] = dst_offset << 32;
>> +	batch[i++] = mem->src.mocs << MEM_COPY_MOCS_SHIFT | mem-
>>dst.mocs;
>> +	batch[i++] = MI_BATCH_BUFFER_END;
>> +
>> +	munmap(batch, mem->bb.size);
>> +}
>> +
>> +/**
>> + * blt_mem_copy:
>> + * @fd: drm fd
>> + * @ctx: intel_ctx_t context
>> + * @e: blitter engine for @ctx
>> + * @ahnd: allocator handle
>> + * @blt: blitter data for mem-copy.
>> + *
>> + * Function does mem blit between @src and @dst described in @blt
>object.
>> + *
>> + * Returns:
>> + * execbuffer status.
>> + */
>> +int blt_mem_copy(int fd, const intel_ctx_t *ctx,
>> +		 const struct intel_execution_engine2 *e,
>> +		 uint64_t ahnd,
>> +		 const struct blt_mem_data *mem)
>> +{
>> +	struct drm_i915_gem_execbuffer2 execbuf = {};
>> +	struct drm_i915_gem_exec_object2 obj[3] = {};
>> +	uint64_t dst_offset, src_offset, bb_offset, alignment;
>> +	int ret;
>> +
>> +	alignment = get_default_alignment(fd, mem->driver);
>> +	src_offset = get_offset(ahnd, mem->src.handle, mem->src.size,
>alignment);
>> +	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size,
>alignment);
>> +	bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size,
>> +alignment);
>> +
>> +	emit_blt_mem_copy(fd, ahnd, mem);
>> +
>> +	if (mem->driver == INTEL_DRIVER_XE) {
>> +		intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
>> +	} else {
>> +		obj[0].offset = CANONICAL(dst_offset);
>> +		obj[1].offset = CANONICAL(src_offset);
>> +		obj[2].offset = CANONICAL(bb_offset);
>> +		obj[0].handle = mem->dst.handle;
>> +		obj[1].handle = mem->src.handle;
>> +		obj[2].handle = mem->bb.handle;
>> +		obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
>> +			EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +		obj[1].flags = EXEC_OBJECT_PINNED |
>EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +		obj[2].flags = EXEC_OBJECT_PINNED |
>EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +		execbuf.buffer_count = 3;
>> +                execbuf.buffers_ptr = to_user_pointer(obj);
>> +		execbuf.rsvd1 = ctx ? ctx->id : 0;
>> +		execbuf.flags = e ? e->flags : I915_EXEC_BLT;
>> +		ret = __gem_execbuf(fd, &execbuf);
>> +		put_offset(ahnd, mem->dst.handle);
>> +		put_offset(ahnd, mem->src.handle);
>> +		put_offset(ahnd, mem->bb.handle);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>> +static void emit_blt_mem_set(int fd, uint64_t ahnd, const struct
>blt_mem_data *mem,
>> +			     uint8_t fill_data)
>> +{
>> +	uint64_t dst_offset, alignment;
>> +	int b;
>> +	uint32_t *batch;
>> +	uint32_t value;
>> +
>> +	alignment = get_default_alignment(fd, mem->driver);
>> +	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size,
>> +alignment);
>> +
>> +	batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
>> +	value = (uint32_t)fill_data << 24;
>> +
>> +	b = 0;
>> +	batch[b++] = MEM_SET_CMD;
>> +	batch[b++] = mem->dst.width - 1;
>> +	batch[b++] = mem->dst.height - 1;
>> +	batch[b++] = mem->dst.pitch - 1;
>> +	batch[b++] = dst_offset;
>> +	batch[b++] = dst_offset << 32;
>> +	batch[b++] = value | mem->dst.mocs;
>> +	batch[b++] = MI_BATCH_BUFFER_END;
>> +
>> +	munmap(batch, mem->bb.size);
>> +}
>> +/**
>> + * blt_mem_set:
>> + * @fd: drm fd
>> + * @ctx: intel_ctx_t context
>> + * @e: blitter engine for @ctx
>> + * @ahnd: allocator handle
>> + * @blt: blitter data for mem-set.
>> + *
>> + * Function does mem set blit in described @blt object.
>> + *
>> + * Returns:
>> + * execbuffer status.
>> + */
>> +int blt_mem_set(int fd, const intel_ctx_t *ctx,
>> +		const struct intel_execution_engine2 *e,
>> +		uint64_t ahnd,
>> +		const struct blt_mem_data *mem,
>> +		uint8_t fill_data)
>> +{
>> +	struct drm_i915_gem_execbuffer2 execbuf = {};
>> +	struct drm_i915_gem_exec_object2 obj[2] = {};
>> +	uint64_t dst_offset, bb_offset, alignment;
>> +	int ret;
>> +
>> +	alignment = get_default_alignment(fd, mem->driver);
>> +	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size,
>alignment);
>> +	bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size,
>> +alignment);
>> +
>> +	emit_blt_mem_set(fd, ahnd, mem, fill_data);
>> +
>> +	if (mem->driver == INTEL_DRIVER_XE) {
>> +		intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
>> +	} else {
>> +		obj[0].offset = CANONICAL(dst_offset);
>> +		obj[1].offset = CANONICAL(bb_offset);
>> +		obj[0].handle = mem->dst.handle;
>> +		obj[1].handle = mem->bb.handle;
>> +		obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
>> +
>EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +		obj[1].flags = EXEC_OBJECT_PINNED |
>EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
>> +		execbuf.buffer_count = 2;
>> +		execbuf.buffers_ptr = to_user_pointer(obj);
>> +		execbuf.rsvd1 = ctx ? ctx->id : 0;
>> +		execbuf.flags = e ? e->flags : I915_EXEC_BLT;
>> +		ret = __gem_execbuf(fd, &execbuf);
>> +		put_offset(ahnd, mem->dst.handle);
>> +		put_offset(ahnd, mem->bb.handle);
>> +	}
>> +
>> +	return ret;
>> +}
>> +
>>  void blt_set_geom(struct blt_copy_object *obj, uint32_t pitch,
>>  		  int16_t x1, int16_t y1, int16_t x2, int16_t y2,
>>  		  uint16_t x_offset, uint16_t y_offset) @@ -1659,6 +1841,23
>@@ void
>> blt_set_object(struct blt_copy_object *obj,
>>  	obj->compression_type = compression_type;  }
>>
>> +void blt_set_mem_object(struct blt_mem_object *obj,
>> +			uint32_t handle, uint64_t size, uint32_t pitch,
>> +			uint32_t width, uint32_t height, uint32_t region,
>> +			uint8_t mocs, enum blt_memop_type type,
>> +			enum blt_compression compression)
>> +{
>> +	obj->handle = handle;
>> +	obj->region = region;
>> +	obj->size = size;
>> +	obj->mocs = mocs;
>> +	obj->type = type;
>> +	obj->compression = compression;
>> +	obj->width = width;
>> +	obj->height = height;
>> +	obj->pitch = pitch;
>> +}
>> +
>>  void blt_set_object_ext(struct blt_block_copy_object_ext *obj,
>>  			uint8_t compression_format,
>>  			uint16_t surface_width, uint16_t surface_height, diff --
>git
>> a/lib/intel_blt.h b/lib/intel_blt.h index 7b4271620..d6f40680d 100644
>> --- a/lib/intel_blt.h
>> +++ b/lib/intel_blt.h
>> @@ -93,6 +93,19 @@ struct blt_copy_object {
>>  	uint32_t plane_offset;
>>  };
>>
>> +struct blt_mem_object {
>> +	uint32_t handle;
>> +	uint32_t region;
>> +	uint64_t size;
>> +	uint8_t mocs;
>
>Rename to mocs_index (see gen12_block_copy_data) for consistency.
>
>Rest looks ok. BTW, looking at mem-copy I see M_MATRIX is supported for PVC,
>so you should update intel_cmds_info.c as well.
Missed it, good observation. Adding M_MATRIX.
>
>What about printing instruction debug similar to dump_bb_fast_cmd?
>(print_bb = true?).
I feel this can be added as an enhancement later, after merging this series.

Will be merging this series with the above fixes.

Thanks,
Gowtham
>
>With above nits addressed:
>
>Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
>
>--
>Zbigniew
>
>> +	enum blt_memop_type type;
>> +	enum blt_compression compression;
>> +	uint32_t width;
>> +	uint32_t height;
>> +	uint32_t pitch;
>> +	uint32_t *ptr;
>> +};
>> +
>>  struct blt_copy_batch {
>>  	uint32_t handle;
>>  	uint32_t region;
>> @@ -112,6 +125,14 @@ struct blt_copy_data {
>>  	bool print_bb;
>>  };
>>
>> +struct blt_mem_data {
>> +	int fd;
>> +	enum intel_driver driver;
>> +	struct blt_mem_object src;
>> +	struct blt_mem_object dst;
>> +	struct blt_copy_batch bb;
>> +};
>> +
>>  enum blt_surface_type {
>>  	SURFACE_TYPE_1D,
>>  	SURFACE_TYPE_2D,
>> @@ -231,6 +252,17 @@ int blt_fast_copy(int fd,
>>  		  uint64_t ahnd,
>>  		  const struct blt_copy_data *blt);
>>
>> +void blt_mem_init(int fd, struct blt_mem_data *mem);
>> +
>> +int blt_mem_copy(int fd, const intel_ctx_t *ctx,
>> +			 const struct intel_execution_engine2 *e,
>> +			 uint64_t ahnd,
>> +			 const struct blt_mem_data *mem);
>> +
>> +int blt_mem_set(int fd, const intel_ctx_t *ctx,
>> +			const struct intel_execution_engine2 *e, uint64_t
>ahnd,
>> +			const struct blt_mem_data *mem, uint8_t fill_data);
>> +
>>  void blt_set_geom(struct blt_copy_object *obj, uint32_t pitch,
>>  		  int16_t x1, int16_t y1, int16_t x2, int16_t y2,
>>  		  uint16_t x_offset, uint16_t y_offset); @@ -250,6 +282,13 @@
>void
>> blt_set_object(struct blt_copy_object *obj,
>>  		    uint8_t mocs_index, enum blt_tiling_type tiling,
>>  		    enum blt_compression compression,
>>  		    enum blt_compression_type compression_type);
>> +
>> +void blt_set_mem_object(struct blt_mem_object *obj,
>> +			uint32_t handle, uint64_t size, uint32_t pitch,
>> +			uint32_t width, uint32_t height, uint32_t region,
>> +			uint8_t mocs, enum blt_memop_type type,
>> +			enum blt_compression compression);
>> +
>>  void blt_set_object_ext(struct blt_block_copy_object_ext *obj,
>>  			uint8_t compression_format,
>>  			uint16_t surface_width, uint16_t surface_height, diff --
>git
>> a/lib/intel_reg.h b/lib/intel_reg.h index ea463376b..a8190d683 100644
>> --- a/lib/intel_reg.h
>> +++ b/lib/intel_reg.h
>> @@ -2588,6 +2588,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN
>THE SOFTWARE.
>>  #define   XY_FAST_COPY_COLOR_DEPTH_64			(4  << 24)
>>  #define   XY_FAST_COPY_COLOR_DEPTH_128			(5  << 24)
>>
>> +/* RAW memory commands */
>> +#define MEM_COPY_CMD                    ((0x2 << 29)|(0x5a << 22)|0x8)
>> +#define MEM_SET_CMD                     ((0x2 << 29)|(0x5b << 22)|0x5)
>> +
>>  #define CTXT_NO_RESTORE			(1)
>>  #define CTXT_PALETTE_SAVE_DISABLE	(1<<3)
>>  #define CTXT_PALETTE_RESTORE_DISABLE	(1<<2)
>> --
>> 2.39.1
>>


More information about the igt-dev mailing list