[igt-dev] [PATCH i-g-t 1/2] lib/intel_blt: Add wrappers to prepare batch buffers and submit exec
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Mon Oct 16 08:54:46 UTC 2023
On Fri, Oct 13, 2023 at 04:07:27PM +0530, sai.gowtham.ch at intel.com wrote:
> From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
>
> Add wrappers for the mem-set and mem-copy instructions that prepare
> batch buffers and submit the exec (blt_mem_copy, blt_mem_set,
> emit_blt_mem_copy, emit_blt_mem_set).
>
> Cc: Karolina Stolarek <karolina.stolarek at intel.com>
> Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
> ---
> lib/intel_blt.c | 199 ++++++++++++++++++++++++++++++++++++++++++++++++
> lib/intel_blt.h | 39 ++++++++++
> lib/intel_reg.h | 4 +
> 3 files changed, 242 insertions(+)
>
> diff --git a/lib/intel_blt.c b/lib/intel_blt.c
> index a76c7a404..4e7357b6f 100644
> --- a/lib/intel_blt.c
> +++ b/lib/intel_blt.c
> @@ -13,12 +13,14 @@
> #include "igt.h"
> #include "igt_syncobj.h"
> #include "intel_blt.h"
> +#include "intel_mocs.h"
> #include "xe/xe_ioctl.h"
> #include "xe/xe_query.h"
> #include "xe/xe_util.h"
>
> #define BITRANGE(start, end) (end - start + 1)
> #define GET_CMDS_INFO(__fd) intel_get_cmds_info(intel_get_drm_devid(__fd))
> +#define MEM_COPY_MOCS_SHIFT 25
>
> /* Blitter tiling definitions sanity checks */
> static_assert(T_LINEAR == I915_TILING_NONE, "Linear definitions have to match");
> @@ -1577,6 +1579,186 @@ int blt_fast_copy(int fd,
> return ret;
> }
>
> +/**
> + * blt_mem_init:
> + * @fd: drm fd
> + * @mem: structure for initialization
> + *
> + * Function zeroes @mem and sets its fd and driver fields (INTEL_DRIVER_I915
> + * or INTEL_DRIVER_XE).
> + */
> +void blt_mem_init(int fd, struct blt_mem_data *mem)
> +{
> + memset(mem, 0, sizeof(*mem));
> +
> + mem->fd = fd;
> + mem->driver = get_intel_driver(fd);
> +}
> +
> +static void emit_blt_mem_copy(int fd, uint64_t ahnd, const struct blt_mem_data *mem)
> +{
> + uint64_t dst_offset, src_offset, alignment;
> + int i;
> + uint32_t *batch;
> + uint32_t optype;
> +
> + alignment = get_default_alignment(fd, mem->driver);
> + src_offset = get_offset(ahnd, mem->src.handle, mem->src.size, alignment);
> + dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
> +
> + batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
> + optype = mem->src.type == M_MATRIX ? 1 << 17 : 0;
> +
> + i = 0;
> + batch[i++] = MEM_COPY_CMD | (1 << 19) | optype;
> + batch[i++] = mem->src.width - 1;
> + batch[i++] = mem->src.height - 1;
> + batch[i++] = mem->src.pitch - 1;
> + batch[i++] = mem->dst.pitch - 1;
> + batch[i++] = src_offset;
> + batch[i++] = src_offset << 32;
> + batch[i++] = dst_offset;
> + batch[i++] = dst_offset << 32;
> + batch[i++] = mem->src.mocs << MEM_COPY_MOCS_SHIFT | mem->dst.mocs;
> + batch[i++] = MI_BATCH_BUFFER_END;
> +
> + munmap(batch, mem->bb.size);
> +}
> +
> +/**
> + * blt_mem_copy:
> + * @fd: drm fd
> + * @ctx: intel_ctx_t context
> + * @e: blitter engine for @ctx
> + * @ahnd: allocator handle
> + * @mem: blitter data for mem-copy.
> + *
> + * Function does mem blit between @src and @dst described in @mem object.
> + *
> + * Returns:
> + * execbuffer status.
> + */
> +int blt_mem_copy(int fd, const intel_ctx_t *ctx,
> + const struct intel_execution_engine2 *e,
> + uint64_t ahnd,
> + const struct blt_mem_data *mem)
> +{
> + struct drm_i915_gem_execbuffer2 execbuf = {};
> + struct drm_i915_gem_exec_object2 obj[3] = {};
> + uint64_t dst_offset, src_offset, bb_offset, alignment;
> + int ret;
> +
> + alignment = get_default_alignment(fd, mem->driver);
> + src_offset = get_offset(ahnd, mem->src.handle, mem->src.size, alignment);
> + dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
> + bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size, alignment);
> +
> + emit_blt_mem_copy(fd, ahnd, mem);
> +
> + if (mem->driver == INTEL_DRIVER_XE) {
> + intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
> + } else {
> + obj[0].offset = CANONICAL(dst_offset);
> + obj[1].offset = CANONICAL(src_offset);
> + obj[2].offset = CANONICAL(bb_offset);
> + obj[0].handle = mem->dst.handle;
> + obj[1].handle = mem->src.handle;
> + obj[2].handle = mem->bb.handle;
> + obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
> + EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + obj[1].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + obj[2].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + execbuf.buffer_count = 3;
> + execbuf.buffers_ptr = to_user_pointer(obj);
> + execbuf.rsvd1 = ctx ? ctx->id : 0;
> + execbuf.flags = e ? e->flags : I915_EXEC_BLT;
> + ret = __gem_execbuf(fd, &execbuf);
> + put_offset(ahnd, mem->dst.handle);
> + put_offset(ahnd, mem->src.handle);
> + put_offset(ahnd, mem->bb.handle);
> + }
> +
> + return ret;
> +}
> +
> +static void emit_blt_mem_set(int fd, uint64_t ahnd, const struct blt_mem_data *mem,
> + uint8_t fill_data)
> +{
> + uint64_t dst_offset, alignment;
> + int b;
> + uint32_t *batch;
> + uint32_t value;
> +
> + alignment = get_default_alignment(fd, mem->driver);
> + dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
> +
> + batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
> + value = (uint32_t)fill_data << 24;
> +
> + b = 0;
> + batch[b++] = MEM_SET_CMD;
> + batch[b++] = mem->dst.width - 1;
> + batch[b++] = mem->dst.height - 1;
> + batch[b++] = mem->dst.pitch - 1;
> + batch[b++] = dst_offset;
> + batch[b++] = dst_offset << 32;
> + batch[b++] = value | mem->dst.mocs;
> + batch[b++] = MI_BATCH_BUFFER_END;
> +
> + munmap(batch, mem->bb.size);
> +}
> +/**
> + * blt_mem_set:
> + * @fd: drm fd
> + * @ctx: intel_ctx_t context
> + * @e: blitter engine for @ctx
> + * @ahnd: allocator handle
> + * @mem: blitter data for mem-set.
> + *
> + * Function does mem-set blit described in @mem object.
> + *
> + * Returns:
> + * execbuffer status.
> + */
> +int blt_mem_set(int fd, const intel_ctx_t *ctx,
> + const struct intel_execution_engine2 *e,
> + uint64_t ahnd,
> + const struct blt_mem_data *mem,
> + uint8_t fill_data)
> +{
> + struct drm_i915_gem_execbuffer2 execbuf = {};
> + struct drm_i915_gem_exec_object2 obj[2] = {};
> + uint64_t dst_offset, bb_offset, alignment;
> + int ret;
> +
> + alignment = get_default_alignment(fd, mem->driver);
> + dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
> + bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size, alignment);
> +
> + emit_blt_mem_set(fd, ahnd, mem, fill_data);
> +
> + if (mem->driver == INTEL_DRIVER_XE) {
> + intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
> + } else {
> + obj[0].offset = CANONICAL(dst_offset);
> + obj[1].offset = CANONICAL(bb_offset);
> + obj[0].handle = mem->dst.handle;
> + obj[1].handle = mem->bb.handle;
> + obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
> + EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + obj[1].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
> + execbuf.buffer_count = 2;
> + execbuf.buffers_ptr = to_user_pointer(obj);
> + execbuf.rsvd1 = ctx ? ctx->id : 0;
> + execbuf.flags = e ? e->flags : I915_EXEC_BLT;
> + ret = __gem_execbuf(fd, &execbuf);
> + put_offset(ahnd, mem->dst.handle);
> + put_offset(ahnd, mem->bb.handle);
> + }
> +
> + return ret;
> +}
> +
> void blt_set_geom(struct blt_copy_object *obj, uint32_t pitch,
> int16_t x1, int16_t y1, int16_t x2, int16_t y2,
> uint16_t x_offset, uint16_t y_offset)
> @@ -1659,6 +1841,23 @@ void blt_set_object(struct blt_copy_object *obj,
> obj->compression_type = compression_type;
> }
>
> +void blt_set_mem_object(struct blt_mem_object *obj,
> + uint32_t handle, uint64_t size, uint32_t pitch,
> + uint32_t width, uint32_t height, uint32_t region,
> + uint8_t mocs, enum blt_memop_type type,
> + enum blt_compression compression)
> +{
> + obj->handle = handle;
> + obj->region = region;
> + obj->size = size;
> + obj->mocs = mocs;
> + obj->type = type;
> + obj->compression = compression;
> + obj->width = width;
> + obj->height = height;
> + obj->pitch = pitch;
> +}
> +
> void blt_set_object_ext(struct blt_block_copy_object_ext *obj,
> uint8_t compression_format,
> uint16_t surface_width, uint16_t surface_height,
> diff --git a/lib/intel_blt.h b/lib/intel_blt.h
> index 7b4271620..d6f40680d 100644
> --- a/lib/intel_blt.h
> +++ b/lib/intel_blt.h
> @@ -93,6 +93,19 @@ struct blt_copy_object {
> uint32_t plane_offset;
> };
>
> +struct blt_mem_object {
> + uint32_t handle;
> + uint32_t region;
> + uint64_t size;
> + uint8_t mocs;
Rename to mocs_index (see gen12_block_copy_data) for consistency.
The rest looks ok. BTW, looking at mem-copy I see M_MATRIX is supported
on PVC, so you should update intel_cmds_info.c as well.
What about printing the instruction for debugging, similar to
dump_bb_fast_cmd (e.g. when print_bb is true)?
With the above nits addressed:
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
--
Zbigniew
> + enum blt_memop_type type;
> + enum blt_compression compression;
> + uint32_t width;
> + uint32_t height;
> + uint32_t pitch;
> + uint32_t *ptr;
> +};
> +
> struct blt_copy_batch {
> uint32_t handle;
> uint32_t region;
> @@ -112,6 +125,14 @@ struct blt_copy_data {
> bool print_bb;
> };
>
> +struct blt_mem_data {
> + int fd;
> + enum intel_driver driver;
> + struct blt_mem_object src;
> + struct blt_mem_object dst;
> + struct blt_copy_batch bb;
> +};
> +
> enum blt_surface_type {
> SURFACE_TYPE_1D,
> SURFACE_TYPE_2D,
> @@ -231,6 +252,17 @@ int blt_fast_copy(int fd,
> uint64_t ahnd,
> const struct blt_copy_data *blt);
>
> +void blt_mem_init(int fd, struct blt_mem_data *mem);
> +
> +int blt_mem_copy(int fd, const intel_ctx_t *ctx,
> + const struct intel_execution_engine2 *e,
> + uint64_t ahnd,
> + const struct blt_mem_data *mem);
> +
> +int blt_mem_set(int fd, const intel_ctx_t *ctx,
> + const struct intel_execution_engine2 *e, uint64_t ahnd,
> + const struct blt_mem_data *mem, uint8_t fill_data);
> +
> void blt_set_geom(struct blt_copy_object *obj, uint32_t pitch,
> int16_t x1, int16_t y1, int16_t x2, int16_t y2,
> uint16_t x_offset, uint16_t y_offset);
> @@ -250,6 +282,13 @@ void blt_set_object(struct blt_copy_object *obj,
> uint8_t mocs_index, enum blt_tiling_type tiling,
> enum blt_compression compression,
> enum blt_compression_type compression_type);
> +
> +void blt_set_mem_object(struct blt_mem_object *obj,
> + uint32_t handle, uint64_t size, uint32_t pitch,
> + uint32_t width, uint32_t height, uint32_t region,
> + uint8_t mocs, enum blt_memop_type type,
> + enum blt_compression compression);
> +
> void blt_set_object_ext(struct blt_block_copy_object_ext *obj,
> uint8_t compression_format,
> uint16_t surface_width, uint16_t surface_height,
> diff --git a/lib/intel_reg.h b/lib/intel_reg.h
> index ea463376b..a8190d683 100644
> --- a/lib/intel_reg.h
> +++ b/lib/intel_reg.h
> @@ -2588,6 +2588,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
> #define XY_FAST_COPY_COLOR_DEPTH_64 (4 << 24)
> #define XY_FAST_COPY_COLOR_DEPTH_128 (5 << 24)
>
> +/* RAW memory commands */
> +#define MEM_COPY_CMD ((0x2 << 29)|(0x5a << 22)|0x8)
> +#define MEM_SET_CMD ((0x2 << 29)|(0x5b << 22)|0x5)
> +
> #define CTXT_NO_RESTORE (1)
> #define CTXT_PALETTE_SAVE_DISABLE (1<<3)
> #define CTXT_PALETTE_RESTORE_DISABLE (1<<2)
> --
> 2.39.1
>
More information about the igt-dev
mailing list