[igt-dev] [PATCH i-g-t 1/3] lib/intel_blt: Add wrappers to prepare batch buffers and submit exec
sai.gowtham.ch at intel.com
sai.gowtham.ch at intel.com
Tue Oct 17 14:05:28 UTC 2023
From: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
Add wrappers for the mem-set and mem-copy instructions that prepare
the batch buffer and submit the exec (blt_mem_copy, blt_mem_set,
emit_blt_mem_copy, emit_blt_mem_set).
Cc: Karolina Stolarek <karolina.stolarek at intel.com>
Cc: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Signed-off-by: Sai Gowtham Ch <sai.gowtham.ch at intel.com>
Reviewed-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
---
lib/intel_blt.c | 200 ++++++++++++++++++++++++++++++++++++++++++++++++
lib/intel_blt.h | 39 ++++++++++
lib/intel_reg.h | 4 +
3 files changed, 243 insertions(+)
diff --git a/lib/intel_blt.c b/lib/intel_blt.c
index a76c7a404..28dc9e96b 100644
--- a/lib/intel_blt.c
+++ b/lib/intel_blt.c
@@ -13,12 +13,14 @@
#include "igt.h"
#include "igt_syncobj.h"
#include "intel_blt.h"
+#include "intel_mocs.h"
#include "xe/xe_ioctl.h"
#include "xe/xe_query.h"
#include "xe/xe_util.h"
#define BITRANGE(start, end) (end - start + 1)
#define GET_CMDS_INFO(__fd) intel_get_cmds_info(intel_get_drm_devid(__fd))
+#define MEM_COPY_MOCS_SHIFT 25
/* Blitter tiling definitions sanity checks */
static_assert(T_LINEAR == I915_TILING_NONE, "Linear definitions have to match");
@@ -1577,6 +1579,187 @@ int blt_fast_copy(int fd,
return ret;
}
+/**
+ * blt_mem_init:
+ * @fd: drm fd
+ * @mem: structure for initialization
+ *
+ * Clears the whole @mem structure, then stores @fd and the driver reported
+ * by get_intel_driver() (INTEL_DRIVER_I915 or INTEL_DRIVER_XE) in it.
+ */
+void blt_mem_init(int fd, struct blt_mem_data *mem)
+{
+	/* Start from an all-zero structure before filling fd and driver. */
+	memset(mem, 0, sizeof(struct blt_mem_data));
+	mem->driver = get_intel_driver(fd);
+	mem->fd = fd;
+}
+
+/*
+ * Write a MEM_COPY command followed by MI_BATCH_BUFFER_END into the batch
+ * buffer of @mem; src/dst addresses come from get_offset() on @ahnd.
+ */
+static void emit_blt_mem_copy(int fd, uint64_t ahnd, const struct blt_mem_data *mem)
+{
+	uint64_t dst_offset, src_offset, alignment;
+	uint32_t *batch, optype;
+	int i = 0;
+
+	alignment = get_default_alignment(fd, mem->driver);
+	src_offset = get_offset(ahnd, mem->src.handle, mem->src.size, alignment);
+	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
+	batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
+	optype = mem->src.type == M_MATRIX ? 1 << 17 : 0;
+
+	batch[i++] = MEM_COPY_CMD | (1 << 19) | optype;
+	batch[i++] = mem->src.width - 1;
+	batch[i++] = mem->src.height - 1;
+	batch[i++] = mem->src.pitch - 1;
+	batch[i++] = mem->dst.pitch - 1;
+	batch[i++] = src_offset;	/* low dword, truncated on store */
+	batch[i++] = src_offset >> 32;	/* high dword; "<< 32" always stored 0 */
+	batch[i++] = dst_offset;
+	batch[i++] = dst_offset >> 32;
+	batch[i++] = (uint32_t)mem->src.mocs_index << MEM_COPY_MOCS_SHIFT | mem->dst.mocs_index;
+	batch[i++] = MI_BATCH_BUFFER_END;
+	munmap(batch, mem->bb.size);
+}
+
+/**
+ * blt_mem_copy:
+ * @fd: drm fd
+ * @ctx: intel_ctx_t context
+ * @e: blitter engine for @ctx
+ * @ahnd: allocator handle
+ * @mem: blitter data for mem-copy.
+ *
+ * Function does mem blit between @src and @dst described in @mem object.
+ *
+ * Returns:
+ * execbuffer status on i915, 0 on xe (exec result is not captured there).
+ */
+int blt_mem_copy(int fd, const intel_ctx_t *ctx,
+		 const struct intel_execution_engine2 *e,
+		 uint64_t ahnd,
+		 const struct blt_mem_data *mem)
+{
+	struct drm_i915_gem_execbuffer2 execbuf = {};
+	struct drm_i915_gem_exec_object2 obj[3] = {};
+	uint64_t dst_offset, src_offset, bb_offset, alignment;
+	int ret = 0; /* xe branch never assigns it; was returned uninitialized */
+
+	alignment = get_default_alignment(fd, mem->driver);
+	src_offset = get_offset(ahnd, mem->src.handle, mem->src.size, alignment);
+	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
+	bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size, alignment);
+
+	emit_blt_mem_copy(fd, ahnd, mem);
+
+	if (mem->driver == INTEL_DRIVER_XE) {
+		intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
+	} else {
+		obj[0].offset = CANONICAL(dst_offset);
+		obj[1].offset = CANONICAL(src_offset);
+		obj[2].offset = CANONICAL(bb_offset);
+		obj[0].handle = mem->dst.handle;
+		obj[1].handle = mem->src.handle;
+		obj[2].handle = mem->bb.handle;
+		obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
+			       EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj[1].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj[2].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		execbuf.buffer_count = 3;
+		execbuf.buffers_ptr = to_user_pointer(obj);
+		execbuf.rsvd1 = ctx ? ctx->id : 0;
+		execbuf.flags = e ? e->flags : I915_EXEC_BLT;
+		ret = __gem_execbuf(fd, &execbuf);
+		put_offset(ahnd, mem->dst.handle);
+		put_offset(ahnd, mem->src.handle);
+		put_offset(ahnd, mem->bb.handle);
+	}
+
+	return ret;
+}
+
+/*
+ * Write a MEM_SET command followed by MI_BATCH_BUFFER_END into the batch
+ * buffer of @mem. @fill_data goes into bits 31:24 of the last command dword,
+ * OR-ed with the destination mocs_index.
+ */
+static void emit_blt_mem_set(int fd, uint64_t ahnd, const struct blt_mem_data *mem,
+			     uint8_t fill_data)
+{
+	uint64_t dst_offset, alignment;
+	uint32_t *batch, value;
+	int b = 0;
+
+	alignment = get_default_alignment(fd, mem->driver);
+	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
+	batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
+	value = (uint32_t)fill_data << 24;
+
+	batch[b++] = MEM_SET_CMD;
+	batch[b++] = mem->dst.width - 1;
+	batch[b++] = mem->dst.height - 1;
+	batch[b++] = mem->dst.pitch - 1;
+	batch[b++] = dst_offset;	/* low dword, truncated on store */
+	batch[b++] = dst_offset >> 32;	/* high dword; "<< 32" always stored 0 */
+	batch[b++] = value | mem->dst.mocs_index;
+	batch[b++] = MI_BATCH_BUFFER_END;
+	munmap(batch, mem->bb.size);
+}
+/**
+ * blt_mem_set:
+ * @fd: drm fd
+ * @ctx: intel_ctx_t context
+ * @e: blitter engine for @ctx
+ * @ahnd: allocator handle
+ * @mem: blitter data for mem-set.
+ * @fill_data: byte value placed in the mem-set command as fill pattern
+ *
+ * Function does mem set blit on @dst described in @mem object.
+ *
+ * Returns: execbuffer status on i915, 0 on xe (exec result not captured).
+ */
+int blt_mem_set(int fd, const intel_ctx_t *ctx,
+		const struct intel_execution_engine2 *e,
+		uint64_t ahnd,
+		const struct blt_mem_data *mem,
+		uint8_t fill_data)
+{
+	struct drm_i915_gem_execbuffer2 execbuf = {};
+	struct drm_i915_gem_exec_object2 obj[2] = {};
+	uint64_t dst_offset, bb_offset, alignment;
+	int ret = 0; /* xe branch never assigns it; was returned uninitialized */
+
+	alignment = get_default_alignment(fd, mem->driver);
+	dst_offset = get_offset(ahnd, mem->dst.handle, mem->dst.size, alignment);
+	bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size, alignment);
+
+	emit_blt_mem_set(fd, ahnd, mem, fill_data);
+
+	if (mem->driver == INTEL_DRIVER_XE) {
+		intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
+	} else {
+		obj[0].offset = CANONICAL(dst_offset);
+		obj[1].offset = CANONICAL(bb_offset);
+		obj[0].handle = mem->dst.handle;
+		obj[1].handle = mem->bb.handle;
+		obj[0].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_WRITE |
+			       EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		obj[1].flags = EXEC_OBJECT_PINNED | EXEC_OBJECT_SUPPORTS_48B_ADDRESS;
+		execbuf.buffer_count = 2;
+		execbuf.buffers_ptr = to_user_pointer(obj);
+		execbuf.rsvd1 = ctx ? ctx->id : 0;
+		execbuf.flags = e ? e->flags : I915_EXEC_BLT;
+		ret = __gem_execbuf(fd, &execbuf);
+		put_offset(ahnd, mem->dst.handle);
+		put_offset(ahnd, mem->bb.handle);
+	}
+
+	return ret;
+}
+
void blt_set_geom(struct blt_copy_object *obj, uint32_t pitch,
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
uint16_t x_offset, uint16_t y_offset)
@@ -1659,6 +1842,23 @@ void blt_set_object(struct blt_copy_object *obj,
obj->compression_type = compression_type;
}
+/* Store the given arguments in @obj; obj->ptr is not modified. */
+void blt_set_mem_object(struct blt_mem_object *obj,
+			uint32_t handle, uint64_t size, uint32_t pitch,
+			uint32_t width, uint32_t height, uint32_t region, uint8_t mocs_index,
+			enum blt_memop_type type, enum blt_compression compression)
+{
+	obj->width = width;
+	obj->height = height;
+	obj->pitch = pitch;
+	obj->size = size;
+	obj->handle = handle;
+	obj->region = region;
+	obj->mocs_index = mocs_index;
+	obj->type = type;
+	obj->compression = compression;
+}
+
void blt_set_object_ext(struct blt_block_copy_object_ext *obj,
uint8_t compression_format,
uint16_t surface_width, uint16_t surface_height,
diff --git a/lib/intel_blt.h b/lib/intel_blt.h
index 7b4271620..01a7e117a 100644
--- a/lib/intel_blt.h
+++ b/lib/intel_blt.h
@@ -93,6 +93,19 @@ struct blt_copy_object {
uint32_t plane_offset;
};
+struct blt_mem_object { /* one buffer (src or dst) of a mem-copy / mem-set */
+ uint32_t handle; /* buffer handle; used for get_offset() and i915 exec objects */
+ uint32_t region; /* stored by blt_set_mem_object(); not read by the code in this patch */
+ uint64_t size; /* size passed to get_offset() for this buffer */
+ uint8_t mocs_index; /* OR-ed into the last dword of MEM_COPY / MEM_SET */
+ enum blt_memop_type type; /* src M_MATRIX sets bit 17 of the MEM_COPY header */
+ enum blt_compression compression; /* stored only; not read by the code in this patch */
+ uint32_t width; /* emitted into the command as width - 1 */
+ uint32_t height; /* emitted into the command as height - 1 */
+ uint32_t pitch; /* emitted into the command as pitch - 1 */
+ uint32_t *ptr; /* not set by blt_set_mem_object(); NOTE(review): presumably a CPU mapping - confirm */
+};
+
struct blt_copy_batch {
uint32_t handle;
uint32_t region;
@@ -112,6 +125,14 @@ struct blt_copy_data {
bool print_bb;
};
+struct blt_mem_data { /* argument bundle for blt_mem_copy() / blt_mem_set() */
+ int fd; /* drm fd, stored by blt_mem_init() */
+ enum intel_driver driver; /* from get_intel_driver(); selects xe vs i915 submission */
+ struct blt_mem_object src; /* source buffer (read by blt_mem_copy() only) */
+ struct blt_mem_object dst; /* destination buffer */
+ struct blt_copy_batch bb; /* batch buffer the command is written into */
+};
+
enum blt_surface_type {
SURFACE_TYPE_1D,
SURFACE_TYPE_2D,
@@ -231,6 +252,17 @@ int blt_fast_copy(int fd,
uint64_t ahnd,
const struct blt_copy_data *blt);
+void blt_mem_init(int fd, struct blt_mem_data *mem);
+
+int blt_mem_copy(int fd, const intel_ctx_t *ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t ahnd,
+ const struct blt_mem_data *mem);
+
+int blt_mem_set(int fd, const intel_ctx_t *ctx,
+ const struct intel_execution_engine2 *e, uint64_t ahnd,
+ const struct blt_mem_data *mem, uint8_t fill_data);
+
void blt_set_geom(struct blt_copy_object *obj, uint32_t pitch,
int16_t x1, int16_t y1, int16_t x2, int16_t y2,
uint16_t x_offset, uint16_t y_offset);
@@ -250,6 +282,13 @@ void blt_set_object(struct blt_copy_object *obj,
uint8_t mocs_index, enum blt_tiling_type tiling,
enum blt_compression compression,
enum blt_compression_type compression_type);
+
+void blt_set_mem_object(struct blt_mem_object *obj,
+ uint32_t handle, uint64_t size, uint32_t pitch,
+ uint32_t width, uint32_t height, uint32_t region,
+ uint8_t mocs_index, enum blt_memop_type type,
+ enum blt_compression compression);
+
void blt_set_object_ext(struct blt_block_copy_object_ext *obj,
uint8_t compression_format,
uint16_t surface_width, uint16_t surface_height,
diff --git a/lib/intel_reg.h b/lib/intel_reg.h
index ea463376b..a8190d683 100644
--- a/lib/intel_reg.h
+++ b/lib/intel_reg.h
@@ -2588,6 +2588,10 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#define XY_FAST_COPY_COLOR_DEPTH_64 (4 << 24)
#define XY_FAST_COPY_COLOR_DEPTH_128 (5 << 24)
+/* RAW memory commands */
+#define MEM_COPY_CMD ((0x2 << 29)|(0x5a << 22)|0x8)
+#define MEM_SET_CMD ((0x2 << 29)|(0x5b << 22)|0x5)
+
#define CTXT_NO_RESTORE (1)
#define CTXT_PALETTE_SAVE_DISABLE (1<<3)
#define CTXT_PALETTE_RESTORE_DISABLE (1<<2)
--
2.39.1
More information about the igt-dev
mailing list