[igt-dev] [PATCH i-g-t 2/3] lib/i915_blt: Extract blit emit functions
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Mon Dec 12 12:50:34 UTC 2022
Add flexibility in building user pipelines by extracting the blitter
emission code into dedicated functions. The previous blitter functions,
which each perform a single blit-and-execute, are rewritten on top of
those functions.

Requires a stateful allocator (an offset may be acquired more than
once, so it must not change).
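As an illustration (not part of this patch), a minimal sketch of the
intended pipelining usage. It assumes IGT's lib headers, that blt1 and
blt2 reference the same batch buffer handle, and that the execbuf and
object array are set up as in blt_block_copy(); the helper name is
hypothetical:

  /* Hypothetical helper: chain two block-copies in one batch, submit once. */
  static void pipeline_two_block_copies(int i915, uint64_t ahnd,
					const struct blt_copy_data *blt1,
					const struct blt_block_copy_data_ext *ext1,
					const struct blt_copy_data *blt2,
					const struct blt_block_copy_data_ext *ext2,
					struct drm_i915_gem_execbuffer2 *execbuf)
  {
	uint64_t bb_pos = 0;

	/* First copy: leave the batch open (no MI_BATCH_BUFFER_END). */
	bb_pos = emit_blt_block_copy(i915, ahnd, blt1, ext1, bb_pos, false);

	/* Second copy terminates the batch. */
	emit_blt_block_copy(i915, ahnd, blt2, ext2, bb_pos, true);

	/* One submission executes both copies back to back. */
	gem_execbuf(i915, execbuf);
  }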
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
---
lib/i915/i915_blt.c | 263 ++++++++++++++++++++++++++++++++------------
lib/i915/i915_blt.h | 19 ++++
2 files changed, 213 insertions(+), 69 deletions(-)
diff --git a/lib/i915/i915_blt.c b/lib/i915/i915_blt.c
index 42c28623f9..32ad608775 100644
--- a/lib/i915/i915_blt.c
+++ b/lib/i915/i915_blt.c
@@ -503,58 +503,61 @@ static void dump_bb_ext(struct gen12_block_copy_data_ext *data)
}
/**
- * blt_block_copy:
+ * emit_blt_block_copy:
* @i915: drm fd
- * @ctx: intel_ctx_t context
- * @e: blitter engine for @ctx
* @ahnd: allocator handle
* @blt: basic blitter data (for TGL/DG1 which doesn't support ext version)
* @ext: extended blitter data (for DG2+, supports flatccs compression)
+ * @bb_pos: position in the batch at which to insert block-copy commands
+ * @emit_bbe: whether to emit MI_BATCH_BUFFER_END after the block-copy
*
- * Function does blit between @src and @dst described in @blt object.
+ * Function inserts a block-copy blit into the batch at @bb_pos. This allows
+ * concatenating it with other commands to achieve pipelining.
*
* Returns:
- * execbuffer status.
+ * Next write position in batch.
*/
-int blt_block_copy(int i915,
- const intel_ctx_t *ctx,
- const struct intel_execution_engine2 *e,
- uint64_t ahnd,
- const struct blt_copy_data *blt,
- const struct blt_block_copy_data_ext *ext)
+uint64_t emit_blt_block_copy(int i915,
+ uint64_t ahnd,
+ const struct blt_copy_data *blt,
+ const struct blt_block_copy_data_ext *ext,
+ uint64_t bb_pos,
+ bool emit_bbe)
{
- struct drm_i915_gem_execbuffer2 execbuf = {};
- struct drm_i915_gem_exec_object2 obj[3] = {};
struct gen12_block_copy_data data = {};
struct gen12_block_copy_data_ext dext = {};
- uint64_t dst_offset, src_offset, bb_offset, alignment;
- uint32_t *bb;
- int i, ret;
+ uint64_t dst_offset, src_offset, bb_offset;
+ uint32_t bbe = MI_BATCH_BUFFER_END;
+ uint8_t *bb;
igt_assert_f(ahnd, "block-copy supports softpin only\n");
igt_assert_f(blt, "block-copy requires data to do blit\n");
- alignment = gem_detect_safe_alignment(i915);
- src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment);
- if (__special_mode(blt) == SM_FULL_RESOLVE)
- dst_offset = src_offset;
- else
- dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment);
- bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment);
+ src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0);
+ dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0);
+ bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0);
fill_data(&data, blt, src_offset, dst_offset, ext);
- i = sizeof(data) / sizeof(uint32_t);
bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size,
PROT_READ | PROT_WRITE);
- memcpy(bb, &data, sizeof(data));
+
+ igt_assert(bb_pos + sizeof(data) < blt->bb.size);
+ memcpy(bb + bb_pos, &data, sizeof(data));
+ bb_pos += sizeof(data);
if (ext) {
fill_data_ext(&dext, ext);
- memcpy(bb + i, &dext, sizeof(dext));
- i += sizeof(dext) / sizeof(uint32_t);
+ igt_assert(bb_pos + sizeof(dext) < blt->bb.size);
+ memcpy(bb + bb_pos, &dext, sizeof(dext));
+ bb_pos += sizeof(dext);
+ }
+
+ if (emit_bbe) {
+ igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size);
+ memcpy(bb + bb_pos, &bbe, sizeof(bbe));
+ bb_pos += sizeof(uint32_t);
}
- bb[i++] = MI_BATCH_BUFFER_END;
if (blt->print_bb) {
igt_info("[BLOCK COPY]\n");
@@ -569,6 +572,44 @@ int blt_block_copy(int i915,
munmap(bb, blt->bb.size);
+ return bb_pos;
+}
+
+/**
+ * blt_block_copy:
+ * @i915: drm fd
+ * @ctx: intel_ctx_t context
+ * @e: blitter engine for @ctx
+ * @ahnd: allocator handle
+ * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version)
+ * @ext: extended blitter data (for DG2+, supports flatccs compression)
+ *
+ * Function does blit between @src and @dst described in @blt object.
+ *
+ * Returns:
+ * execbuffer status.
+ */
+int blt_block_copy(int i915,
+ const intel_ctx_t *ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t ahnd,
+ const struct blt_copy_data *blt,
+ const struct blt_block_copy_data_ext *ext)
+{
+ struct drm_i915_gem_execbuffer2 execbuf = {};
+ struct drm_i915_gem_exec_object2 obj[3] = {};
+ uint64_t dst_offset, src_offset, bb_offset;
+ int ret;
+
+ igt_assert_f(ahnd, "block-copy supports softpin only\n");
+ igt_assert_f(blt, "block-copy requires data to do blit\n");
+
+ src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0);
+ dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0);
+ bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0);
+
+ emit_blt_block_copy(i915, ahnd, blt, ext, 0, true);
+
obj[0].offset = CANONICAL(dst_offset);
obj[1].offset = CANONICAL(src_offset);
obj[2].offset = CANONICAL(bb_offset);
@@ -655,31 +696,30 @@ static void dump_bb_surf_ctrl_cmd(const struct gen12_ctrl_surf_copy_data *data)
}
/**
- * blt_ctrl_surf_copy:
+ * emit_blt_ctrl_surf_copy:
* @i915: drm fd
- * @ctx: intel_ctx_t context
- * @e: blitter engine for @ctx
* @ahnd: allocator handle
* @surf: blitter data for ctrl-surf-copy
+ * @bb_pos: position in the batch at which to insert ctrl-surf-copy commands
+ * @emit_bbe: whether to emit MI_BATCH_BUFFER_END after the ctrl-surf-copy
*
- * Function does ctrl-surf-copy blit between @src and @dst described in
- * @blt object.
+ * Function emits a ctrl-surf-copy blit between @src and @dst described
+ * in the @surf object at @bb_pos. This allows concatenating it with
+ * other commands to achieve pipelining.
*
* Returns:
- * execbuffer status.
+ * Next write position in batch.
*/
-int blt_ctrl_surf_copy(int i915,
- const intel_ctx_t *ctx,
- const struct intel_execution_engine2 *e,
- uint64_t ahnd,
- const struct blt_ctrl_surf_copy_data *surf)
+uint64_t emit_blt_ctrl_surf_copy(int i915,
+ uint64_t ahnd,
+ const struct blt_ctrl_surf_copy_data *surf,
+ uint64_t bb_pos,
+ bool emit_bbe)
{
- struct drm_i915_gem_execbuffer2 execbuf = {};
- struct drm_i915_gem_exec_object2 obj[3] = {};
struct gen12_ctrl_surf_copy_data data = {};
uint64_t dst_offset, src_offset, bb_offset, alignment;
+ uint32_t bbe = MI_BATCH_BUFFER_END;
uint32_t *bb;
- int i;
igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
@@ -695,12 +735,9 @@ int blt_ctrl_surf_copy(int i915,
data.dw00.size_of_ctrl_copy = __ccs_size(surf) / CCS_RATIO - 1;
data.dw00.length = 0x3;
- src_offset = get_offset(ahnd, surf->src.handle, surf->src.size,
- alignment);
- dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size,
- alignment);
- bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size,
- alignment);
+ src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment);
+ dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment);
+ bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
data.dw01.src_address_lo = src_offset;
data.dw02.src_address_hi = src_offset >> 32;
@@ -710,11 +747,18 @@ int blt_ctrl_surf_copy(int i915,
data.dw04.dst_address_hi = dst_offset >> 32;
data.dw04.dst_mocs = surf->dst.mocs;
- i = sizeof(data) / sizeof(uint32_t);
bb = gem_mmap__device_coherent(i915, surf->bb.handle, 0, surf->bb.size,
PROT_READ | PROT_WRITE);
- memcpy(bb, &data, sizeof(data));
- bb[i++] = MI_BATCH_BUFFER_END;
+
+ igt_assert(bb_pos + sizeof(data) < surf->bb.size);
+ memcpy(bb + bb_pos, &data, sizeof(data));
+ bb_pos += sizeof(data);
+
+ if (emit_bbe) {
+ igt_assert(bb_pos + sizeof(uint32_t) < surf->bb.size);
+ memcpy(bb + bb_pos, &bbe, sizeof(bbe));
+ bb_pos += sizeof(uint32_t);
+ }
if (surf->print_bb) {
igt_info("BB [CTRL SURF]:\n");
@@ -724,8 +768,46 @@ int blt_ctrl_surf_copy(int i915,
dump_bb_surf_ctrl_cmd(&data);
}
+
munmap(bb, surf->bb.size);
+ return bb_pos;
+}
+
+/**
+ * blt_ctrl_surf_copy:
+ * @i915: drm fd
+ * @ctx: intel_ctx_t context
+ * @e: blitter engine for @ctx
+ * @ahnd: allocator handle
+ * @surf: blitter data for ctrl-surf-copy
+ *
+ * Function does a ctrl-surf-copy blit between @src and @dst described in
+ * the @surf object.
+ *
+ * Returns:
+ * execbuffer status.
+ */
+int blt_ctrl_surf_copy(int i915,
+ const intel_ctx_t *ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t ahnd,
+ const struct blt_ctrl_surf_copy_data *surf)
+{
+ struct drm_i915_gem_execbuffer2 execbuf = {};
+ struct drm_i915_gem_exec_object2 obj[3] = {};
+ uint64_t dst_offset, src_offset, bb_offset, alignment;
+
+ igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
+ igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
+
+ alignment = max_t(uint64_t, gem_detect_safe_alignment(i915), 1ull << 16);
+ src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment);
+ dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment);
+ bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
+
+ emit_blt_ctrl_surf_copy(i915, ahnd, surf, 0, true);
+
obj[0].offset = CANONICAL(dst_offset);
obj[1].offset = CANONICAL(src_offset);
obj[2].offset = CANONICAL(bb_offset);
@@ -869,31 +951,31 @@ static void dump_bb_fast_cmd(struct gen12_fast_copy_data *data)
}
/**
- * blt_fast_copy:
+ * emit_blt_fast_copy:
* @i915: drm fd
- * @ctx: intel_ctx_t context
- * @e: blitter engine for @ctx
* @ahnd: allocator handle
* @blt: blitter data for fast-copy (same as for block-copy but doesn't use
* compression fields).
+ * @bb_pos: position in the batch at which to insert fast-copy commands
+ * @emit_bbe: whether to emit MI_BATCH_BUFFER_END after the fast-copy
*
- * Function does fast blit between @src and @dst described in @blt object.
+ * Function emits a fast-copy blit between @src and @dst described in the
+ * @blt object at @bb_pos. This allows concatenating it with other
+ * commands to achieve pipelining.
*
* Returns:
- * execbuffer status.
+ * Next write position in batch.
*/
-int blt_fast_copy(int i915,
- const intel_ctx_t *ctx,
- const struct intel_execution_engine2 *e,
- uint64_t ahnd,
- const struct blt_copy_data *blt)
+uint64_t emit_blt_fast_copy(int i915,
+ uint64_t ahnd,
+ const struct blt_copy_data *blt,
+ uint64_t bb_pos,
+ bool emit_bbe)
{
- struct drm_i915_gem_execbuffer2 execbuf = {};
- struct drm_i915_gem_exec_object2 obj[3] = {};
struct gen12_fast_copy_data data = {};
uint64_t dst_offset, src_offset, bb_offset, alignment;
+ uint32_t bbe = MI_BATCH_BUFFER_END;
uint32_t *bb;
- int i, ret;
alignment = gem_detect_safe_alignment(i915);
@@ -931,22 +1013,65 @@ int blt_fast_copy(int i915,
data.dw08.src_address_lo = src_offset;
data.dw09.src_address_hi = src_offset >> 32;
- i = sizeof(data) / sizeof(uint32_t);
bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size,
PROT_READ | PROT_WRITE);
- memcpy(bb, &data, sizeof(data));
- bb[i++] = MI_BATCH_BUFFER_END;
+ igt_assert(bb_pos + sizeof(data) < blt->bb.size);
+ memcpy(bb + bb_pos, &data, sizeof(data));
+ bb_pos += sizeof(data);
+
+ if (emit_bbe) {
+ igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size);
+ memcpy(bb + bb_pos, &bbe, sizeof(bbe));
+ bb_pos += sizeof(uint32_t);
+ }
if (blt->print_bb) {
igt_info("BB [FAST COPY]\n");
- igt_info("blit [src offset: %llx, dst offset: %llx\n",
- (long long) src_offset, (long long) dst_offset);
+ igt_info("src offset: %llx, dst offset: %llx, bb offset: %llx\n",
+ (long long) src_offset, (long long) dst_offset,
+ (long long) bb_offset);
dump_bb_fast_cmd(&data);
}
munmap(bb, blt->bb.size);
+ return bb_pos;
+}
+
+/**
+ * blt_fast_copy:
+ * @i915: drm fd
+ * @ctx: intel_ctx_t context
+ * @e: blitter engine for @ctx
+ * @ahnd: allocator handle
+ * @blt: blitter data for fast-copy (same as for block-copy but doesn't use
+ * compression fields).
+ *
+ * Function does fast blit between @src and @dst described in @blt object.
+ *
+ * Returns:
+ * execbuffer status.
+ */
+int blt_fast_copy(int i915,
+ const intel_ctx_t *ctx,
+ const struct intel_execution_engine2 *e,
+ uint64_t ahnd,
+ const struct blt_copy_data *blt)
+{
+ struct drm_i915_gem_execbuffer2 execbuf = {};
+ struct drm_i915_gem_exec_object2 obj[3] = {};
+ uint64_t dst_offset, src_offset, bb_offset, alignment;
+ int ret;
+
+ alignment = gem_detect_safe_alignment(i915);
+
+ src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment);
+ dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment);
+ bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment);
+
+ emit_blt_fast_copy(i915, ahnd, blt, 0, true);
+
obj[0].offset = CANONICAL(dst_offset);
obj[1].offset = CANONICAL(src_offset);
obj[2].offset = CANONICAL(bb_offset);
diff --git a/lib/i915/i915_blt.h b/lib/i915/i915_blt.h
index e0e8b52bc2..34db9bb962 100644
--- a/lib/i915/i915_blt.h
+++ b/lib/i915/i915_blt.h
@@ -168,6 +168,13 @@ bool blt_supports_compression(int i915);
bool blt_supports_tiling(int i915, enum blt_tiling tiling);
const char *blt_tiling_name(enum blt_tiling tiling);
+uint64_t emit_blt_block_copy(int i915,
+ uint64_t ahnd,
+ const struct blt_copy_data *blt,
+ const struct blt_block_copy_data_ext *ext,
+ uint64_t bb_pos,
+ bool emit_bbe);
+
int blt_block_copy(int i915,
const intel_ctx_t *ctx,
const struct intel_execution_engine2 *e,
@@ -175,12 +182,24 @@ int blt_block_copy(int i915,
const struct blt_copy_data *blt,
const struct blt_block_copy_data_ext *ext);
+uint64_t emit_blt_ctrl_surf_copy(int i915,
+ uint64_t ahnd,
+ const struct blt_ctrl_surf_copy_data *surf,
+ uint64_t bb_pos,
+ bool emit_bbe);
+
int blt_ctrl_surf_copy(int i915,
const intel_ctx_t *ctx,
const struct intel_execution_engine2 *e,
uint64_t ahnd,
const struct blt_ctrl_surf_copy_data *surf);
+uint64_t emit_blt_fast_copy(int i915,
+ uint64_t ahnd,
+ const struct blt_copy_data *blt,
+ uint64_t bb_pos,
+ bool emit_bbe);
+
int blt_fast_copy(int i915,
const intel_ctx_t *ctx,
const struct intel_execution_engine2 *e,
--
2.34.1