[igt-dev] [PATCH i-g-t 2/3] lib/i915_blt: Extract blit emit functions
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Wed Dec 14 19:40:36 UTC 2022
On Tue, Dec 13, 2022 at 04:39:14PM +0100, Karolina Stolarek wrote:
> On 12.12.2022 13:50, Zbigniew Kempczyński wrote:
> > Add some flexibility in building user pipelines extracting blitter
> > emission code to dedicated functions. Previous blitter functions which
> > do one blit-and-execute are rewritten to use those functions.
> > Requires usage with a stateful allocator (the offset might be acquired more
> > than once, so it must not change).
> >
> > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > ---
> > lib/i915/i915_blt.c | 263 ++++++++++++++++++++++++++++++++------------
> > lib/i915/i915_blt.h | 19 ++++
> > 2 files changed, 213 insertions(+), 69 deletions(-)
> >
> > diff --git a/lib/i915/i915_blt.c b/lib/i915/i915_blt.c
> > index 42c28623f9..32ad608775 100644
> > --- a/lib/i915/i915_blt.c
> > +++ b/lib/i915/i915_blt.c
> > @@ -503,58 +503,61 @@ static void dump_bb_ext(struct gen12_block_copy_data_ext *data)
> > }
> > /**
> > - * blt_block_copy:
> > + * emit_blt_block_copy:
> > * @i915: drm fd
> > - * @ctx: intel_ctx_t context
> > - * @e: blitter engine for @ctx
> > * @ahnd: allocator handle
> > * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version)
> > * @ext: extended blitter data (for DG2+, supports flatccs compression)
> > + * @bb_pos: position at which insert block copy commands
> > + * @emit_bbe: emit MI_BATCH_BUFFER_END after block-copy or not
> > *
> > - * Function does blit between @src and @dst described in @blt object.
> > + * Function inserts block-copy blit into batch at @bb_pos. Allows concatenating
> > + * with other commands to achieve pipelining.
> > *
> > * Returns:
> > - * execbuffer status.
> > + * Next write position in batch.
> > */
> > -int blt_block_copy(int i915,
> > - const intel_ctx_t *ctx,
> > - const struct intel_execution_engine2 *e,
> > - uint64_t ahnd,
> > - const struct blt_copy_data *blt,
> > - const struct blt_block_copy_data_ext *ext)
> > +uint64_t emit_blt_block_copy(int i915,
> > + uint64_t ahnd,
> > + const struct blt_copy_data *blt,
> > + const struct blt_block_copy_data_ext *ext,
> > + uint64_t bb_pos,
> > + bool emit_bbe)
> > {
> > - struct drm_i915_gem_execbuffer2 execbuf = {};
> > - struct drm_i915_gem_exec_object2 obj[3] = {};
> > struct gen12_block_copy_data data = {};
> > struct gen12_block_copy_data_ext dext = {};
> > - uint64_t dst_offset, src_offset, bb_offset, alignment;
> > - uint32_t *bb;
> > - int i, ret;
> > + uint64_t dst_offset, src_offset, bb_offset;
> > + uint32_t bbe = MI_BATCH_BUFFER_END;
> > + uint8_t *bb;
> > igt_assert_f(ahnd, "block-copy supports softpin only\n");
> > igt_assert_f(blt, "block-copy requires data to do blit\n");
> > - alignment = gem_detect_safe_alignment(i915);
> > - src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment);
> > - if (__special_mode(blt) == SM_FULL_RESOLVE)
> > - dst_offset = src_offset;
> > - else
> > - dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment);
> > - bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment);
> > + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0);
> > + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0);
> > + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0);
>
> Why do we pass 0 as object alignment here? In surf-copy and fast-copy we
> pass "alignment" in.
After rethinking, you're right: the caller may have opened the allocator
with an unacceptable alignment (like 4K where we use smem and lmem regions),
so enforcing the safe alignment will protect us from ENOSPC.
A v2 will be sent after you comment on the reply to the previous patch.
Anyway, thanks for pointing this out.
>
> > fill_data(&data, blt, src_offset, dst_offset, ext);
> > - i = sizeof(data) / sizeof(uint32_t);
> > bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size,
> > PROT_READ | PROT_WRITE);
> > - memcpy(bb, &data, sizeof(data));
> > +
> > + igt_assert(bb_pos + sizeof(data) < blt->bb.size);
>
> I'd say we need extra space for potential MI_BATCH_BUFFER_END here
I don't assume how many steps (memcpy's to the bb) a blit will take,
so I incrementally check whether there's enough space in the bb. See all
the igt_assert()s below - one for dext and a second for bbe (notice that
bbe might not be emitted - but then it is the caller's responsibility to
handle the case where the emitted instructions consumed all bb dwords).
>
> > + memcpy(bb + bb_pos, &data, sizeof(data));
> > + bb_pos += sizeof(data);
> > if (ext) {
> > fill_data_ext(&dext, ext);
> > - memcpy(bb + i, &dext, sizeof(dext));
> > - i += sizeof(dext) / sizeof(uint32_t);
> > + igt_assert(bb_pos + sizeof(dext) < blt->bb.size);
> > + memcpy(bb + bb_pos, &dext, sizeof(dext));
> > + bb_pos += sizeof(dext);
> > + }
> > +
> > + if (emit_bbe) {
> > + igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size);
> > + memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> > + bb_pos += sizeof(uint32_t);
> > }
> > - bb[i++] = MI_BATCH_BUFFER_END;
> > if (blt->print_bb) {
> > igt_info("[BLOCK COPY]\n");
> > @@ -569,6 +572,44 @@ int blt_block_copy(int i915,
> > munmap(bb, blt->bb.size);
> > + return bb_pos;
> > +}
> > +
> > +/**
> > + * blt_block_copy:
> > + * @i915: drm fd
> > + * @ctx: intel_ctx_t context
> > + * @e: blitter engine for @ctx
> > + * @ahnd: allocator handle
> > + * @blt: basic blitter data (for TGL/DG1 which doesn't support ext version)
> > + * @ext: extended blitter data (for DG2+, supports flatccs compression)
> > + *
> > + * Function does blit between @src and @dst described in @blt object.
> > + *
> > + * Returns:
> > + * execbuffer status.
> > + */
> > +int blt_block_copy(int i915,
> > + const intel_ctx_t *ctx,
> > + const struct intel_execution_engine2 *e,
> > + uint64_t ahnd,
> > + const struct blt_copy_data *blt,
> > + const struct blt_block_copy_data_ext *ext)
> > +{
> > + struct drm_i915_gem_execbuffer2 execbuf = {};
> > + struct drm_i915_gem_exec_object2 obj[3] = {};
> > + uint64_t dst_offset, src_offset, bb_offset;
> > + int ret;
> > +
> > + igt_assert_f(ahnd, "block-copy supports softpin only\n");
> > + igt_assert_f(blt, "block-copy requires data to do blit\n");
> > +
> > + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, 0);
> > + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, 0);
> > + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, 0);
> > +
> > + emit_blt_block_copy(i915, ahnd, blt, ext, 0, true);
> > +
> > obj[0].offset = CANONICAL(dst_offset);
> > obj[1].offset = CANONICAL(src_offset);
> > obj[2].offset = CANONICAL(bb_offset > @@ -655,31 +696,30 @@ static
> > void dump_bb_surf_ctrl_cmd(const struct
> gen12_ctrl_surf_copy_data *data)
> > }
> > /**
> > - * blt_ctrl_surf_copy:
> > + * emit_blt_ctrl_surf_copy:
> > * @i915: drm fd
> > - * @ctx: intel_ctx_t context
> > - * @e: blitter engine for @ctx
> > * @ahnd: allocator handle
> > * @surf: blitter data for ctrl-surf-copy
> > + * @bb_pos: position at which insert block copy commands
> > + * @emit_bbe: emit MI_BATCH_BUFFER_END after ctrl-surf-copy or not
> > *
> > - * Function does ctrl-surf-copy blit between @src and @dst described in
> > - * @blt object.
> > + * Function emits ctrl-surf-copy blit between @src and @dst described in
> > + * @blt object at @bb_pos. Allows concatenating with other commands to
> > + * achieve pipelining.
> > *
> > * Returns:
> > - * execbuffer status.
> > + * Next write position in batch.
> > */
> > -int blt_ctrl_surf_copy(int i915,
> > - const intel_ctx_t *ctx,
> > - const struct intel_execution_engine2 *e,
> > - uint64_t ahnd,
> > - const struct blt_ctrl_surf_copy_data *surf)
> > +uint64_t emit_blt_ctrl_surf_copy(int i915,
> > + uint64_t ahnd,
> > + const struct blt_ctrl_surf_copy_data *surf,
> > + uint64_t bb_pos,
> > + bool emit_bbe)
> > {
> > - struct drm_i915_gem_execbuffer2 execbuf = {};
> > - struct drm_i915_gem_exec_object2 obj[3] = {};
> > struct gen12_ctrl_surf_copy_data data = {};
> > uint64_t dst_offset, src_offset, bb_offset, alignment;
> > + uint32_t bbe = MI_BATCH_BUFFER_END;
> > uint32_t *bb;
> > - int i;
> > igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
> > igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
> > @@ -695,12 +735,9 @@ int blt_ctrl_surf_copy(int i915,
> > data.dw00.size_of_ctrl_copy = __ccs_size(surf) / CCS_RATIO - 1;
> > data.dw00.length = 0x3;
> > - src_offset = get_offset(ahnd, surf->src.handle, surf->src.size,
> > - alignment);
> > - dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size,
> > - alignment);
> > - bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size,
> > - alignment);
> > + src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment);
> > + dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment);
> > + bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
> > data.dw01.src_address_lo = src_offset;
> > data.dw02.src_address_hi = src_offset >> 32;
> > @@ -710,11 +747,18 @@ int blt_ctrl_surf_copy(int i915,
> > data.dw04.dst_address_hi = dst_offset >> 32;
> > data.dw04.dst_mocs = surf->dst.mocs;
> > - i = sizeof(data) / sizeof(uint32_t);
> > bb = gem_mmap__device_coherent(i915, surf->bb.handle, 0, surf->bb.size,
> > PROT_READ | PROT_WRITE);
> > - memcpy(bb, &data, sizeof(data));
> > - bb[i++] = MI_BATCH_BUFFER_END;
> > +
> > + igt_assert(bb_pos + sizeof(data) < surf->bb.size);
> > + memcpy(bb + bb_pos, &data, sizeof(data));
> > + bb_pos += sizeof(data);
> > +
> > + if (emit_bbe) {
> > + igt_assert(bb_pos + sizeof(uint32_t) < surf->bb.size);
> > + memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> > + bb_pos += sizeof(uint32_t);
> > + }
> > if (surf->print_bb) {
> > igt_info("BB [CTRL SURF]:\n");
> > @@ -724,8 +768,46 @@ int blt_ctrl_surf_copy(int i915,
> > dump_bb_surf_ctrl_cmd(&data);
> > }
> > +
> > munmap(bb, surf->bb.size);
> > + return bb_pos;
> > +}
> > +
> > +/**
> > + * blt_ctrl_surf_copy:
> > + * @i915: drm fd
> > + * @ctx: intel_ctx_t context
> > + * @e: blitter engine for @ctx
> > + * @ahnd: allocator handle
> > + * @surf: blitter data for ctrl-surf-copy
> > + *
> > + * Function does ctrl-surf-copy blit between @src and @dst described in
> > + * @blt object.
> > + *
> > + * Returns:
> > + * execbuffer status.
> > + */
> > +int blt_ctrl_surf_copy(int i915,
> > + const intel_ctx_t *ctx,
> > + const struct intel_execution_engine2 *e,
> > + uint64_t ahnd,
> > + const struct blt_ctrl_surf_copy_data *surf)
> > +{
> > + struct drm_i915_gem_execbuffer2 execbuf = {};
> > + struct drm_i915_gem_exec_object2 obj[3] = {};
> > + uint64_t dst_offset, src_offset, bb_offset, alignment;
> > +
> > + igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
> > + igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
> > +
> > + alignment = max_t(uint64_t, gem_detect_safe_alignment(i915), 1ull << 16);
> > + src_offset = get_offset(ahnd, surf->src.handle, surf->src.size, alignment);
> > + dst_offset = get_offset(ahnd, surf->dst.handle, surf->dst.size, alignment);
> > + bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
> > +
> > + emit_blt_ctrl_surf_copy(i915, ahnd, surf, 0, true);
> > +
> > obj[0].offset = CANONICAL(dst_offset);
> > obj[1].offset = CANONICAL(src_offset);
> > obj[2].offset = CANONICAL(bb_offset);
> > @@ -869,31 +951,31 @@ static void dump_bb_fast_cmd(struct gen12_fast_copy_data *data)
> > }
> > /**
> > - * blt_fast_copy:
> > + * emit_blt_fast_copy:
> > * @i915: drm fd
> > - * @ctx: intel_ctx_t context
> > - * @e: blitter engine for @ctx
> > * @ahnd: allocator handle
> > * @blt: blitter data for fast-copy (same as for block-copy but doesn't use
> > * compression fields).
> > + * @bb_pos: position at which insert block copy commands
> > + * @emit_bbe: emit MI_BATCH_BUFFER_END after fast-copy or not
> > *
> > - * Function does fast blit between @src and @dst described in @blt object.
> > + * Function emits fast-copy blit between @src and @dst described in @blt object
> > + * at @bb_pos. Allows concatenating with other commands to
> > + * achieve pipelining.
> > *
> > * Returns:
> > - * execbuffer status.
> > + * Next write position in batch.
> > */
> > -int blt_fast_copy(int i915,
> > - const intel_ctx_t *ctx,
> > - const struct intel_execution_engine2 *e,
> > - uint64_t ahnd,
> > - const struct blt_copy_data *blt)
> > +uint64_t emit_blt_fast_copy(int i915,
> > + uint64_t ahnd,
> > + const struct blt_copy_data *blt,
> > + uint64_t bb_pos,
> > + bool emit_bbe)
> > {
> > - struct drm_i915_gem_execbuffer2 execbuf = {};
> > - struct drm_i915_gem_exec_object2 obj[3] = {};
> > struct gen12_fast_copy_data data = {};
> > uint64_t dst_offset, src_offset, bb_offset, alignment;
> > + uint32_t bbe = MI_BATCH_BUFFER_END;
> > uint32_t *bb;
> > - int i, ret;
> > alignment = gem_detect_safe_alignment(i915);
> > @@ -931,22 +1013,65 @@ int blt_fast_copy(int i915,
> > data.dw08.src_address_lo = src_offset;
> > data.dw09.src_address_hi = src_offset >> 32;
> > - i = sizeof(data) / sizeof(uint32_t);
> > bb = gem_mmap__device_coherent(i915, blt->bb.handle, 0, blt->bb.size,
> > PROT_READ | PROT_WRITE);
> > - memcpy(bb, &data, sizeof(data));
> > - bb[i++] = MI_BATCH_BUFFER_END;
> > + igt_assert(bb_pos + sizeof(data) < blt->bb.size);
> > + memcpy(bb + bb_pos, &data, sizeof(data));
> > + bb_pos += sizeof(data);
> > +
> > + if (emit_bbe) {
> > + igt_assert(bb_pos + sizeof(uint32_t) < blt->bb.size);
> > + memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> > + bb_pos += sizeof(uint32_t);
> > + }
> > if (blt->print_bb) {
> > igt_info("BB [FAST COPY]\n");
> > - igt_info("blit [src offset: %llx, dst offset: %llx\n",
> > - (long long) src_offset, (long long) dst_offset);
> > + igt_info("src offset: %llx, dst offset: %llx, bb offset: %llx\n",
> > + (long long) src_offset, (long long) dst_offset,
> > + (long long) bb_offset);
>
> Nitpick: as you're touching these lines anyway, could you delete space after
> cast? They're not needed.
I see the preferred code style is to join (type)var - strange, imo
(type) var looks more readable, as the var is immediately visible (the
joined form disturbs my perception). Anyway, ok, I will change this.
--
Zbigniew
>
> In general, I'm fine with the changes, but would like to clarify a couple of
> things above before giving r-b.
>
> All the best,
> Karolina
>
> > dump_bb_fast_cmd(&data);
> > }
> > munmap(bb, blt->bb.size);
> > + return bb_pos;
> > +}
> > +
> > +/**
> > + * blt_fast_copy:
> > + * @i915: drm fd
> > + * @ctx: intel_ctx_t context
> > + * @e: blitter engine for @ctx
> > + * @ahnd: allocator handle
> > + * @blt: blitter data for fast-copy (same as for block-copy but doesn't use
> > + * compression fields).
> > + *
> > + * Function does fast blit between @src and @dst described in @blt object.
> > + *
> > + * Returns:
> > + * execbuffer status.
> > + */
> > +int blt_fast_copy(int i915,
> > + const intel_ctx_t *ctx,
> > + const struct intel_execution_engine2 *e,
> > + uint64_t ahnd,
> > + const struct blt_copy_data *blt)
> > +{
> > + struct drm_i915_gem_execbuffer2 execbuf = {};
> > + struct drm_i915_gem_exec_object2 obj[3] = {};
> > + uint64_t dst_offset, src_offset, bb_offset, alignment;
> > + int ret;
> > +
> > + alignment = gem_detect_safe_alignment(i915);
> > +
> > + src_offset = get_offset(ahnd, blt->src.handle, blt->src.size, alignment);
> > + dst_offset = get_offset(ahnd, blt->dst.handle, blt->dst.size, alignment);
> > + bb_offset = get_offset(ahnd, blt->bb.handle, blt->bb.size, alignment);
> > +
> > + emit_blt_fast_copy(i915, ahnd, blt, 0, true);
> > +
> > obj[0].offset = CANONICAL(dst_offset);
> > obj[1].offset = CANONICAL(src_offset);
> > obj[2].offset = CANONICAL(bb_offset);
> > diff --git a/lib/i915/i915_blt.h b/lib/i915/i915_blt.h
> > index e0e8b52bc2..34db9bb962 100644
> > --- a/lib/i915/i915_blt.h
> > +++ b/lib/i915/i915_blt.h
> > @@ -168,6 +168,13 @@ bool blt_supports_compression(int i915);
> > bool blt_supports_tiling(int i915, enum blt_tiling tiling);
> > const char *blt_tiling_name(enum blt_tiling tiling);
> > +uint64_t emit_blt_block_copy(int i915,
> > + uint64_t ahnd,
> > + const struct blt_copy_data *blt,
> > + const struct blt_block_copy_data_ext *ext,
> > + uint64_t bb_pos,
> > + bool emit_bbe);
> > +
> > int blt_block_copy(int i915,
> > const intel_ctx_t *ctx,
> > const struct intel_execution_engine2 *e,
> > @@ -175,12 +182,24 @@ int blt_block_copy(int i915,
> > const struct blt_copy_data *blt,
> > const struct blt_block_copy_data_ext *ext);
> > +uint64_t emit_blt_ctrl_surf_copy(int i915,
> > + uint64_t ahnd,
> > + const struct blt_ctrl_surf_copy_data *surf,
> > + uint64_t bb_pos,
> > + bool emit_bbe);
> > +
> > int blt_ctrl_surf_copy(int i915,
> > const intel_ctx_t *ctx,
> > const struct intel_execution_engine2 *e,
> > uint64_t ahnd,
> > const struct blt_ctrl_surf_copy_data *surf);
> > +uint64_t emit_blt_fast_copy(int i915,
> > + uint64_t ahnd,
> > + const struct blt_copy_data *blt,
> > + uint64_t bb_pos,
> > + bool emit_bbe);
> > +
> > int blt_fast_copy(int i915,
> > const intel_ctx_t *ctx,
> > const struct intel_execution_engine2 *e,
More information about the igt-dev
mailing list