[PATCH i-g-t 09/15] lib/intel_blt: use struct instead of inline coding
Francois Dugast
francois.dugast at intel.com
Thu May 22 11:52:42 UTC 2025
On Tue, May 13, 2025 at 08:58:04PM +0200, Zbigniew Kempczyński wrote:
> Structs with bitfields offers better control to avoid setting
Missed this: s/offers/offer/
> unnecessary bits and getting unexpected behavior.
>
> Add a struct for mem-copy and replace the current byte-copy implementation
> with a byte/page one, adding an iterator that inserts several mem-copy
> instructions if the user passed objects whose width is greater than the limit.
>
> At first glance the order of assigning fields might look odd, but
> it will be used in a subsequent patch which extends the copy from linear
> to matrix.
>
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Francois Dugast <francois.dugast at intel.com>
> ---
> lib/intel_blt.c | 171 ++++++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 144 insertions(+), 27 deletions(-)
>
> diff --git a/lib/intel_blt.c b/lib/intel_blt.c
> index 04549ab42e..265f5ed50f 100644
> --- a/lib/intel_blt.c
> +++ b/lib/intel_blt.c
> @@ -1798,6 +1798,73 @@ int blt_fast_copy(int fd,
> return ret;
> }
>
> +struct xe_mem_copy_data {
> + struct {
> + uint32_t length: BITRANGE(0, 7);
> + uint32_t compression_format: BITRANGE(8, 12);
> + uint32_t compression_enable: BITRANGE(13, 13);
> + uint32_t rsvd0: BITRANGE(14, 14);
> + uint32_t dst_compressible: BITRANGE(15, 15);
> + uint32_t src_compressible: BITRANGE(16, 16);
> + uint32_t copy_type: BITRANGE(17, 18);
> + uint32_t mode: BITRANGE(19, 19);
> + uint32_t rsvd1: BITRANGE(20, 21);
> + uint32_t opcode: BITRANGE(22, 28);
> + uint32_t client: BITRANGE(29, 31);
> + } dw00;
> +
> + struct {
> + union {
> + struct {
> + uint32_t width: BITRANGE(0, 17);
> + uint32_t rsvd0: BITRANGE(18, 31);
> + } byte_copy;
> + struct {
> + uint32_t width: BITRANGE(0, 23);
> + uint32_t rsvd0: BITRANGE(24, 31);
> + } page_copy;
> + uint32_t val;
> + };
> + } dw01;
> +
> + struct {
> + uint32_t height: BITRANGE(0, 17);
> + uint32_t rsvd0: BITRANGE(18, 31);
> + } dw02;
> +
> + struct {
> + uint32_t src_pitch: BITRANGE(0, 17);
> + uint32_t rsvd0: BITRANGE(18, 31);
> + } dw03;
> +
> + struct {
> + uint32_t dst_pitch: BITRANGE(0, 17);
> + uint32_t rsvd0: BITRANGE(18, 31);
> + } dw04;
> +
> + struct {
> + uint32_t src_address_lo;
> + } dw05;
> +
> + struct {
> + uint32_t src_address_hi;
> + } dw06;
> +
> + struct {
> + uint32_t dst_address_lo;
> + } dw07;
> +
> + struct {
> + uint32_t dst_address_hi;
> + } dw08;
> +
> + struct {
> + uint32_t dst_mocs: BITRANGE(0, 6);
> + uint32_t rsvd0: BITRANGE(7, 24);
> + uint32_t src_mocs: BITRANGE(25, 31);
> + } dw09;
> +};
> +
> /**
> * blt_mem_copy_init:
> * @fd: drm fd
> @@ -1820,41 +1887,91 @@ void blt_mem_copy_init(int fd, struct blt_mem_copy_data *mem,
> mem->copy_type = copy_type;
> }
>
> -static void emit_blt_mem_copy(int fd, uint64_t ahnd,
> - const struct blt_mem_copy_data *mem,
> - bool emit_bbe)
> +static uint64_t emit_blt_mem_copy(int fd, uint64_t ahnd,
> + const struct blt_mem_copy_data *mem,
> + uint64_t bb_pos, bool emit_bbe)
> {
> - uint64_t dst_offset, src_offset;
> - int i;
> - uint32_t *batch;
> - uint32_t optype;
> + struct xe_mem_copy_data data = {};
> + uint64_t dst_offset, src_offset, shift;
> + uint32_t height, width_max, remain;
> + uint32_t bbe = MI_BATCH_BUFFER_END;
> + uint32_t *bb;
> +
> + if (mem->mode == MODE_BYTE) {
> + data.dw01.byte_copy.width = -1;
> + width_max = data.dw01.byte_copy.width + 1;
> + shift = width_max;
> + } else {
> + data.dw01.page_copy.width = -1;
> + width_max = data.dw01.page_copy.width + 1;
> + shift = width_max << 8;
> + }
>
> src_offset = get_offset_pat_index(ahnd, mem->src.handle, mem->src.size,
> 0, mem->src.pat_index);
> dst_offset = get_offset_pat_index(ahnd, mem->dst.handle, mem->dst.size,
> 0, mem->dst.pat_index);
>
> - batch = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
> - optype = mem->copy_type == TYPE_MATRIX ? 1 << 17 : 0;
> -
> - i = 0;
> - batch[i++] = MEM_COPY_CMD | optype;
> - batch[i++] = mem->src.width - 1;
> - batch[i++] = mem->src.height - 1;
> - batch[i++] = mem->src.pitch - 1;
> - batch[i++] = mem->dst.pitch - 1;
> - batch[i++] = src_offset;
> - batch[i++] = src_offset << 32;
> - batch[i++] = dst_offset;
> - batch[i++] = dst_offset << 32;
> - batch[i++] = mem->src.mocs_index << XE2_MEM_COPY_MOCS_SHIFT | mem->dst.mocs_index;
> -
> - if (emit_bbe)
> - batch[i++] = MI_BATCH_BUFFER_END;
> -
> - munmap(batch, mem->bb.size);
> + bb = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
> +
> + height = mem->dst.height;
> +
> + data.dw00.client = 0x2;
> + data.dw00.opcode = 0x5a;
> + data.dw00.length = 8;
> + data.dw00.mode = mem->mode;
> + data.dw00.copy_type = mem->copy_type;
> +
> + data.dw02.height = height - 1;
> + data.dw05.src_address_lo = src_offset;
> + data.dw06.src_address_hi = src_offset >> 32;
> + data.dw07.dst_address_lo = dst_offset;
> + data.dw08.dst_address_hi = dst_offset >> 32;
> + data.dw09.src_mocs = mem->src.mocs_index;
> + data.dw09.dst_mocs = mem->dst.mocs_index;
> +
> + remain = mem->src.width;
> +
> + /* Truncate pitches to match operation bits */
> + if (mem->src.pitch > width_max)
> + data.dw03.src_pitch = width_max - 1;
> + else
> + data.dw03.src_pitch = mem->src.pitch;
> +
> + if (mem->dst.pitch > width_max)
> + data.dw04.dst_pitch = width_max - 1;
> + else
> + data.dw04.dst_pitch = mem->dst.pitch;
> +
> + while (remain) {
> + data.dw01.val = min_t(uint32_t, width_max, remain) - 1;
> +
> + igt_assert(bb_pos + sizeof(data) < mem->bb.size);
> + memcpy(bb + bb_pos, &data, sizeof(data));
> + bb_pos += sizeof(data);
> +
> + remain -= remain > width_max ? width_max : remain;
> + src_offset += shift;
> + dst_offset += shift;
> +
> + data.dw05.src_address_lo = src_offset;
> + data.dw06.src_address_hi = src_offset >> 32;
> + data.dw07.dst_address_lo = dst_offset;
> + data.dw08.dst_address_hi = dst_offset >> 32;
> + }
> +
> + if (emit_bbe) {
> + igt_assert(bb_pos + sizeof(uint32_t) < mem->bb.size);
> + memcpy(bb + bb_pos, &bbe, sizeof(bbe));
> + bb_pos += sizeof(uint32_t);
> + }
> +
> + munmap(bb, mem->bb.size);
> +
> + return bb_pos;
> }
>
> +
> /**
> * blt_mem_copy:
> * @fd: drm fd
> @@ -1884,7 +2001,7 @@ int blt_mem_copy(int fd, const intel_ctx_t *ctx,
> 0, mem->dst.pat_index);
> bb_offset = get_offset(ahnd, mem->bb.handle, mem->bb.size, 0);
>
> - emit_blt_mem_copy(fd, ahnd, mem, true);
> + emit_blt_mem_copy(fd, ahnd, mem, 0, true);
>
> if (mem->driver == INTEL_DRIVER_XE) {
> intel_ctx_xe_exec(ctx, ahnd, CANONICAL(bb_offset));
> --
> 2.43.0
>
More information about the igt-dev
mailing list