[PATCH i-g-t 1/3] lib/intel_blt: Handle multiple ctrl-surf-copy blits on big surface

Zbigniew Kempczyński zbigniew.kempczynski at intel.com
Fri Nov 8 06:47:10 UTC 2024


Copying CCS data for bigger surfaces requires several blits. This is
caused by the 10-bit size field (on both Xe and Xe2+) of the
ctrl-surf-copy command. To handle surfaces bigger than 64MiB on Xe
and 4MiB on Xe2+ we need to split the copy into multiple blits, each
of which fits in the 10-bit size field.

The change introduces a loop over the big surface which produces a batch
with several ctrl-surf-copy blits.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
---
 lib/intel_blt.c | 121 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 86 insertions(+), 35 deletions(-)

diff --git a/lib/intel_blt.c b/lib/intel_blt.c
index 1b0f279177..b2fb3151e0 100644
--- a/lib/intel_blt.c
+++ b/lib/intel_blt.c
@@ -1115,7 +1115,13 @@ int blt_block_copy(int fd,
 	return ret;
 }
 
-static uint16_t __ccs_size(int fd, const struct blt_ctrl_surf_copy_data *surf)
+/*
+ * Calculates the number of CCS bytes used for the surface. The size field
+ * in the ctrl-surf-copy command is 10 bits wide on both Xe and Xe2, so the
+ * caller has to split the CCS copy of bigger surfaces into several commands.
+ * This function returns the total size of CCS data used for the surface.
+ */
+static uint32_t __ccs_size(int fd, const struct blt_ctrl_surf_copy_data *surf)
 {
 	uint32_t src_size, dst_size;
 	uint16_t ccsratio = CCS_RATIO(fd);
@@ -1286,8 +1292,8 @@ uint64_t emit_blt_ctrl_surf_copy(int fd,
 	uint64_t dst_offset, src_offset, bb_offset, alignment;
 	uint32_t bbe = MI_BATCH_BUFFER_END;
 	uint32_t *bb;
-	uint16_t num_ccs_blocks = (ip_ver >= IP_VER(20, 0)) ?
-				(xe_get_default_alignment(fd) / CCS_RATIO(fd)) : CCS_RATIO(fd);
+	uint32_t ccs_per_page, max_blocks, src_step, dst_step;
+	int32_t left_blocks;
 
 	igt_assert_f(ahnd, "ctrl-surf-copy supports softpin only\n");
 	igt_assert_f(surf, "ctrl-surf-copy requires data to do ctrl-surf-copy blit\n");
@@ -1299,51 +1305,108 @@ uint64_t emit_blt_ctrl_surf_copy(int fd,
 					  alignment, surf->dst.pat_index);
 	bb_offset = get_offset(ahnd, surf->bb.handle, surf->bb.size, alignment);
 
+	bb = bo_map(fd, surf->bb.handle, surf->bb.size, surf->driver);
+
+	/*
+	 * Copying CCS data in/out is limited by the width of the
+	 * size_of_ctrl_copy bitfield, which means an operation on a bigger
+	 * surface has to be split into several ctrl-surf-copy commands.
+	 *
+	 * Instead of hardcoding the maximum number of blocks copied by a single
+	 * command we calculate it from the bitfield size (Xe and Xe2 differ
+	 * in bitfield location [and future platforms potentially in size]).
+	 */
 	if (ip_ver >= IP_VER(20, 0)) {
+		ccs_per_page = SZ_4K / CCS_RATIO(fd);
+
 		data.xe2.dw00.client = 0x2;
 		data.xe2.dw00.opcode = 0x48;
 		data.xe2.dw00.src_access_type = surf->src.access_type;
 		data.xe2.dw00.dst_access_type = surf->dst.access_type;
-
-		/* Ensure dst has size capable to keep src ccs aux */
-		data.xe2.dw00.size_of_ctrl_copy = __ccs_size(fd, surf) / num_ccs_blocks - 1;
 		data.xe2.dw00.length = 0x3;
 
-		data.xe2.dw01.src_address_lo = src_offset;
-		data.xe2.dw02.src_address_hi = src_offset >> 32;
+		data.xe2.dw00.size_of_ctrl_copy = -1;
+		max_blocks = data.xe2.dw00.size_of_ctrl_copy + 1;
+
+		/*
+		 * For Xe2+ each size_of_ctrl_copy increment covers 4K page size
+		 * of surface, which in turn is covered by 8B of CCS data.
+		 */
+		src_step = surf->src.access_type == DIRECT_ACCESS ? ccs_per_page : SZ_4K;
+		src_step *= max_blocks;
+		dst_step = surf->dst.access_type == DIRECT_ACCESS ? ccs_per_page : SZ_4K;
+		dst_step *= max_blocks;
+
 		data.xe2.dw02.src_mocs_index = surf->src.mocs_index;
-
-		data.xe2.dw03.dst_address_lo = dst_offset;
-		data.xe2.dw04.dst_address_hi = dst_offset >> 32;
 		data.xe2.dw04.dst_mocs_index = surf->dst.mocs_index;
 
 		data_sz = sizeof(data.xe2);
 	} else {
+		ccs_per_page = SZ_64K / CCS_RATIO(fd);
+
 		data.gen12.dw00.client = 0x2;
 		data.gen12.dw00.opcode = 0x48;
 		data.gen12.dw00.src_access_type = surf->src.access_type;
 		data.gen12.dw00.dst_access_type = surf->dst.access_type;
-
-		/* Ensure dst has size capable to keep src ccs aux */
-		data.gen12.dw00.size_of_ctrl_copy = __ccs_size(fd, surf) / num_ccs_blocks - 1;
 		data.gen12.dw00.length = 0x3;
 
-		data.gen12.dw01.src_address_lo = src_offset;
-		data.gen12.dw02.src_address_hi = src_offset >> 32;
+		data.gen12.dw00.size_of_ctrl_copy = -1;
+		max_blocks = data.gen12.dw00.size_of_ctrl_copy + 1;
+
+		/*
+		 * For Xe each size_of_ctrl_copy increment covers 64K page size
+		 * of surface, which in turn is covered by 256B of CCS data.
+		 */
+		src_step = surf->src.access_type == DIRECT_ACCESS ? ccs_per_page : SZ_64K;
+		src_step *= max_blocks;
+		dst_step = surf->dst.access_type == DIRECT_ACCESS ? ccs_per_page : SZ_64K;
+		dst_step *= max_blocks;
+
 		data.gen12.dw02.src_mocs_index = surf->src.mocs_index;
-
-		data.gen12.dw03.dst_address_lo = dst_offset;
-		data.gen12.dw04.dst_address_hi = dst_offset >> 32;
 		data.gen12.dw04.dst_mocs_index = surf->dst.mocs_index;
 
 		data_sz = sizeof(data.gen12);
 	}
 
-	bb = bo_map(fd, surf->bb.handle, surf->bb.size, surf->driver);
+	left_blocks = __ccs_size(fd, surf) / ccs_per_page;
 
-	igt_assert(bb_pos + data_sz < surf->bb.size);
-	memcpy(bb + bb_pos, &data, data_sz);
-	bb_pos += data_sz;
+	while (left_blocks > 0) {
+		int32_t nblocks = min_t(int32_t, left_blocks, max_blocks);
+
+		if (ip_ver >= IP_VER(20, 0)) {
+			data.xe2.dw00.size_of_ctrl_copy = nblocks - 1;
+			data.xe2.dw01.src_address_lo = src_offset;
+			data.xe2.dw02.src_address_hi = src_offset >> 32;
+			data.xe2.dw03.dst_address_lo = dst_offset;
+			data.xe2.dw04.dst_address_hi = dst_offset >> 32;
+		} else {
+			data.gen12.dw00.size_of_ctrl_copy = nblocks - 1;
+			data.gen12.dw01.src_address_lo = src_offset;
+			data.gen12.dw02.src_address_hi = src_offset >> 32;
+			data.gen12.dw03.dst_address_lo = dst_offset;
+			data.gen12.dw04.dst_address_hi = dst_offset >> 32;
+		}
+
+		left_blocks -= max_blocks;
+		dst_offset += dst_step;
+		src_offset += src_step;
+
+		igt_assert(bb_pos + data_sz < surf->bb.size);
+		memcpy(bb + bb_pos, &data, data_sz);
+		bb_pos += data_sz;
+
+		if (surf->print_bb) {
+			igt_info("[CTRL SURF]:\n");
+			igt_info("src offset: 0x%" PRIx64 ", dst offset: 0x%" PRIx64
+				 ", bb offset: 0x%" PRIx64 ", copy nblocks: 0x%x\n",
+				 src_offset, dst_offset, bb_offset, nblocks);
+
+			if (ip_ver >= IP_VER(20, 0))
+				xe2_dump_bb_surf_ctrl_cmd(&data.xe2);
+			else
+				dump_bb_surf_ctrl_cmd(&data.gen12);
+		}
+	}
 
 	if (emit_bbe) {
 		igt_assert(bb_pos + sizeof(uint32_t) < surf->bb.size);
@@ -1351,18 +1414,6 @@ uint64_t emit_blt_ctrl_surf_copy(int fd,
 		bb_pos += sizeof(uint32_t);
 	}
 
-	if (surf->print_bb) {
-		igt_info("[CTRL SURF]:\n");
-		igt_info("src offset: %" PRIx64 ", dst offset: %" PRIx64
-			 ", bb offset: %" PRIx64 "\n",
-			 src_offset, dst_offset, bb_offset);
-
-		if (ip_ver >= IP_VER(20, 0))
-			xe2_dump_bb_surf_ctrl_cmd(&data.xe2);
-		else
-			dump_bb_surf_ctrl_cmd(&data.gen12);
-	}
-
 	munmap(bb, surf->bb.size);
 
 	return bb_pos;
-- 
2.34.1



More information about the igt-dev mailing list