Mesa (master): freedreno/fdl6: rework layout code a bit (reduce linear align to 64 bytes)

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Thu Jun 18 02:44:20 UTC 2020


Module: Mesa
Branch: master
Commit: d53dc6c37680eba8e8fedde055e03b0a61407467
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=d53dc6c37680eba8e8fedde055e03b0a61407467

Author: Jonathan Marek <jonathan at marek.ca>
Date:   Tue Jun  9 18:48:34 2020 -0400

freedreno/fdl6: rework layout code a bit (reduce linear align to 64 bytes)

Reduce linear alignment, and rework the layout code a bit.

This rework has a side effect of also increasing the alignment on linear
levels of tiled (non-ubwc) cpp=1 and cpp=2 layouts. Since we should be
UBWC for those cases anyway, it's not a big loss.

Signed-off-by: Jonathan Marek <jonathan at marek.ca>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/5013>

---

 src/freedreno/fdl/fd6_layout.c                     | 181 +++++++++++----------
 src/freedreno/fdl/fd6_layout_test.c                |   8 +-
 src/freedreno/vulkan/tu_clear_blit.c               |   8 -
 src/gallium/drivers/freedreno/freedreno_resource.c |   6 +-
 4 files changed, 102 insertions(+), 101 deletions(-)

diff --git a/src/freedreno/fdl/fd6_layout.c b/src/freedreno/fdl/fd6_layout.c
index 146e86900aa..b5ffde5f8fe 100644
--- a/src/freedreno/fdl/fd6_layout.c
+++ b/src/freedreno/fdl/fd6_layout.c
@@ -29,62 +29,70 @@
 
 #include "freedreno_layout.h"
 
-/* indexed by cpp, including msaa 2x and 4x:
- * TODO:
- * cpp=1 UBWC needs testing at larger texture sizes
- * missing UBWC blockwidth/blockheight for npot+64 cpp
- * missing 96/128 CPP for 8x MSAA with 32_32_32/32_32_32_32
- */
-static const struct tile_alignment {
-	unsigned basealign;
-	unsigned pitchalign;
-	unsigned heightalign;
-	/* UBWC block width/height.  Used in size alignment, and calculating a
-	 * descriptor's FLAG_BUFFER_LOG2W/H for mipmapping.
-	 */
-	uint8_t ubwc_blockwidth;
-	uint8_t ubwc_blockheight;
-} tile_alignment[] = {
-	[1]  = {  64, 128, 32, 16, 4 },
-	[2]  = { 128, 128, 16, 16, 4 },
-	[3]  = { 256,  64, 32 },
-	[4]  = { 256,  64, 16, 16, 4 },
-	[6]  = { 256,  64, 16 },
-	[8]  = { 256,  64, 16, 8, 4, },
-	[12] = { 256,  64, 16 },
-	[16] = { 256,  64, 16, 4, 4, },
-	[24] = { 256,  64, 16 },
-	[32] = { 256,  64, 16, 4, 2 },
-	[48] = { 256,  64, 16 },
-	[64] = { 256,  64, 16 },
-
-	/* special cases for r8g8: */
-	[0]  = { 256, 64, 32, 16, 8 },
-};
-
 #define RGB_TILE_WIDTH_ALIGNMENT 64
 #define RGB_TILE_HEIGHT_ALIGNMENT 16
 #define UBWC_PLANE_SIZE_ALIGNMENT 4096
 
-static const struct tile_alignment *
-fdl6_tile_alignment(struct fdl_layout *layout)
+static bool
+is_r8g8(struct fdl_layout *layout)
 {
-	debug_assert(layout->cpp < ARRAY_SIZE(tile_alignment));
+	return layout->cpp == 2 &&
+		   util_format_get_nr_components(layout->format) == 2;
+}
 
-	if ((layout->cpp == 2) && (util_format_get_nr_components(layout->format) == 2))
-		return &tile_alignment[0];
-	else
-		return &tile_alignment[layout->cpp];
+void
+fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
+		uint32_t *blockwidth, uint32_t *blockheight)
+{
+	static const struct {
+		uint8_t width;
+		uint8_t height;
+	} blocksize[] = {
+		{ 16, 4 }, /* cpp = 1 */
+		{ 16, 4 }, /* cpp = 2 */
+		{ 16, 4 }, /* cpp = 4 */
+		{ 8, 4, }, /* cpp = 8 */
+		{ 4, 4, }, /* cpp = 16 */
+		{ 4, 2 },  /* cpp = 32 */
+		{ 0, 0 },  /* cpp = 64 (TODO) */
+	};
+
+	/* special case for r8g8: */
+	if (is_r8g8(layout)) {
+		*blockwidth = 16;
+		*blockheight = 8;
+		return;
+	}
+
+	uint32_t cpp = fdl_cpp_shift(layout);
+	assert(cpp < ARRAY_SIZE(blocksize));
+	*blockwidth = blocksize[cpp].width;
+	*blockheight = blocksize[cpp].height;
 }
 
-static int
-fdl6_pitchalign(struct fdl_layout *layout, int level)
+static void
+fdl6_tile_alignment(struct fdl_layout *layout, uint32_t *heightalign)
 {
-	uint32_t pitchalign = 64;
-	if (fdl_tile_mode(layout, level))
-		pitchalign = fdl6_tile_alignment(layout)->pitchalign;
+	layout->pitchalign = fdl_cpp_shift(layout);
+	*heightalign = 16;
+
+	if (is_r8g8(layout) || layout->cpp == 1) {
+		layout->pitchalign = 1;
+		*heightalign = 32;
+	} else if (layout->cpp == 2) {
+		layout->pitchalign = 2;
+	}
 
-	return pitchalign;
+	/* note: this base_align is *probably* not always right,
+	 * it doesn't really get tested. for example with UBWC we might
+	 * want 4k alignment, since we align UBWC levels to 4k
+	 */
+	if (layout->cpp == 1)
+		layout->base_align = 64;
+	else if (layout->cpp == 2)
+		layout->base_align = 128;
+	else
+		layout->base_align = 256;
 }
 
 /* NOTE: good way to test this is:  (for example)
@@ -97,8 +105,9 @@ fdl6_layout(struct fdl_layout *layout,
 		uint32_t mip_levels, uint32_t array_size, bool is_3d,
 		struct fdl_slice *plane_layout)
 {
-	uint32_t offset;
-	uint32_t pitch0;
+	uint32_t offset, pitch0;
+	uint32_t pitchalign, heightalign;
+	uint32_t ubwc_blockwidth, ubwc_blockheight;
 
 	assert(nr_samples > 0);
 	layout->width0 = width0;
@@ -113,37 +122,54 @@ fdl6_layout(struct fdl_layout *layout,
 	layout->nr_samples = nr_samples;
 	layout->layer_first = !is_3d;
 
-	if (depth0 > 1)
-		layout->ubwc = false;
-	if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
-		layout->ubwc = false;
+	fdl6_get_ubwc_blockwidth(layout, &ubwc_blockwidth, &ubwc_blockheight);
 
-	const struct tile_alignment *ta = fdl6_tile_alignment(layout);
+	if (depth0 > 1 || ubwc_blockwidth == 0)
+		layout->ubwc = false;
 
 	/* in layer_first layout, the level (slice) contains just one
 	 * layer (since in fact the layer contains the slices)
 	 */
 	uint32_t layers_in_level = layout->layer_first ? 1 : array_size;
 
-	debug_assert(ta->pitchalign);
-
+	/* note: for tiled+noubwc layouts, we can use a lower pitchalign
+	 * which will affect the linear levels only, (the hardware will still
+	 * expect the tiled alignment on the tiled levels)
+	 */
 	if (layout->tile_mode) {
-		layout->base_align = ta->basealign;
+		fdl6_tile_alignment(layout, &heightalign);
 	} else {
 		layout->base_align = 64;
+		layout->pitchalign = 0;
+		/* align pitch to at least 16 pixels:
+		 * both turnip and gallium assume there is enough alignment for 16x4
+		 * aligned gmem store. turnip can use CP_BLIT to work without this
+		 * extra alignment, but gallium driver doesn't implement it yet
+		 */
+		if (layout->cpp > 4)
+			layout->pitchalign = fdl_cpp_shift(layout) - 2;
+
+		/* when possible, use a bit more alignment than necessary
+		 * presumably this is better for performance?
+		 */
+		if (!plane_layout)
+			layout->pitchalign = fdl_cpp_shift(layout);
+
+		/* not used, avoid "may be used uninitialized" warning */
+		heightalign = 1;
 	}
 
+	pitchalign = 64 << layout->pitchalign;
+
 	if (plane_layout) {
 		offset = plane_layout->offset;
 		pitch0 = plane_layout->pitch;
-		if (align(pitch0, fdl6_pitchalign(layout, 0) * layout->cpp) != pitch0)
-			return false;
-		pitch0 /= layout->cpp; /* explicit pitch is in bytes */
-		if (pitch0 < width0 && height0 > 1)
+		if (align(pitch0, pitchalign) != pitch0)
 			return false;
 	} else {
+		uint32_t nblocksx = util_format_get_nblocksx(format, width0);
 		offset = 0;
-		pitch0 = util_align_npot(width0, fdl6_pitchalign(layout, 0));
+		pitch0 = util_align_npot(nblocksx * layout->cpp, pitchalign);
 	}
 
 	uint32_t ubwc_width0 = width0;
@@ -159,15 +185,11 @@ fdl6_layout(struct fdl_layout *layout,
 		ubwc_height0 = util_next_power_of_two(height0);
 		ubwc_tile_height_alignment = 64;
 	}
-	ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ta->ubwc_blockwidth),
+	ubwc_width0 = align(DIV_ROUND_UP(ubwc_width0, ubwc_blockwidth),
 			RGB_TILE_WIDTH_ALIGNMENT);
-	ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0,
-					ta->ubwc_blockheight),
+	ubwc_height0 = align(DIV_ROUND_UP(ubwc_height0, ubwc_blockheight),
 			ubwc_tile_height_alignment);
 
-	layout->pitchalign =
-		util_logbase2_ceil(fdl6_pitchalign(layout, mip_levels - 1) * layout->cpp >> 6);
-
 	for (uint32_t level = 0; level < mip_levels; level++) {
 		uint32_t depth = u_minify(depth0, level);
 		struct fdl_slice *slice = &layout->slices[level];
@@ -184,7 +206,7 @@ fdl6_layout(struct fdl_layout *layout,
 
 		uint32_t nblocksy = util_format_get_nblocksy(format, height);
 		if (tile_mode)
-			nblocksy = align(nblocksy, ta->heightalign);
+			nblocksy = align(nblocksy, heightalign);
 
 		/* The blits used for mem<->gmem work at a granularity of
 		 * 16x4, which can cause faults due to over-fetch on the
@@ -196,14 +218,8 @@ fdl6_layout(struct fdl_layout *layout,
 		if (level == mip_levels - 1)
 			height = align(nblocksy, 4);
 
-		uint32_t nblocksx =
-			util_align_npot(util_format_get_nblocksx(format, u_minify(pitch0, level)),
-					fdl6_pitchalign(layout, level));
-
-		slice->offset = offset + layout->size;
-		uint32_t blocks = nblocksx * nblocksy;
-
-		slice->pitch = nblocksx * layout->cpp;
+		slice->offset = layout->size;
+		slice->pitch = align(u_minify(pitch0, level), pitchalign);
 
 		/* 1d array and 2d array textures must all have the same layer size
 		 * for each miplevel on a6xx. 3d textures can have different layer
@@ -213,12 +229,12 @@ fdl6_layout(struct fdl_layout *layout,
 		 */
 		if (is_3d) {
 			if (level < 1 || layout->slices[level - 1].size0 > 0xf000) {
-				slice->size0 = align(blocks * layout->cpp, 4096);
+				slice->size0 = align(nblocksy * slice->pitch, 4096);
 			} else {
 				slice->size0 = layout->slices[level - 1].size0;
 			}
 		} else {
-			slice->size0 = blocks * layout->cpp;
+			slice->size0 = nblocksy * slice->pitch;
 		}
 
 		layout->size += slice->size0 * depth * layers_in_level;
@@ -260,12 +276,3 @@ fdl6_layout(struct fdl_layout *layout,
 
 	return true;
 }
-
-void
-fdl6_get_ubwc_blockwidth(struct fdl_layout *layout,
-		uint32_t *blockwidth, uint32_t *blockheight)
-{
-	const struct tile_alignment *ta = fdl6_tile_alignment(layout);
-	*blockwidth = ta->ubwc_blockwidth;
-	*blockheight = ta->ubwc_blockheight;
-}
diff --git a/src/freedreno/fdl/fd6_layout_test.c b/src/freedreno/fdl/fd6_layout_test.c
index c5b693a931d..2a8083a866c 100644
--- a/src/freedreno/fdl/fd6_layout_test.c
+++ b/src/freedreno/fdl/fd6_layout_test.c
@@ -356,10 +356,10 @@ static const struct testcase testcases[] = {
 				{ .offset = 8192, .pitch = 128 },
 				{ .offset = 12288, .pitch = 128 },
 				{ .offset = 16384, .pitch = 128 },
-				{ .offset = 20480, .pitch = 64 },
-				{ .offset = 20544, .pitch = 64 },
-				{ .offset = 20608, .pitch = 64 },
-				{ .offset = 20672, .pitch = 64 },
+				{ .offset = 20480, .pitch = 128 },
+				{ .offset = 20608, .pitch = 128 },
+				{ .offset = 20736, .pitch = 128 },
+				{ .offset = 20864, .pitch = 128 },
 			},
 		},
 	},
diff --git a/src/freedreno/vulkan/tu_clear_blit.c b/src/freedreno/vulkan/tu_clear_blit.c
index 0ebe3ba2c90..df2359f3bb0 100644
--- a/src/freedreno/vulkan/tu_clear_blit.c
+++ b/src/freedreno/vulkan/tu_clear_blit.c
@@ -1129,10 +1129,6 @@ tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
    uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
    uint32_t layer_size = src_height * pitch;
 
-   /* note: the src_va/pitch alignment of 64 is for 2D engine,
-    * it is also valid for 1cpp format with shader path (stencil aspect path)
-    */
-
    ops->setup(cmd, cs, dst_format, ROTATE_0, false, mask);
 
    struct tu_image_view dst;
@@ -1212,10 +1208,6 @@ tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
    uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
    uint32_t layer_size = pitch * dst_height;
 
-   /* note: the dst_va/pitch alignment of 64 is for 2D engine,
-    * it is also valid for 1cpp format with shader path (stencil aspect)
-    */
-
    ops->setup(cmd, cs, dst_format, ROTATE_0, false, 0xf);
 
    struct tu_image_view src;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index f7111a499e7..7343dbcc9c6 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -1032,9 +1032,11 @@ fd_resource_from_handle(struct pipe_screen *pscreen,
 
 	uint32_t pitchalign = fd_screen(pscreen)->gmem_alignw * rsc->layout.cpp;
 
-	/* use 64 pitchalign on a6xx where gmem_alignw is not right */
+	/* pitchalign is 64-bytes for linear formats on a6xx
+	 * layout_resource_for_modifier will validate tiled pitch
+	 */
 	if (is_a6xx(screen))
-		pitchalign = 64 * rsc->layout.cpp;
+		pitchalign = 64;
 
 	if ((slice->pitch < align(prsc->width0 * rsc->layout.cpp, pitchalign)) ||
 			(slice->pitch & (pitchalign - 1)))



More information about the mesa-commit mailing list