Mesa (master): freedreno: Allow UBWC on textures with multiple mipmap levels.

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Tue Feb 4 23:35:05 UTC 2020


Module: Mesa
Branch: master
Commit: 22d2cbe6856fea65bf01dc96941b5127f17dacab
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=22d2cbe6856fea65bf01dc96941b5127f17dacab

Author: Eric Anholt <eric at anholt.net>
Date:   Thu Nov 21 15:09:02 2019 -0800

freedreno: Allow UBWC on textures with multiple mipmap levels.

This is a backport of Jonathan Marek's UBWC work on turnip to GL.

Performance highlights from our trace set (320 frames sampled):
traces/glmark2/texture-texture-filter=mipmap.rdc:    +9.1% +/-   2.2%
traces/android/trex.rdc:                             +8.7% +/-   0.4%
traces/glmark2/desktop-effect=shadow:windows=4.rdc:  +4.2% +/-   2.5%

Tested-by: Marge Bot <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3059>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/merge_requests/3059>

---

 src/freedreno/fdl/fd6_layout.c                     | 14 +++--
 src/freedreno/fdl/freedreno_layout.c               |  4 +-
 src/freedreno/fdl/freedreno_layout.h               | 19 +++---
 src/freedreno/vulkan/tu_image.c                    |  5 +-
 src/gallium/drivers/freedreno/a6xx/fd6_image.c     |  9 ++-
 src/gallium/drivers/freedreno/a6xx/fd6_resource.c  | 68 ++--------------------
 src/gallium/drivers/freedreno/a6xx/fd6_resource.h  |  1 -
 src/gallium/drivers/freedreno/a6xx/fd6_screen.c    |  1 -
 src/gallium/drivers/freedreno/a6xx/fd6_texture.c   | 15 +++--
 src/gallium/drivers/freedreno/freedreno_resource.c |  8 +--
 src/gallium/drivers/freedreno/freedreno_resource.h |  4 +-
 src/gallium/drivers/freedreno/freedreno_screen.h   |  1 -
 12 files changed, 51 insertions(+), 98 deletions(-)

diff --git a/src/freedreno/fdl/fd6_layout.c b/src/freedreno/fdl/fd6_layout.c
index 30f334968ee..661d7574e8b 100644
--- a/src/freedreno/fdl/fd6_layout.c
+++ b/src/freedreno/fdl/fd6_layout.c
@@ -69,7 +69,7 @@ void
 fdl6_layout(struct fdl_layout *layout,
 		enum pipe_format format, uint32_t nr_samples,
 		uint32_t width0, uint32_t height0, uint32_t depth0,
-		uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc)
+		uint32_t mip_levels, uint32_t array_size, bool is_3d)
 {
 	assert(nr_samples > 0);
 	layout->width0 = width0;
@@ -81,6 +81,11 @@ fdl6_layout(struct fdl_layout *layout,
 	layout->format = format;
 	layout->nr_samples = nr_samples;
 
+	if (depth0 > 1)
+		layout->ubwc = false;
+	if (tile_alignment[layout->cpp].ubwc_blockwidth == 0)
+		layout->ubwc = false;
+
 	const struct util_format_description *format_desc =
 		util_format_description(format);
 	uint32_t depth = depth0;
@@ -115,8 +120,7 @@ fdl6_layout(struct fdl_layout *layout,
 	for (uint32_t level = 0; level < mip_levels; level++) {
 		struct fdl_slice *slice = &layout->slices[level];
 		struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
-		uint32_t tile_mode = (ubwc ?
-				layout->tile_mode : fdl_tile_mode(layout, level));
+		uint32_t tile_mode = fdl_tile_mode(layout, level);
 		uint32_t width, height;
 
 		/* tiled levels of 3D textures are rounded up to PoT dimensions: */
@@ -176,7 +180,7 @@ fdl6_layout(struct fdl_layout *layout,
 
 		layout->size += slice->size0 * depth * layers_in_level;
 
-		if (ubwc) {
+		if (layout->ubwc) {
 			/* with UBWC every level is aligned to 4K */
 			layout->size = align(layout->size, 4096);
 
@@ -219,7 +223,7 @@ fdl6_layout(struct fdl_layout *layout,
 	 * get to program the UBWC and non-UBWC offset/strides
 	 * independently.
 	 */
-	if (ubwc) {
+	if (layout->ubwc) {
 		for (uint32_t level = 0; level < mip_levels; level++)
 			layout->slices[level].offset += layout->ubwc_layer_size * array_size;
 		layout->size += layout->ubwc_layer_size * array_size;
diff --git a/src/freedreno/fdl/freedreno_layout.c b/src/freedreno/fdl/freedreno_layout.c
index 5336683ba18..57f6388b69a 100644
--- a/src/freedreno/fdl/freedreno_layout.c
+++ b/src/freedreno/fdl/freedreno_layout.c
@@ -47,8 +47,6 @@ fdl_dump_layout(struct fdl_layout *layout)
 	for (uint32_t level = 0; level < layout->slices[level].size0; level++) {
 		struct fdl_slice *slice = &layout->slices[level];
 		struct fdl_slice *ubwc_slice = &layout->ubwc_slices[level];
-		uint32_t tile_mode = (layout->ubwc_layer_size ?
-				layout->tile_mode : fdl_tile_mode(layout, level));
 
 		fprintf(stderr, "%s: %ux%ux%u@%ux%u:\t%2u: stride=%4u, size=%6u,%6u, aligned_height=%3u, offset=0x%x,0x%x, layersz %5u,%5u tiling=%d\n",
 				util_format_name(layout->format),
@@ -62,6 +60,6 @@ fdl_dump_layout(struct fdl_layout *layout)
 				slice->size0 / (slice->pitch * layout->cpp),
 				slice->offset, ubwc_slice->offset,
 				layout->layer_size, layout->ubwc_layer_size,
-				tile_mode);
+				fdl_tile_mode(layout, level));
 	}
 }
diff --git a/src/freedreno/fdl/freedreno_layout.h b/src/freedreno/fdl/freedreno_layout.h
index 7230d337a48..f20a3056c2c 100644
--- a/src/freedreno/fdl/freedreno_layout.h
+++ b/src/freedreno/fdl/freedreno_layout.h
@@ -93,6 +93,7 @@ struct fdl_layout {
 	struct fdl_slice ubwc_slices[FDL_MAX_MIP_LEVELS];
 	uint32_t layer_size;
 	uint32_t ubwc_layer_size; /* in bytes */
+	bool ubwc : 1;
 	bool layer_first : 1;    /* see above description */
 
 	/* Note that for tiled textures, beyond a certain mipmap level (ie.
@@ -134,22 +135,20 @@ fdl_surface_offset(const struct fdl_layout *layout, unsigned level, unsigned lay
 static inline uint32_t
 fdl_ubwc_offset(const struct fdl_layout *layout, unsigned level, unsigned layer)
 {
-	/* for now this doesn't do anything clever, but when UBWC is enabled
-	 * for multi layer/level images, it will.
-	 */
-	if (layout->ubwc_layer_size) {
-		assert(level == 0);
-		assert(layer == 0);
-	}
-	return layout->ubwc_slices[0].offset;
+	const struct fdl_slice *slice = &layout->ubwc_slices[level];
+	return slice->offset + layer * layout->ubwc_layer_size;
 }
 
 static inline bool
 fdl_level_linear(const struct fdl_layout *layout, int level)
 {
+	if (layout->ubwc)
+		return false;
+
 	unsigned w = u_minify(layout->width0, level);
 	if (w < 16)
 		return true;
+
 	return false;
 }
 
@@ -165,7 +164,7 @@ fdl_tile_mode(const struct fdl_layout *layout, int level)
 static inline bool
 fdl_ubwc_enabled(const struct fdl_layout *layout, int level)
 {
-	return layout->ubwc_layer_size && fdl_tile_mode(layout, level);
+	return layout->ubwc;
 }
 
 void
@@ -175,7 +174,7 @@ void
 fdl6_layout(struct fdl_layout *layout,
 		enum pipe_format format, uint32_t nr_samples,
 		uint32_t width0, uint32_t height0, uint32_t depth0,
-		uint32_t mip_levels, uint32_t array_size, bool is_3d, bool ubwc);
+		uint32_t mip_levels, uint32_t array_size, bool is_3d);
 
 void
 fdl_dump_layout(struct fdl_layout *layout);
diff --git a/src/freedreno/vulkan/tu_image.c b/src/freedreno/vulkan/tu_image.c
index fee74a294c5..4b804ba5c24 100644
--- a/src/freedreno/vulkan/tu_image.c
+++ b/src/freedreno/vulkan/tu_image.c
@@ -150,6 +150,8 @@ tu_image_create(VkDevice _device,
    /* expect UBWC enabled if we asked for it */
    assert(modifier != DRM_FORMAT_MOD_QCOM_COMPRESSED || ubwc_enabled);
 
+   image->layout.ubwc = ubwc_enabled;
+
    fdl6_layout(&image->layout, vk_format_to_pipe_format(image->vk_format),
                image->samples,
                pCreateInfo->extent.width,
@@ -157,8 +159,7 @@ tu_image_create(VkDevice _device,
                pCreateInfo->extent.depth,
                pCreateInfo->mipLevels,
                pCreateInfo->arrayLayers,
-               pCreateInfo->imageType == VK_IMAGE_TYPE_3D,
-               ubwc_enabled);
+               pCreateInfo->imageType == VK_IMAGE_TYPE_3D);
 
    *pImage = tu_image_to_handle(image);
 
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_image.c b/src/gallium/drivers/freedreno/a6xx/fd6_image.c
index 57503b5c14e..4e87e2e9c6e 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_image.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_image.c
@@ -197,9 +197,16 @@ static void emit_image_tex(struct fd_ringbuffer *ring, struct fd6_image *img)
 
 	if (ubwc_enabled) {
 		struct fdl_slice *ubwc_slice = &rsc->layout.ubwc_slices[img->level];
+
+		uint32_t block_width, block_height;
+		fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
 		OUT_RELOC(ring, rsc->bo, img->ubwc_offset, 0, 0);
 		OUT_RING(ring, A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2));
-		OUT_RING(ring, A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch));
+		OUT_RING(ring,
+				A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_slice->pitch) |
+				A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(img->width, block_width))) |
+				A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(img->height, block_height))));
 	} else {
 		OUT_RING(ring, 0x00000000);   /* texconst7 */
 		OUT_RING(ring, 0x00000000);   /* texconst8 */
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
index d35a42156bb..2e86632892a 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.c
@@ -89,67 +89,6 @@ ok_ubwc_format(struct fd_resource *rsc, enum pipe_format pfmt)
 	}
 }
 
-uint32_t
-fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc)
-{
-#define RBG_TILE_WIDTH_ALIGNMENT 64
-#define RGB_TILE_HEIGHT_ALIGNMENT 16
-#define UBWC_PLANE_SIZE_ALIGNMENT 4096
-
-	struct pipe_resource *prsc = &rsc->base;
-	uint32_t width = prsc->width0;
-	uint32_t height = prsc->height0;
-
-	if (!ok_ubwc_format(rsc, prsc->format))
-		return 0;
-
-	/* limit things to simple single level 2d for now: */
-	if ((prsc->depth0 != 1) || (prsc->array_size != 1) || (prsc->last_level != 0))
-		return 0;
-
-	uint32_t block_width, block_height;
-	switch (rsc->layout.cpp) {
-	case 2:
-	case 4:
-		block_width = 16;
-		block_height = 4;
-		break;
-	case 8:
-		block_width = 8;
-		block_height = 4;
-		break;
-	case 16:
-		block_width = 4;
-		block_height = 4;
-		break;
-	default:
-		return 0;
-	}
-
-	uint32_t meta_stride =
-		ALIGN_POT(DIV_ROUND_UP(width, block_width), RBG_TILE_WIDTH_ALIGNMENT);
-	uint32_t meta_height =
-		ALIGN_POT(DIV_ROUND_UP(height, block_height), RGB_TILE_HEIGHT_ALIGNMENT);
-	uint32_t meta_size =
-		ALIGN_POT(meta_stride * meta_height, UBWC_PLANE_SIZE_ALIGNMENT);
-
-	/* UBWC goes first, then color data.. this constraint is mainly only
-	 * because it is what the kernel expects for scanout.  For non-2D we
-	 * could just use a separate UBWC buffer..
-	 */
-	for (int level = 0; level <= prsc->last_level; level++) {
-		struct fdl_slice *slice = fd_resource_slice(rsc, level);
-		slice->offset += meta_size;
-	}
-
-	rsc->layout.ubwc_slices[0].offset = 0;
-	rsc->layout.ubwc_slices[0].pitch = meta_stride;
-	rsc->layout.ubwc_layer_size = meta_size;
-	rsc->layout.tile_mode = TILE6_3;
-
-	return meta_size;
-}
-
 /**
  * Ensure the rsc is in an ok state to be used with the specified format.
  * This handles the case of UBWC buffers used with non-UBWC compatible
@@ -159,7 +98,7 @@ void
 fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
 		enum pipe_format format)
 {
-	if (!rsc->layout.ubwc_layer_size)
+	if (!rsc->layout.ubwc)
 		return;
 
 	if (ok_ubwc_format(rsc, format))
@@ -205,10 +144,13 @@ fd6_setup_slices(struct fd_resource *rsc)
 	if (!(fd_mesa_debug & FD_DBG_NOLRZ) && has_depth(rsc->base.format))
 		setup_lrz(rsc);
 
+	if (rsc->layout.ubwc && !ok_ubwc_format(rsc, rsc->base.format))
+		rsc->layout.ubwc = false;
+
 	fdl6_layout(&rsc->layout, prsc->format, fd_resource_nr_samples(prsc),
 			prsc->width0, prsc->height0, prsc->depth0,
 			prsc->last_level + 1, prsc->array_size,
-			prsc->target == PIPE_TEXTURE_3D, false);
+			prsc->target == PIPE_TEXTURE_3D);
 
 	return rsc->layout.size;
 }
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_resource.h b/src/gallium/drivers/freedreno/a6xx/fd6_resource.h
index f639207c6bc..b988f1c78b6 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_resource.h
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_resource.h
@@ -30,7 +30,6 @@
 
 #include "freedreno_resource.h"
 
-uint32_t fd6_fill_ubwc_buffer_sizes(struct fd_resource *rsc);
 void fd6_validate_format(struct fd_context *ctx, struct fd_resource *rsc,
 		enum pipe_format format);
 uint32_t fd6_setup_slices(struct fd_resource *rsc);
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c
index 6e046cfa8c6..6e04df65119 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_screen.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_screen.c
@@ -138,7 +138,6 @@ fd6_screen_init(struct pipe_screen *pscreen)
 
 	screen->setup_slices = fd6_setup_slices;
 	screen->tile_mode = fd6_tile_mode;
-	screen->fill_ubwc_buffer_sizes = fd6_fill_ubwc_buffer_sizes;
 
 	static const uint64_t supported_modifiers[] = {
 		DRM_FORMAT_MOD_LINEAR,
diff --git a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
index 91b42bd3d7c..e0883815ca9 100644
--- a/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
+++ b/src/gallium/drivers/freedreno/a6xx/fd6_texture.c
@@ -277,11 +277,6 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 				cso->swizzle_r, cso->swizzle_g,
 				cso->swizzle_b, cso->swizzle_a);
 
-	if (so->ubwc_enabled) {
-		so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2);
-		so->texconst10 |= A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(rsc->layout.ubwc_slices[lvl].pitch);
-	}
-
 	so->texconst2 |= A6XX_TEX_CONST_2_TYPE(fd6_tex_type(cso->target));
 
 	switch (cso->target) {
@@ -320,7 +315,17 @@ fd6_sampler_view_create(struct pipe_context *pctx, struct pipe_resource *prsc,
 	}
 
 	if (so->ubwc_enabled) {
+		struct fdl_slice *ubwc_base_slice = &rsc->layout.ubwc_slices[lvl];
+
+		uint32_t block_width, block_height;
+		fdl6_get_ubwc_blockwidth(&rsc->layout, &block_width, &block_height);
+
 		so->texconst3 |= A6XX_TEX_CONST_3_FLAG | A6XX_TEX_CONST_3_TILE_ALL;
+		so->texconst9 |= A6XX_TEX_CONST_9_FLAG_BUFFER_ARRAY_PITCH(rsc->layout.ubwc_layer_size >> 2);
+		so->texconst10 |=
+			A6XX_TEX_CONST_10_FLAG_BUFFER_PITCH(ubwc_base_slice->pitch) |
+			A6XX_TEX_CONST_10_FLAG_BUFFER_LOGW(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->width0, lvl), block_width))) |
+			A6XX_TEX_CONST_10_FLAG_BUFFER_LOGH(util_logbase2_ceil(DIV_ROUND_UP(u_minify(prsc->height0, lvl), block_height)));
 	}
 
 	return &so->base;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.c b/src/gallium/drivers/freedreno/freedreno_resource.c
index 748664384eb..59b1d9793fa 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.c
+++ b/src/gallium/drivers/freedreno/freedreno_resource.c
@@ -977,6 +977,8 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
 
 	rsc->internal_format = format;
 
+	rsc->layout.ubwc = rsc->layout.tile_mode && is_a6xx(screen) && allow_ubwc;
+
 	if (prsc->target == PIPE_BUFFER) {
 		assert(prsc->format == PIPE_FORMAT_R8_UNORM);
 		size = prsc->width0;
@@ -985,9 +987,6 @@ fd_resource_create_with_modifiers(struct pipe_screen *pscreen,
 		size = screen->setup_slices(rsc);
 	}
 
-	if (allow_ubwc && screen->fill_ubwc_buffer_sizes && rsc->layout.tile_mode)
-		size += screen->fill_ubwc_buffer_sizes(rsc);
-
 	/* special case for hw-query buffer, which we need to allocate before we
 	 * know the size:
 	 */
@@ -1098,8 +1097,7 @@ fd_resource_from_handle(struct pipe_screen *pscreen,
 			DBG("bad modifier: %"PRIx64, handle->modifier);
 			goto fail;
 		}
-		debug_assert(screen->fill_ubwc_buffer_sizes);
-		screen->fill_ubwc_buffer_sizes(rsc);
+		/* XXX UBWC setup */
 	} else if (handle->modifier &&
 			(handle->modifier != DRM_FORMAT_MOD_INVALID)) {
 		goto fail;
diff --git a/src/gallium/drivers/freedreno/freedreno_resource.h b/src/gallium/drivers/freedreno/freedreno_resource.h
index 7a4f9fe2930..478417e6ba3 100644
--- a/src/gallium/drivers/freedreno/freedreno_resource.h
+++ b/src/gallium/drivers/freedreno/freedreno_resource.h
@@ -158,7 +158,9 @@ fd_resource_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
 static inline uint32_t
 fd_resource_ubwc_offset(struct fd_resource *rsc, unsigned level, unsigned layer)
 {
-	return fdl_ubwc_offset(&rsc->layout, level, layer);
+	uint32_t offset = fdl_ubwc_offset(&rsc->layout, level, layer);
+	debug_assert(offset < fd_bo_size(rsc->bo));
+	return offset;
 }
 
 /* This might be a5xx specific, but higher mipmap levels are always linear: */
diff --git a/src/gallium/drivers/freedreno/freedreno_screen.h b/src/gallium/drivers/freedreno/freedreno_screen.h
index 22513065e5d..b5730da0297 100644
--- a/src/gallium/drivers/freedreno/freedreno_screen.h
+++ b/src/gallium/drivers/freedreno/freedreno_screen.h
@@ -91,7 +91,6 @@ struct fd_screen {
 	 */
 	struct fd_pipe *pipe;
 
-	uint32_t (*fill_ubwc_buffer_sizes)(struct fd_resource *rsc);
 	uint32_t (*setup_slices)(struct fd_resource *rsc);
 	unsigned (*tile_mode)(const struct pipe_resource *prsc);
 



More information about the mesa-commit mailing list