Mesa (master): ac/surface: add code for gfx10 displayable DCC

GitLab Mirror gitlab-mirror at kemper.freedesktop.org
Wed Apr 29 15:03:25 UTC 2020


Module: Mesa
Branch: master
Commit: 5e31e4b6971fde00040c7a37f13f2253ae49ad34
URL:    http://cgit.freedesktop.org/mesa/mesa/commit/?id=5e31e4b6971fde00040c7a37f13f2253ae49ad34

Author: Marek Olšák <marek.olsak at amd.com>
Date:   Fri Apr 17 20:37:41 2020 -0400

ac/surface: add code for gfx10 displayable DCC

Acked-by: Pierre-Eric Pelloux-Prayer <pierre-eric.pelloux-prayer at amd.com>
Part-of: <https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/4697>

---

 src/amd/common/ac_surface.c                | 80 ++++++++++++++++++++++++------
 src/amd/common/ac_surface.h                |  3 ++
 src/gallium/drivers/radeon/radeon_winsys.h |  2 +
 src/gallium/drivers/radeonsi/si_state.c    |  8 +--
 src/gallium/drivers/radeonsi/si_texture.c  | 13 ++---
 src/gallium/winsys/amdgpu/drm/amdgpu_bo.c  |  8 +++
 6 files changed, 88 insertions(+), 26 deletions(-)

diff --git a/src/amd/common/ac_surface.c b/src/amd/common/ac_surface.c
index 8773fd51b47..c64f9903aad 100644
--- a/src/amd/common/ac_surface.c
+++ b/src/amd/common/ac_surface.c
@@ -32,6 +32,7 @@
 #include "util/macros.h"
 #include "util/u_atomic.h"
 #include "util/u_math.h"
+#include "sid.h"
 
 #include <errno.h>
 #include <stdio.h>
@@ -378,10 +379,6 @@ static int gfx6_compute_level(ADDR_HANDLE addrlib,
 	return 0;
 }
 
-#define   G_009910_MICRO_TILE_MODE(x)          (((x) >> 0) & 0x03)
-#define     V_009910_ADDR_SURF_THICK_MICRO_TILING                   0x03
-#define   G_009910_MICRO_TILE_MODE_NEW(x)      (((x) >> 22) & 0x07)
-
 static void gfx6_set_micro_tile_mode(struct radeon_surf *surf,
 				     const struct radeon_info *info)
 {
@@ -1046,12 +1043,37 @@ static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
 	    !info->use_display_dcc_with_retile_blit)
 		return false;
 
+	/* Only 32bpp is allowed for now; 16bpp and 64bpp are more complicated. */
+	if (surf->bpe != 4)
+		return false;
+
 	/* Handle unaligned DCC. */
 	if (info->use_display_dcc_unaligned &&
 	    (rb_aligned || pipe_aligned))
 		return false;
 
-	return true;
+	switch (info->chip_class) {
+	case GFX9:
+		/* There are more constraints, but we always set
+		 * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
+		 * which always works.
+		 */
+		assert(surf->u.gfx9.dcc.independent_64B_blocks &&
+		       surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
+		return true;
+	case GFX10:
+		/* DCN requires INDEPENDENT_128B_BLOCKS = 0.
+		 * For 4K, it also requires INDEPENDENT_64B_BLOCKS = 1.
+		 */
+		return !surf->u.gfx9.dcc.independent_128B_blocks &&
+		       ((config->info.width <= 2560 &&
+			 config->info.height <= 2560) ||
+			(surf->u.gfx9.dcc.independent_64B_blocks &&
+			 surf->u.gfx9.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
+	default:
+		unreachable("unhandled chip");
+		return false;
+	}
 }
 
 static int gfx9_compute_miptree(ADDR_HANDLE addrlib,
@@ -1552,17 +1574,43 @@ static int gfx9_compute_surface(ADDR_HANDLE addrlib,
 	AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
 	AddrSurfInfoIn.flags.metaRbUnaligned = 0;
 
-	/* The display hardware can only read DCC with RB_ALIGNED=0 and
-	 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
-	 *
-	 * The CB block requires RB_ALIGNED=1 except 1 RB chips.
-	 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
-	 * after rendering, so PIPE_ALIGNED=1 is recommended.
-	 */
-	if (info->use_display_dcc_unaligned &&
-	    AddrSurfInfoIn.flags.display) {
-		AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
-		AddrSurfInfoIn.flags.metaRbUnaligned = 1;
+	/* Optimal values for the L2 cache. */
+	if (info->chip_class == GFX9) {
+		surf->u.gfx9.dcc.independent_64B_blocks = 1;
+		surf->u.gfx9.dcc.independent_128B_blocks = 0;
+		surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+	} else if (info->chip_class >= GFX10) {
+		surf->u.gfx9.dcc.independent_64B_blocks = 0;
+		surf->u.gfx9.dcc.independent_128B_blocks = 1;
+		surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
+	}
+
+	if (AddrSurfInfoIn.flags.display) {
+		/* The display hardware can only read DCC with RB_ALIGNED=0 and
+		 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
+		 *
+		 * The CB block requires RB_ALIGNED=1, except on chips with 1 RB.
+		 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
+		 * after rendering, so PIPE_ALIGNED=1 is recommended.
+		 */
+		if (info->use_display_dcc_unaligned) {
+			AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
+			AddrSurfInfoIn.flags.metaRbUnaligned = 1;
+		}
+
+		/* Adjust DCC settings to meet DCN requirements. */
+		if (info->use_display_dcc_unaligned ||
+		    info->use_display_dcc_with_retile_blit) {
+			/* Only Navi12/14 support independent 64B blocks in L2,
+			 * but without DCC image stores.
+			 */
+			if (info->family == CHIP_NAVI12 ||
+			    info->family == CHIP_NAVI14) {
+				surf->u.gfx9.dcc.independent_64B_blocks = 1;
+				surf->u.gfx9.dcc.independent_128B_blocks = 0;
+				surf->u.gfx9.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
+			}
+		}
 	}
 
 	switch (mode) {
diff --git a/src/amd/common/ac_surface.h b/src/amd/common/ac_surface.h
index 77ddf2fc5e3..8bdafa295ef 100644
--- a/src/amd/common/ac_surface.h
+++ b/src/amd/common/ac_surface.h
@@ -139,6 +139,9 @@ struct gfx9_surf_flags {
 struct gfx9_surf_meta_flags {
     unsigned                    rb_aligned:1;   /* optimal for RBs */
     unsigned                    pipe_aligned:1; /* optimal for TC */
+    unsigned                    independent_64B_blocks:1;
+    unsigned                    independent_128B_blocks:1;
+    unsigned                    max_compressed_block_size:2;
 };
 
 struct gfx9_surf_layout {
diff --git a/src/gallium/drivers/radeon/radeon_winsys.h b/src/gallium/drivers/radeon/radeon_winsys.h
index a2293837763..e3bb9c3f369 100644
--- a/src/gallium/drivers/radeon/radeon_winsys.h
+++ b/src/gallium/drivers/radeon/radeon_winsys.h
@@ -227,6 +227,8 @@ struct radeon_bo_metadata {
          unsigned dcc_offset_256B : 24;
          unsigned dcc_pitch_max : 14; /* (mip chain pitch - 1) for DCN */
          unsigned dcc_independent_64B : 1;
+         unsigned dcc_independent_128B : 1;
+         unsigned dcc_max_compressed_block_size : 2;
 
          bool scanout;
       } gfx9;
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 1d62f11c6be..baa65452e36 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -2416,10 +2416,10 @@ static void si_initialize_color_surface(struct si_context *sctx, struct si_surfa
          min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_64B;
 
       surf->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
-                             S_028C78_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
+                             S_028C78_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
                              S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
-                             S_028C78_INDEPENDENT_64B_BLOCKS(0) |
-                             S_028C78_INDEPENDENT_128B_BLOCKS(1);
+                             S_028C78_INDEPENDENT_64B_BLOCKS(tex->surface.u.gfx9.dcc.independent_64B_blocks) |
+                             S_028C78_INDEPENDENT_128B_BLOCKS(tex->surface.u.gfx9.dcc.independent_128B_blocks);
    } else if (sctx->chip_class >= GFX8) {
       unsigned max_uncompressed_block_size = V_028C78_MAX_BLOCK_SIZE_256B;
       unsigned min_compressed_block_size = V_028C78_MIN_BLOCK_SIZE_32B;
@@ -3799,7 +3799,7 @@ static void gfx10_make_texture_descriptor(
 
    if (tex->surface.dcc_offset) {
       state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
-                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_128B) |
+                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(tex->surface.u.gfx9.dcc.max_compressed_block_size) |
                   S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(screen, pipe_format));
    }
 
diff --git a/src/gallium/drivers/radeonsi/si_texture.c b/src/gallium/drivers/radeonsi/si_texture.c
index 43fc648edfa..0b29fc44bf3 100644
--- a/src/gallium/drivers/radeonsi/si_texture.c
+++ b/src/gallium/drivers/radeonsi/si_texture.c
@@ -339,12 +339,11 @@ static void si_get_display_metadata(struct si_screen *sscreen, struct radeon_sur
          *array_mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
 
       surf->u.gfx9.surf.swizzle_mode = metadata->u.gfx9.swizzle_mode;
+      surf->u.gfx9.dcc.independent_64B_blocks = metadata->u.gfx9.dcc_independent_64B;
+      surf->u.gfx9.dcc.independent_128B_blocks = metadata->u.gfx9.dcc_independent_128B;
+      surf->u.gfx9.dcc.max_compressed_block_size = metadata->u.gfx9.dcc_max_compressed_block_size;
+      surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max;
       *is_scanout = metadata->u.gfx9.scanout;
-
-      if (metadata->u.gfx9.dcc_offset_256B) {
-         surf->u.gfx9.display_dcc_pitch_max = metadata->u.gfx9.dcc_pitch_max;
-         assert(metadata->u.gfx9.dcc_independent_64B == 1);
-      }
    } else {
       surf->u.legacy.pipe_config = metadata->u.legacy.pipe_config;
       surf->u.legacy.bankw = metadata->u.legacy.bankw;
@@ -613,7 +612,9 @@ static void si_set_tex_bo_metadata(struct si_screen *sscreen, struct si_texture
          assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
          md.u.gfx9.dcc_offset_256B = dcc_offset >> 8;
          md.u.gfx9.dcc_pitch_max = tex->surface.u.gfx9.display_dcc_pitch_max;
-         md.u.gfx9.dcc_independent_64B = 1;
+         md.u.gfx9.dcc_independent_64B = tex->surface.u.gfx9.dcc.independent_64B_blocks;
+         md.u.gfx9.dcc_independent_128B = tex->surface.u.gfx9.dcc.independent_128B_blocks;
+         md.u.gfx9.dcc_max_compressed_block_size = tex->surface.u.gfx9.dcc.max_compressed_block_size;
       }
    } else {
       md.u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
diff --git a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
index bdb03ee033f..ec2fa3a56e4 100644
--- a/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
+++ b/src/gallium/winsys/amdgpu/drm/amdgpu_bo.c
@@ -1223,6 +1223,10 @@ static unsigned eg_tile_split_rev(unsigned eg_tile_split)
    }
 }
 
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT	44
+#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK		0x1
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT  45
+#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK   0x3
 #define AMDGPU_TILING_SCANOUT_SHIFT		63
 #define AMDGPU_TILING_SCANOUT_MASK		0x1
 
@@ -1248,6 +1252,8 @@ static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
       md->u.gfx9.dcc_offset_256B = AMDGPU_TILING_GET(tiling_flags, DCC_OFFSET_256B);
       md->u.gfx9.dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
       md->u.gfx9.dcc_independent_64B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
+      md->u.gfx9.dcc_independent_128B = AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
+      md->u.gfx9.dcc_max_compressed_block_size = AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
       md->u.gfx9.scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
    } else {
       md->u.legacy.microtile = RADEON_LAYOUT_LINEAR;
@@ -1286,6 +1292,8 @@ static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
       tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, md->u.gfx9.dcc_offset_256B);
       tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, md->u.gfx9.dcc_pitch_max);
       tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, md->u.gfx9.dcc_independent_64B);
+      tiling_flags |= AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, md->u.gfx9.dcc_independent_128B);
+      tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, md->u.gfx9.dcc_max_compressed_block_size);
       tiling_flags |= AMDGPU_TILING_SET(SCANOUT, md->u.gfx9.scanout);
    } else {
       if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)



More information about the mesa-commit mailing list