[PATCH i-g-t v4 06/11] lib/rendercopy_gen9: Separate xe and xe2 compression format

Wed May 8 12:54:38 UTC 2024

Xe and Xe2+ differ in how the compression format is handled. For Xe it
is a 5-bit long field whereas for Xe2+ it is a 4-bit long field. Instead
of artificially packing 0-15 into a 5-bit field, let's separate these
structures to conform with the documentation.

Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
---
 lib/gen9_render.h     | 31 +++++++++++++++++++++----------
 lib/rendercopy_gen9.c | 24 ++++++++++++++++--------
 2 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/lib/gen9_render.h b/lib/gen9_render.h
index 8ed60a2a54..4c1ed4726a 100644
--- a/lib/gen9_render.h
+++ b/lib/gen9_render.h
@@ -154,16 +154,27 @@ struct gen9_surface_state {
 		uint32_t aux_base_addr_hi;
 	} ss11;
 
-	struct {
-		/*
-		 * compression_format is used only dg2 onward.
-		 * prior to dg2 full ss12 is used for the address
-		 * but due to alignments bits 0..6 will be zero
-		 * and asserted in code to be so
-		 */
-		uint32_t compression_format:5;
-		uint32_t pad0:1;
-		uint32_t clear_address:26;
+	union {
+		struct {
+			/*
+			 * compression_format is used only dg2 onward.
+			 * prior to dg2 full ss12 is used for the address
+			 * but due to alignments bits 0..6 will be zero
+			 * and asserted in code to be so
+			 */
+			uint32_t compression_format:5;
+			uint32_t pad0:1;
+			uint32_t clear_address:26;
+		} xe;
+
+		struct {
+			/*
+			 * On Xe2+ compression format is 4-bit long.
+			 */
+			uint32_t compression_format:4;
+			uint32_t mip_region_depth_in_log:4;
+			uint32_t pad0:24;
+		} xe2;
 	} ss12;
 
 	struct {
diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
index 7c7563d50c..35d79acbab 100644
--- a/lib/rendercopy_gen9.c
+++ b/lib/rendercopy_gen9.c
@@ -264,7 +264,7 @@ gen9_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst,
 			igt_assert(__builtin_ctzl(address + buf->cc.offset) >= 6 &&
 				   (__builtin_clzl(address + buf->cc.offset) >= 16));
 
-			ss->ss12.clear_address = (address + buf->cc.offset) >> 6;
+			ss->ss12.xe.clear_address = (address + buf->cc.offset) >> 6;
 			ss->ss13.clear_address_hi = (address + buf->cc.offset) >> 32;
 		}
 
@@ -274,13 +274,21 @@ gen9_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst,
 			ss->ss7.dg2.disable_support_for_multi_gpu_partial_writes = 1;
 			ss->ss7.dg2.disable_support_for_multi_gpu_atomics = 1;
 
-			/*
-			 * For now here is coming only 32bpp rgb format
-			 * which is marked below as B8G8R8X8_UNORM = '8'
-			 * If here ever arrive other formats below need to be
-			 * fixed to take that into account.
-			 */
-			ss->ss12.compression_format = 8;
+			if (AT_LEAST_GEN(ibb->devid, 20)) {
+				/*
+				 * For Xe2+ R8G8B8A8 best compression ratio is
+				 * achieved with compression format = '2'
+				 */
+				ss->ss12.xe2.compression_format = 2;
+			} else {
+				/*
+				 * For now here is coming only 32bpp rgb format
+				 * which is marked below as B8G8R8X8_UNORM = '8'
+				 * If here ever arrive other formats below need to be
+				 * fixed to take that into account.
+				 */
+				ss->ss12.xe.compression_format = 8;
+			}
 		}
 	}
 
-- 
2.34.1