[PATCH i-g-t v5 06/11] lib/rendercopy_gen9: Separate xe and xe2 compression format

Tue May 14 17:25:31 UTC 2024

On 9.5.2024 8.33, Zbigniew Kempczyński wrote:
> Xe and beyond differ in how the compression format is handled. For Xe it
> is a 5-bit field, whereas for Xe2+ it is a 4-bit field. Instead of
> artificially packing 0-15 into a 5-bit field, let's separate these structures
> to conform with the documentation.
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>
> ---
>   lib/gen9_render.h     | 31 +++++++++++++++++++++----------
>   lib/rendercopy_gen9.c | 24 ++++++++++++++++--------
>   2 files changed, 37 insertions(+), 18 deletions(-)
> 
> diff --git a/lib/gen9_render.h b/lib/gen9_render.h
> index 8ed60a2a54..4c1ed4726a 100644
> --- a/lib/gen9_render.h
> +++ b/lib/gen9_render.h
> @@ -154,16 +154,27 @@ struct gen9_surface_state {
>   		uint32_t aux_base_addr_hi;
>   	} ss11;
>   
> -	struct {
> -		/*
> -		 * compression_format is used only dg2 onward.
> -		 * prior to dg2 full ss12 is used for the address
> -		 * but due to alignments bits 0..6 will be zero
> -		 * and asserted in code to be so
> -		 */
> -		uint32_t compression_format:5;
> -		uint32_t pad0:1;
> -		uint32_t clear_address:26;
> +	union {
> +		struct {
> +			/*
> +			 * compression_format is used only dg2 onward.
> +			 * prior to dg2 full ss12 is used for the address
> +			 * but due to alignments bits 0..6 will be zero
> +			 * and asserted in code to be so
> +			 */
> +			uint32_t compression_format:5;
> +			uint32_t pad0:1;
> +			uint32_t clear_address:26;
> +		} xe;
> +
> +		struct {
> +			/*
> +			 * On Xe2+ compression format is 4-bit long.
> +			 */
> +			uint32_t compression_format:4;
> +			uint32_t mip_region_depth_in_log:4;
> +			uint32_t pad0:24;
> +		} xe2;

I'd prefer to use the same naming convention here as in the rest of the
structure, i.e. xe would become dg2 (as in the comment above), and xe2 I
figure is likely lnl.

Otherwise everything looks OK. With those fixed:

Reviewed-by: Juha-Pekka Heikkila <juhapekka.heikkila at gmail.com>

>   	} ss12;
>   
>   	struct {
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index 7c7563d50c..35d79acbab 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -264,7 +264,7 @@ gen9_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst,
>   			igt_assert(__builtin_ctzl(address + buf->cc.offset) >= 6 &&
>   				   (__builtin_clzl(address + buf->cc.offset) >= 16));
>   
> -			ss->ss12.clear_address = (address + buf->cc.offset) >> 6;
> +			ss->ss12.xe.clear_address = (address + buf->cc.offset) >> 6;
>   			ss->ss13.clear_address_hi = (address + buf->cc.offset) >> 32;
>   		}
>   
> @@ -274,13 +274,21 @@ gen9_bind_buf(struct intel_bb *ibb, const struct intel_buf *buf, int is_dst,
>   			ss->ss7.dg2.disable_support_for_multi_gpu_partial_writes = 1;
>   			ss->ss7.dg2.disable_support_for_multi_gpu_atomics = 1;
>   
> -			/*
> -			 * For now here is coming only 32bpp rgb format
> -			 * which is marked below as B8G8R8X8_UNORM = '8'
> -			 * If here ever arrive other formats below need to be
> -			 * fixed to take that into account.
> -			 */
> -			ss->ss12.compression_format = 8;
> +			if (AT_LEAST_GEN(ibb->devid, 20)) {
> +				/*
> +				 * For Xe2+ R8G8B8A8 best compression ratio is
> +				 * achieved with compression format = '2'
> +				 */
> +				ss->ss12.xe2.compression_format = 2;
> +			} else {
> +				/*
> +				 * For now here is coming only 32bpp rgb format
> +				 * which is marked below as B8G8R8X8_UNORM = '8'
> +				 * If here ever arrive other formats below need to be
> +				 * fixed to take that into account.
> +				 */
> +				ss->ss12.xe.compression_format = 8;
> +			}
>   		}
>   	}
>