[PATCH i-g-t 1/2] lib/rendercopy: Fix horizontal stride

Tue Apr 23 08:29:26 UTC 2024

Indeed, there is such a restriction in bspec, and the change you made does change the stride:

~ iga64 -p=2 -Xifs "mad (16|M0)              acc0.0<1>:f   r6.0<0;0>:f      r1.0<1;1>:f       r6.6<0>:f"
	...
        Src1.RgnHz              [97:96]   (2)            0x0                <*;0>

~ iga64 -p=2 -Xifs "mad (16|M0)              acc0.0<1>:f   r6.0<0;0>:f      r1.0<1;0>:f       r6.6<0>:f"
	...
	Src1.RgnHz              [97:96]   (2)            0x1                <*;1>

So it is:
Reviewed-by: Dominik Grzegorzek <dominik.grzegorzek at intel.com>

On Tue, 2024-04-23 at 09:04 +0200, Zbigniew Kempczyński wrote:
> According to the regioning spec, the horizontal stride must be 0. There's
> no functional change in render-copy execution on hardware.
> 
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Dominik Grzegorzek <dominik.grzegorzek at intel.com>
> ---
>  lib/i915/shaders/ps/gen20_render_copy.asm | 8 ++++----
>  lib/rendercopy_gen9.c                     | 8 ++++----
>  2 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/lib/i915/shaders/ps/gen20_render_copy.asm b/lib/i915/shaders/ps/gen20_render_copy.asm
> index 48057f441e..c4e12a3687 100644
> --- a/lib/i915/shaders/ps/gen20_render_copy.asm
> +++ b/lib/i915/shaders/ps/gen20_render_copy.asm
> @@ -1,8 +1,8 @@
>  L0:
> -(W)     mad (16|M0)              acc0.0<1>:f   r6.0<0;0>:f      r1.0<1;1>:f       r6.6<0>:f
> -(W)     mad (16|M0)              r113.0<1>:f   acc0.0<1;1>:f    r1.0<1;1>:f       r6.1<0>:f
> -(W)     mad (16|M0)              acc0.0<1>:f   r6.3<0;0>:f      r1.0<1;1>:f       r6.4<0>:f
> -(W)     mad (16|M0)              r114.0<1>:f   acc0.0<1;1>:f    r2.0<1;1>:f       r6.5<0>:f
> +(W)     mad (16|M0)              acc0.0<1>:f   r6.0<0;0>:f      r1.0<1;0>:f       r6.6<0>:f
> +(W)     mad (16|M0)              r113.0<1>:f   acc0.0<1;0>:f    r1.0<1;0>:f       r6.1<0>:f
> +(W)     mad (16|M0)              acc0.0<1>:f   r6.3<0;0>:f      r1.0<1;0>:f       r6.4<0>:f
> +(W)     mad (16|M0)              r114.0<1>:f   acc0.0<1;0>:f    r2.0<1;0>:f       r6.5<0>:f
>  (W)     send.smpl (16|M0)        r12      r113  null:0  0x0            0x04420001           {F@1,$0} // wr:2+0, rd:4; simd16 sample:u+v+r+ai+mlod using sampler index 0
>  (W)     send.rc (16|M0)          null     r12   null:0  0x0            0x08031400           {EOT,$0} // wr:4+0, rd:0; full-precision render target write SIMD16; last render target to surface 0
>  L96:
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index 392d68653b..7c7563d50c 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -138,10 +138,10 @@ static const uint32_t gen12p71_render_copy[][4] = {
>  };
>  
>  static const uint32_t xe2_render_copy[][4] = {
> -	{ 0x8010005b, 0x200002a0, 0x020a0604, 0x06640105 },
> -	{ 0x8010005b, 0x710402a8, 0x020a2001, 0x06140105 },
> -	{ 0x8010005b, 0x200002a0, 0x020a0634, 0x06440105 },
> -	{ 0x8010005b, 0x720402a8, 0x020a2001, 0x06540205 },
> +	{ 0x8010005b, 0x200002a0, 0x020a0604, 0x06640104 },
> +	{ 0x8010005b, 0x710402a8, 0x020a2000, 0x06140104 },
> +	{ 0x8010005b, 0x200002a0, 0x020a0634, 0x06440104 },
> +	{ 0x8010005b, 0x720402a8, 0x020a2000, 0x06540204 },
>  	{ 0x80122031, 0x0c240000, 0x20027114, 0x00800000 },
>  	{ 0x8010c031, 0x00000004, 0x58000c24, 0x00c40000 },
>  };