[PATCH i-g-t 08/37] lib/rendercopy: Fix fastclear scaling

Tue Aug 27 15:17:33 UTC 2024

On 3.7.2024 2.27, Ville Syrjala wrote:
> From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> 
> The hardcoded 64x16 fastclear coordinate scaling
> factors assume 32bpp+Y-tile. Determine the correct
> scaling factors for other tilings and bpps.
> 
> Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> ---
>   lib/rendercopy_gen9.c | 105 +++++++++++++++++++++++++++++++++++++++---
>   1 file changed, 99 insertions(+), 6 deletions(-)
> 
> diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> index 57b64dad1b1d..42a227916f15 100644
> --- a/lib/rendercopy_gen9.c
> +++ b/lib/rendercopy_gen9.c
> @@ -346,6 +346,95 @@ gen8_fill_ps(struct intel_bb *ibb,
>   	return intel_bb_copy_data(ibb, kernel, size, 64);
>   }
>   
> +static void fast_clear_scale(const struct intel_buf *buf,
> +			     int *x_scale, int *y_scale)
> +{
> +	switch (buf->tiling) {
> +	case I915_TILING_4:
> +		*x_scale = 1024 * 8 / buf->bpp;

I was trying to figure out where the 1024 comes from but fell short; maybe
a comment could be added to explain this magic number. Otherwise the patch looks OK.

/Juha-Pekka

> +		*y_scale = 16;
> +		break;
> +	case I915_TILING_64:
> +		switch (buf->bpp) {
> +		case 8:
> +			*x_scale = 128;
> +			*y_scale = 128;
> +			break;
> +		case 16:
> +			*x_scale = 128;
> +			*y_scale = 64;
> +			break;
> +		case 32:
> +			*x_scale = 64;
> +			*y_scale = 64;
> +			break;
> +		case 64:
> +			*x_scale = 64;
> +			*y_scale = 32;
> +			break;
> +		case 128:
> +			*x_scale = 32;
> +			*y_scale = 32;
> +			break;
> +		}
> +		break;
> +	case I915_TILING_Y:
> +		*x_scale = 256 * 8 / buf->bpp;
> +		*y_scale = 16;
> +		break;
> +	case I915_TILING_Yf:
> +		switch (buf->bpp) {
> +		case 8:
> +			*x_scale = 128;
> +			*y_scale = 32;
> +			break;
> +		case 16:
> +			*x_scale = 128;
> +			*y_scale = 16;
> +			break;
> +		case 32:
> +			*x_scale = 64;
> +			*y_scale = 16;
> +			break;
> +		case 64:
> +			*x_scale = 64;
> +			*y_scale = 8;
> +			break;
> +		case 128:
> +			*x_scale = 32;
> +			*y_scale = 8;
> +			break;
> +		}
> +		break;
> +	case I915_TILING_Ys:
> +		switch (buf->bpp) {
> +		case 8:
> +			*x_scale = 64;
> +			*y_scale = 64;
> +			break;
> +		case 16:
> +			*x_scale = 64;
> +			*y_scale = 32;
> +			break;
> +		case 32:
> +			*x_scale = 32;
> +			*y_scale = 32;
> +			break;
> +		case 64:
> +			*x_scale = 32;
> +			*y_scale = 16;
> +			break;
> +		case 128:
> +			*x_scale = 16;
> +			*y_scale = 16;
> +			break;
> +		}
> +		break;
> +	default:
> +		igt_assert(0);
> +	}
> +}
> +
>   /*
>    * gen7_fill_vertex_buffer_data populate vertex buffer with data.
>    *
> @@ -360,6 +449,7 @@ static uint32_t
>   gen7_fill_vertex_buffer_data(struct intel_bb *ibb,
>   			     const struct intel_buf *src,
>   			     uint32_t src_x, uint32_t src_y,
> +			     const struct intel_buf *dst,
>   			     uint32_t dst_x, uint32_t dst_y,
>   			     uint32_t width, uint32_t height)
>   {
> @@ -384,17 +474,21 @@ gen7_fill_vertex_buffer_data(struct intel_bb *ibb,
>   		emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
>   		emit_vertex_normalized(ibb, src_y, intel_buf_height(src));
>   	} else {
> -		emit_vertex_2s(ibb, DIV_ROUND_UP(dst_x + width, 64), DIV_ROUND_UP(dst_y + height, 16));
> +		int x_scale, y_scale;
> +
> +		fast_clear_scale(dst, &x_scale, &y_scale);
> +
> +		emit_vertex_2s(ibb, DIV_ROUND_UP(dst_x + width, x_scale), DIV_ROUND_UP(dst_y + height, y_scale));
>   
>   		emit_vertex_normalized(ibb, 0, 0);
>   		emit_vertex_normalized(ibb, 0, 0);
>   
> -		emit_vertex_2s(ibb, dst_x/64, DIV_ROUND_UP(dst_y + height, 16));
> +		emit_vertex_2s(ibb, dst_x/x_scale, DIV_ROUND_UP(dst_y + height, y_scale));
>   
>   		emit_vertex_normalized(ibb, 0, 0);
>   		emit_vertex_normalized(ibb, 0, 0);
>   
> -		emit_vertex_2s(ibb, dst_x/64, dst_y/16);
> +		emit_vertex_2s(ibb, dst_x/x_scale, dst_y/y_scale);
>   
>   		emit_vertex_normalized(ibb, 0, 0);
>   		emit_vertex_normalized(ibb, 0, 0);
> @@ -1108,9 +1202,8 @@ void _gen9_render_op(struct intel_bb *ibb,
>   	ps_binding_table  = gen8_bind_surfaces(ibb, src, dst);
>   	ps_sampler_state  = gen8_create_sampler(ibb);
>   	ps_kernel_off = gen8_fill_ps(ibb, ps_kernel, ps_kernel_size);
> -	vertex_buffer = gen7_fill_vertex_buffer_data(ibb, src,
> -						     src_x, src_y,
> -						     dst_x, dst_y,
> +	vertex_buffer = gen7_fill_vertex_buffer_data(ibb, src, src_x, src_y,
> +						     dst, dst_x, dst_y,
>   						     width, height);
>   	cc.cc_state = gen6_create_cc_state(ibb);
>   	cc.blend_state = gen8_create_blend_state(ibb);