[PATCH i-g-t 08/37] lib/rendercopy: Fix fastclear scaling

Thu Aug 29 15:11:13 UTC 2024

On Tue, Aug 27, 2024 at 06:17:33PM +0300, Juha-Pekka Heikkila wrote:
> On 3.7.2024 2.27, Ville Syrjala wrote:
> > From: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > 
> > The hardcoded 64x16 fastclear coordinate scaling
> > factors assume 32bpp+Y-tile. Determine the correct
> > scaling factors for other tilings and bpps.
> > 
> > Signed-off-by: Ville Syrjälä <ville.syrjala at linux.intel.com>
> > ---
> >   lib/rendercopy_gen9.c | 105 +++++++++++++++++++++++++++++++++++++++---
> >   1 file changed, 99 insertions(+), 6 deletions(-)
> > 
> > diff --git a/lib/rendercopy_gen9.c b/lib/rendercopy_gen9.c
> > index 57b64dad1b1d..42a227916f15 100644
> > --- a/lib/rendercopy_gen9.c
> > +++ b/lib/rendercopy_gen9.c
> > @@ -346,6 +346,95 @@ gen8_fill_ps(struct intel_bb *ibb,
> >   	return intel_bb_copy_data(ibb, kernel, size, 64);
> >   }
> >   
> > +static void fast_clear_scale(const struct intel_buf *buf,
> > +			     int *x_scale, int *y_scale)
> > +{
> > +	switch (buf->tiling) {
> > +	case I915_TILING_4:
> > +		*x_scale = 1024 * 8 / buf->bpp;
> 
> I was trying to figure out where 1024 comes from but fell short; maybe
> a comment could be added for this magic number. Otherwise the patch looks OK.

The 1024 is just the required horizontal alignment for 8bpp (256 in
the tile-y case). For tile4 and tile-y we can simply divide the 8bpp
value down to get the alignment for higher bpps. The magic numbers are
listed in bspec:47709.

> 
> /Juha-Pekka
> 
> > +		*y_scale = 16;
> > +		break;
> > +	case I915_TILING_64:
> > +		switch (buf->bpp) {
> > +		case 8:
> > +			*x_scale = 128;
> > +			*y_scale = 128;
> > +			break;
> > +		case 16:
> > +			*x_scale = 128;
> > +			*y_scale = 64;
> > +			break;
> > +		case 32:
> > +			*x_scale = 64;
> > +			*y_scale = 64;
> > +			break;
> > +		case 64:
> > +			*x_scale = 64;
> > +			*y_scale = 32;
> > +			break;
> > +		case 128:
> > +			*x_scale = 32;
> > +			*y_scale = 32;
> > +			break;
> > +		}
> > +		break;
> > +	case I915_TILING_Y:
> > +		*x_scale = 256 * 8 / buf->bpp;
> > +		*y_scale = 16;
> > +		break;
> > +	case I915_TILING_Yf:
> > +		switch (buf->bpp) {
> > +		case 8:
> > +			*x_scale = 128;
> > +			*y_scale = 32;
> > +			break;
> > +		case 16:
> > +			*x_scale = 128;
> > +			*y_scale = 16;
> > +			break;
> > +		case 32:
> > +			*x_scale = 64;
> > +			*y_scale = 16;
> > +			break;
> > +		case 64:
> > +			*x_scale = 64;
> > +			*y_scale = 8;
> > +			break;
> > +		case 128:
> > +			*x_scale = 32;
> > +			*y_scale = 8;
> > +			break;
> > +		}
> > +		break;
> > +	case I915_TILING_Ys:
> > +		switch (buf->bpp) {
> > +		case 8:
> > +			*x_scale = 64;
> > +			*y_scale = 64;
> > +			break;
> > +		case 16:
> > +			*x_scale = 64;
> > +			*y_scale = 32;
> > +			break;
> > +		case 32:
> > +			*x_scale = 32;
> > +			*y_scale = 32;
> > +			break;
> > +		case 64:
> > +			*x_scale = 32;
> > +			*y_scale = 16;
> > +			break;
> > +		case 128:
> > +			*x_scale = 16;
> > +			*y_scale = 16;
> > +			break;
> > +		}
> > +		break;
> > +	default:
> > +		igt_assert(0);
> > +	}
> > +}
> > +
> >   /*
> >    * gen7_fill_vertex_buffer_data populate vertex buffer with data.
> >    *
> > @@ -360,6 +449,7 @@ static uint32_t
> >   gen7_fill_vertex_buffer_data(struct intel_bb *ibb,
> >   			     const struct intel_buf *src,
> >   			     uint32_t src_x, uint32_t src_y,
> > +			     const struct intel_buf *dst,
> >   			     uint32_t dst_x, uint32_t dst_y,
> >   			     uint32_t width, uint32_t height)
> >   {
> > @@ -384,17 +474,21 @@ gen7_fill_vertex_buffer_data(struct intel_bb *ibb,
> >   		emit_vertex_normalized(ibb, src_x, intel_buf_width(src));
> >   		emit_vertex_normalized(ibb, src_y, intel_buf_height(src));
> >   	} else {
> > -		emit_vertex_2s(ibb, DIV_ROUND_UP(dst_x + width, 64), DIV_ROUND_UP(dst_y + height, 16));
> > +		int x_scale, y_scale;
> > +
> > +		fast_clear_scale(dst, &x_scale, &y_scale);
> > +
> > +		emit_vertex_2s(ibb, DIV_ROUND_UP(dst_x + width, x_scale), DIV_ROUND_UP(dst_y + height, y_scale));
> >   
> >   		emit_vertex_normalized(ibb, 0, 0);
> >   		emit_vertex_normalized(ibb, 0, 0);
> >   
> > -		emit_vertex_2s(ibb, dst_x/64, DIV_ROUND_UP(dst_y + height, 16));
> > +		emit_vertex_2s(ibb, dst_x/x_scale, DIV_ROUND_UP(dst_y + height, y_scale));
> >   
> >   		emit_vertex_normalized(ibb, 0, 0);
> >   		emit_vertex_normalized(ibb, 0, 0);
> >   
> > -		emit_vertex_2s(ibb, dst_x/64, dst_y/16);
> > +		emit_vertex_2s(ibb, dst_x/x_scale, dst_y/y_scale);
> >   
> >   		emit_vertex_normalized(ibb, 0, 0);
> >   		emit_vertex_normalized(ibb, 0, 0);
> > @@ -1108,9 +1202,8 @@ void _gen9_render_op(struct intel_bb *ibb,
> >   	ps_binding_table  = gen8_bind_surfaces(ibb, src, dst);
> >   	ps_sampler_state  = gen8_create_sampler(ibb);
> >   	ps_kernel_off = gen8_fill_ps(ibb, ps_kernel, ps_kernel_size);
> > -	vertex_buffer = gen7_fill_vertex_buffer_data(ibb, src,
> > -						     src_x, src_y,
> > -						     dst_x, dst_y,
> > +	vertex_buffer = gen7_fill_vertex_buffer_data(ibb, src, src_x, src_y,
> > +						     dst, dst_x, dst_y,
> >   						     width, height);
> >   	cc.cc_state = gen6_create_cc_state(ibb);
> >   	cc.blend_state = gen8_create_blend_state(ibb);

-- 
Ville Syrjälä
Intel