[PATCH i-g-t v3 09/11] lib/intel_blt: add support for matrix mem-copy

Fri May 30 06:17:42 UTC 2025

On Wed, May 28, 2025 at 10:23:41AM +0200, Francois Dugast wrote:
> On Fri, May 23, 2025 at 10:01:24AM +0200, Zbigniew Kempczyński wrote:
> > Linear copy in intel_blt supports passing large buffers (which
> > requires to be spread over couple mem-copies). For matrix this is
> > a little bit more complicated so I left simple case in which
> 
> Patch LGTM but a nit: impersonal style is preferred over "I did X". 

Ok, reprhased this. Check if it is ok now, anyway I applied r-b.

--
Zbigniew

> 
> With that:
> 
>     Reviewed-by: Francois Dugast <francois.dugast at intel.com>
> 
> > pitch/width/height must be within mem-copy command limits -
> > 18-bit width * 18-bit height gives 64GiB object so testing
> > copying bigger buffer would be an overkill.
> > 
> > Cc: Francois Dugast <francois.dugast at intel.com>
> > Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> > ---
> >  lib/intel_blt.c | 69 +++++++++++++++++++++++++++++++++----------------
> >  1 file changed, 47 insertions(+), 22 deletions(-)
> > 
> > diff --git a/lib/intel_blt.c b/lib/intel_blt.c
> > index 265f5ed50f..77a03aff4e 100644
> > --- a/lib/intel_blt.c
> > +++ b/lib/intel_blt.c
> > @@ -1893,17 +1893,18 @@ static uint64_t emit_blt_mem_copy(int fd, uint64_t ahnd,
> >  {
> >  	struct xe_mem_copy_data data = {};
> >  	uint64_t dst_offset, src_offset, shift;
> > -	uint32_t height, width_max, remain;
> > +	uint32_t width, height, width_max, height_max, remain;
> >  	uint32_t bbe = MI_BATCH_BUFFER_END;
> >  	uint32_t *bb;
> >  
> >  	if (mem->mode == MODE_BYTE) {
> >  		data.dw01.byte_copy.width = -1;
> > -		width_max = data.dw01.byte_copy.width + 1;
> > +		height_max = width_max = data.dw01.byte_copy.width + 1;
> >  		shift = width_max;
> >  	} else {
> >  		data.dw01.page_copy.width = -1;
> >  		width_max = data.dw01.page_copy.width + 1;
> > +		height_max = 1;
> >  		shift = width_max << 8;
> >  	}
> >  
> > @@ -1914,6 +1915,7 @@ static uint64_t emit_blt_mem_copy(int fd, uint64_t ahnd,
> >  
> >  	bb = bo_map(fd, mem->bb.handle, mem->bb.size, mem->driver);
> >  
> > +	width = mem->src.width;
> >  	height = mem->dst.height;
> >  
> >  	data.dw00.client = 0x2;
> > @@ -1930,34 +1932,57 @@ static uint64_t emit_blt_mem_copy(int fd, uint64_t ahnd,
> >  	data.dw09.src_mocs = mem->src.mocs_index;
> >  	data.dw09.dst_mocs = mem->dst.mocs_index;
> >  
> > -	remain = mem->src.width;
> > +	/* For matrix we don't iterate */
> > +	if (mem->copy_type == TYPE_MATRIX) {
> > +		if (width > width_max) {
> > +			width = width_max;
> > +			igt_warn("src width is bigger than max width [%u > %u => %u], truncating it\n",
> > +				 mem->src.width, width_max, width);
> > +		}
> >  
> > -	/* Truncate pitches to match operation bits */
> > -	if (mem->src.pitch > width_max)
> > -		data.dw03.src_pitch = width_max - 1;
> > -	else
> > -		data.dw03.src_pitch = mem->src.pitch;
> > +		if (height > height_max) {
> > +			height = height_max;
> > +			igt_warn("src height is bigger than max height [%u > %u => %u], truncating it\n",
> > +				 mem->src.height, height_max, height);
> > +		}
> >  
> > -	if (mem->dst.pitch > width_max)
> > -		data.dw04.dst_pitch = width_max - 1;
> > -	else
> > -		data.dw04.dst_pitch = mem->dst.pitch;
> > -
> > -	while (remain) {
> > -		data.dw01.val = min_t(uint32_t, width_max, remain) - 1;
> > +		data.dw01.byte_copy.width = width - 1;
> > +		data.dw03.src_pitch = mem->src.pitch - 1;
> > +		data.dw04.dst_pitch = mem->dst.pitch - 1;
> >  
> >  		igt_assert(bb_pos + sizeof(data) < mem->bb.size);
> >  		memcpy(bb + bb_pos, &data, sizeof(data));
> >  		bb_pos += sizeof(data);
> > +	} else {
> > +		remain = mem->src.width;
> >  
> > -		remain -= remain > width_max ? width_max : remain;
> > -		src_offset += shift;
> > -		dst_offset += shift;
> > +		/* Truncate pitches to match operation bits */
> > +		if (mem->src.pitch > width_max)
> > +			data.dw03.src_pitch = width_max - 1;
> > +		else
> > +			data.dw03.src_pitch = mem->src.pitch;
> >  
> > -		data.dw05.src_address_lo = src_offset;
> > -		data.dw06.src_address_hi = src_offset >> 32;
> > -		data.dw07.dst_address_lo = dst_offset;
> > -		data.dw08.dst_address_hi = dst_offset >> 32;
> > +		if (mem->dst.pitch > width_max)
> > +			data.dw04.dst_pitch = width_max - 1;
> > +		else
> > +			data.dw04.dst_pitch = mem->dst.pitch;
> > +
> > +		while (remain) {
> > +			data.dw01.val = min_t(uint32_t, width_max, remain) - 1;
> > +
> > +			igt_assert(bb_pos + sizeof(data) < mem->bb.size);
> > +			memcpy(bb + bb_pos, &data, sizeof(data));
> > +			bb_pos += sizeof(data);
> > +
> > +			remain -= remain > width_max ? width_max : remain;
> > +			src_offset += shift;
> > +			dst_offset += shift;
> > +
> > +			data.dw05.src_address_lo = src_offset;
> > +			data.dw06.src_address_hi = src_offset >> 32;
> > +			data.dw07.dst_address_lo = dst_offset;
> > +			data.dw08.dst_address_hi = dst_offset >> 32;
> > +		}
> >  	}
> >  
> >  	if (emit_bbe) {
> > -- 
> > 2.43.0
> >