[PATCH v3 3/4] lib/gpgpu_shader: pass surface description to shaders via inline data

Grzegorzek, Dominik dominik.grzegorzek at intel.com
Fri Nov 22 14:27:22 UTC 2024


On Thu, 2024-11-21 at 18:12 +0100, Andrzej Hajda wrote:
> Since newer architectures require stateless load/stores we need to pass
> surface description to the shader. Instead of doing it for every call
> we can use inline data which is passed by COMPUTE_WALKER and is stored
> in GRF register r1.
> 
> Signed-off-by: Andrzej Hajda <andrzej.hajda at intel.com>
> ---
>  lib/gpgpu_shader.c | 22 ++++++++++++++++++++++
>  1 file changed, 22 insertions(+)
> 
> diff --git a/lib/gpgpu_shader.c b/lib/gpgpu_shader.c
> index 363435e7efd3..518423158880 100644
> --- a/lib/gpgpu_shader.c
> +++ b/lib/gpgpu_shader.c
> @@ -148,6 +148,16 @@ __xelp_gpgpu_execfunc(struct intel_bb *ibb,
>  		      engine | I915_EXEC_NO_RELOC, false);
>  }
>  
> +static void
> +fill_inline_data(uint32_t *inline_data, uint64_t target_offset, struct intel_buf *target)
> +{
> +	igt_assert(target->surface[0].stride == intel_buf_width(target) * target->bpp/8);
> +	*inline_data++ = lower_32_bits(target_offset);
> +	*inline_data++ = upper_32_bits(target_offset);
> +	*inline_data++ = target->surface[0].stride;
> +	*inline_data++ = intel_buf_height(target);
> +}
> +
>  static void
>  __xehp_gpgpu_execfunc(struct intel_bb *ibb,
>  		      struct intel_buf *target,
> @@ -159,6 +169,7 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
>  	struct xehp_interface_descriptor_data idd;
>  	uint32_t sip_offset;
>  	uint64_t engine;
> +	uint32_t *inline_data;
>  
>  	intel_bb_add_intel_buf(ibb, target, true);
>  
> @@ -186,7 +197,10 @@ __xehp_gpgpu_execfunc(struct intel_bb *ibb,
>  	if (sip_offset)
>  		emit_sip(ibb, sip_offset);
>  
> +	/* Inline data is at 31th/32th dword of COMPUTE_WALKER, BSpec: 67028 */
> +	inline_data = intel_bb_ptr(ibb) + 4 * (shdr->gen_ver < 2000 ? 31 : 32);
>  	xehp_emit_compute_walk(ibb, 0, 0, x_dim * 16, y_dim, &idd, 0x0);
> +	fill_inline_data(inline_data, CANONICAL(target->addr.offset), target);
>  
>  	intel_bb_out(ibb, MI_BATCH_BUFFER_END);
>  	intel_bb_ptr_align(ibb, 32);
> @@ -217,10 +231,18 @@ void gpgpu_shader_exec(struct intel_bb *ibb,
>  		       struct gpgpu_shader *sip,
>  		       uint64_t ring, bool explicit_engine)
>  {
> +	uint64_t ahnd;
> +
>  	igt_require(shdr->gen_ver >= SUPPORTED_GEN_VER);
>  	igt_assert(ibb->size >= PAGE_SIZE);
>  	igt_assert(ibb->ptr == ibb->batch);
>  
> +	ahnd = intel_allocator_open_full(ibb->fd, 0, 0, 0, INTEL_ALLOCATOR_SIMPLE,
> +					 ALLOC_STRATEGY_LOW_TO_HIGH, 0);
> +	target->addr.offset = intel_allocator_alloc(ahnd, target->handle,
> +						    target->surface[0].size, 0);
I believe we should open the allocator with ibb->vm_id to be less error prone. Otherwise we make
an assumption that ibb->vm_id is empty upon gpgpu_shader_exec, which may not be true.

Regards,
Dominik			
> +	intel_allocator_close(ahnd);
> +
>  	if (shdr->gen_ver >= 1250)
>  		__xehp_gpgpu_execfunc(ibb, target, x_dim, y_dim, shdr, sip,
>  				      ring, explicit_engine);
> 



More information about the igt-dev mailing list