[PATCH 1/2] lib/amdgpu: add support for gang cs

Christian König christian.koenig at amd.com
Wed Jan 17 07:33:42 UTC 2024



Am 17.01.24 um 05:54 schrieb vitaly.prosyak at amd.com:
> From: Vitaly Prosyak <vitaly.prosyak at amd.com>
>
> When gang command submission is used we need to add fields
> for the second buf and second pm4 packet.
>
> Add an ASIC-dependent implementation of WAIT_REG_MEM, used to poll a
> location in register or memory space until the comparison against a
> reference value is satisfied.
>
> Cc: Jesse Zhang <jesse.zhang at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: Christian Koenig <christian.koenig at amd.com>
> Signed-off-by: Yogesh Mohan Marimuthu <yogesh.mohanmarimuthu at amd.com>
> Signed-off-by: Vitaly Prosyak <vitaly.prosyak at amd.com>

Acked-by: Christian König <christian.koenig at amd.com> for the series.

Going to give those a testing round since I'm working on gang submit 
improvements anyway.

Thanks,
Christian.

> ---
>   lib/amdgpu/amd_ip_blocks.c | 35 +++++++++++++++++++++++++++++++++++
>   lib/amdgpu/amd_ip_blocks.h | 20 ++++++++++++++++----
>   2 files changed, 51 insertions(+), 4 deletions(-)
>
> diff --git a/lib/amdgpu/amd_ip_blocks.c b/lib/amdgpu/amd_ip_blocks.c
> index 1adea6987..20264c019 100644
> --- a/lib/amdgpu/amd_ip_blocks.c
> +++ b/lib/amdgpu/amd_ip_blocks.c
> @@ -288,6 +288,39 @@ gfx_ring_copy_linear(const struct amdgpu_ip_funcs *func,
>   	return 0;
>   }
>   
> +static int
> +gfx_ring_wait_reg_mem(const struct amdgpu_ip_funcs *func,
> +			const struct amdgpu_ring_context *ring_context,
> +			uint32_t *pm4_dw)
> +{
> +	uint32_t i;
> +
> +	i = *pm4_dw;
> +	ring_context->pm4[i++] = PACKET3(PACKET3_WAIT_REG_MEM, 5);
> +	ring_context->pm4[i++] = (WAIT_REG_MEM_MEM_SPACE(1) | /* memory */
> +							WAIT_REG_MEM_FUNCTION(3) | /* == */
> +							WAIT_REG_MEM_ENGINE(0));  /* me */
> +	ring_context->pm4[i++] = lower_32_bits(ring_context->bo_mc);
> +	ring_context->pm4[i++] = upper_32_bits(ring_context->bo_mc);
> +	ring_context->pm4[i++] = func->deadbeaf; /* reference value */
> +	ring_context->pm4[i++] = 0xffffffff; /* and mask */
> +	ring_context->pm4[i++] = 0x00000004; /* poll interval */
> +	*pm4_dw = i;
> +
> +	return 0;
> +}
> +
> +static int
> +sdma_ring_wait_reg_mem(const struct amdgpu_ip_funcs *func,
> +			const struct amdgpu_ring_context *ring_context,
> +			uint32_t *pm4_dw)
> +{
> +	int r;
> +
> +	r = gfx_ring_wait_reg_mem(func, ring_context, pm4_dw);
> +	return r;
> +}
> +
>   /* we may cobine these two functions later */
>   static int
>   x_compare(const struct amdgpu_ip_funcs *func,
> @@ -336,6 +369,7 @@ static struct amdgpu_ip_funcs gfx_v8_x_ip_funcs = {
>   	.compare = x_compare,
>   	.compare_pattern = x_compare_pattern,
>   	.get_reg_offset = gfx_v8_0_get_reg_offset,
> +	.wait_reg_mem = gfx_ring_wait_reg_mem,
>   };
>   
>   static struct amdgpu_ip_funcs sdma_v3_x_ip_funcs = {
> @@ -351,6 +385,7 @@ static struct amdgpu_ip_funcs sdma_v3_x_ip_funcs = {
>   	.compare = x_compare,
>   	.compare_pattern = x_compare_pattern,
>   	.get_reg_offset = gfx_v8_0_get_reg_offset,
> +	.wait_reg_mem = sdma_ring_wait_reg_mem,
>   };
>   
>   struct amdgpu_ip_block_version gfx_v8_x_ip_block = {
> diff --git a/lib/amdgpu/amd_ip_blocks.h b/lib/amdgpu/amd_ip_blocks.h
> index aef433e7f..4cad30d1e 100644
> --- a/lib/amdgpu/amd_ip_blocks.h
> +++ b/lib/amdgpu/amd_ip_blocks.h
> @@ -31,22 +31,31 @@ struct amdgpu_ring_context {
>   	int res_cnt; /* num of bo in amdgpu_bo_handle resources[2] */
>   
>   	uint32_t write_length;  /* length of data */
> +	uint32_t write_length2; /* length of data for second packet */
>   	uint32_t *pm4;		/* data of the packet */
>   	uint32_t pm4_size;	/* max allocated packet size */
>   	bool secure;		/* secure or not */
>   
> -	uint64_t bo_mc;		/* result from amdgpu_bo_alloc_and_map */
> -	uint64_t bo_mc2;	/* result from amdgpu_bo_alloc_and_map */
> +	uint64_t bo_mc;		/* GPU address of first buffer */
> +	uint64_t bo_mc2;	/* GPU address for pm4 packet */
> +	uint64_t bo_mc3;	/* GPU address of second buffer */
> +	uint64_t bo_mc4;	/* GPU address of second pm4 packet */
>   
>   	uint32_t pm4_dw;	/* actual size of pm4 */
> +	uint32_t pm4_dw2;	/* actual size of second pm4 */
>   
> -	volatile uint32_t *bo_cpu;
> -	volatile uint32_t *bo2_cpu;
> +	volatile uint32_t *bo_cpu;	/* cpu address of mapped GPU buf */
> +	volatile uint32_t *bo2_cpu;	/* cpu address of mapped pm4 */
> +	volatile uint32_t *bo3_cpu;	/* cpu address of mapped GPU second buf */
> +	volatile uint32_t *bo4_cpu;	/* cpu address of mapped second pm4 */
>   
>   	uint32_t bo_cpu_origin;
>   
>   	amdgpu_bo_handle bo;
>   	amdgpu_bo_handle bo2;
> +	amdgpu_bo_handle bo3;
> +	amdgpu_bo_handle bo4;
> +
>   	amdgpu_bo_handle boa_vram[2];
>   	amdgpu_bo_handle boa_gtt[2];
>   
> @@ -56,6 +65,8 @@ struct amdgpu_ring_context {
>   	amdgpu_bo_handle resources[4]; /* amdgpu_bo_alloc_and_map */
>   	amdgpu_va_handle va_handle;    /* amdgpu_bo_alloc_and_map */
>   	amdgpu_va_handle va_handle2;   /* amdgpu_bo_alloc_and_map */
> +	amdgpu_va_handle va_handle3;   /* amdgpu_bo_alloc_and_map */
> +	amdgpu_va_handle va_handle4;   /* amdgpu_bo_alloc_and_map */
>   
>   	struct amdgpu_cs_ib_info ib_info;     /* amdgpu_bo_list_create */
>   	struct amdgpu_cs_request ibs_request; /* amdgpu_cs_query_fence_status */
> @@ -76,6 +87,7 @@ struct amdgpu_ip_funcs {
>   	int (*compare)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, int div);
>   	int (*compare_pattern)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, int div);
>   	int (*get_reg_offset)(enum general_reg reg);
> +	int (*wait_reg_mem)(const struct amdgpu_ip_funcs *func, const struct amdgpu_ring_context *context, uint32_t *pm4_dw);
>   
>   };
>   



More information about the igt-dev mailing list