[PATCH 2/3] drm/amdgpu: Use efficient ring padding with more rings

Tvrtko Ursulin tursulin at ursulin.net
Tue Dec 24 09:16:56 UTC 2024


On 23/12/2024 16:39, Tvrtko Ursulin wrote:
> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> 
> We can easily expand the approach of writing nops in blocks using memset32
> (via the respective helper) to more rings.
> 
> We do that by trivially factoring out a new amdgpu_ring_fill() helper out
> of amdgpu_ring_insert_nop() and call it from SDMA and VPE vfuncs.
> 
> The amount of padding with those rings is a bit less than GFX but it is
> still a bit nicer to use the same approach across the board.

Scratch this for now, I think there is a way to make something in this 
area more sellable by adding more advantages.

Regards,

Tvrtko

> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> Cc: Christian König <ckoenig.leichtzumerken at gmail.com>
> Cc: Sunil Khatri <sunil.khatri at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 29 +++++++++++++++++-------
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c  | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/cik_sdma.c    | 15 ++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 16 +++++++------
>   drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   | 16 +++++++------
>   12 files changed, 111 insertions(+), 78 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> index a27e32f48f99..c3a68eae1c9a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
> @@ -99,16 +99,16 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
>   	return 0;
>   }
>   
> -/** amdgpu_ring_insert_nop - insert NOP packets
> +/**
> + * amdgpu_ring_fill - insert dwords into a ring
>    *
>    * @ring: amdgpu_ring structure holding ring information
> - * @count: the number of NOP packets to insert
> - *
> - * This is the generic insert_nop function for rings except SDMA
> + * @val: dword value to insert
> + * @count: the number of dwords to insert
>    */
> -void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> +void amdgpu_ring_fill(struct amdgpu_ring *ring, u32 val, u32 count)
>   {
> -	uint32_t occupied, chunk1, chunk2;
> +	u32 occupied, chunk1, chunk2;
>   
>   	occupied = ring->wptr & ring->buf_mask;
>   	chunk1 = ring->buf_mask + 1 - occupied;
> @@ -116,16 +116,29 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   	chunk2 = count - chunk1;
>   
>   	if (chunk1)
> -		memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
> +		memset32(&ring->ring[occupied], val, chunk1);
>   
>   	if (chunk2)
> -		memset32(ring->ring, ring->funcs->nop, chunk2);
> +		memset32(ring->ring, val, chunk2);
>   
>   	ring->wptr += count;
>   	ring->wptr &= ring->ptr_mask;
>   	ring->count_dw -= count;
>   }
>   
> +/**
> + * amdgpu_ring_insert_nop - insert NOP packets
> + *
> + * @ring: amdgpu_ring structure holding ring information
> + * @count: the number of NOP packets to insert
> + *
> + * This is the generic insert_nop function for rings except SDMA
> + */
> +void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> +{
> +	amdgpu_ring_fill(ring, ring->funcs->nop, count);
> +}
> +
>   /**
>    * amdgpu_ring_generic_pad_ib - pad IB with NOP packets
>    *
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index dee5a1b4e572..4a8134b682f7 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -346,6 +346,7 @@ void amdgpu_ring_ib_on_emit_cntl(struct amdgpu_ring *ring);
>   void amdgpu_ring_ib_on_emit_ce(struct amdgpu_ring *ring);
>   void amdgpu_ring_ib_on_emit_de(struct amdgpu_ring *ring);
>   
> +void amdgpu_ring_fill(struct amdgpu_ring *ring, u32 val, u32 count);
>   void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
>   void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
>   void amdgpu_ring_commit(struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> index 121ee17b522b..92a444922b2c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vpe.c
> @@ -459,14 +459,16 @@ static int vpe_resume(struct amdgpu_ip_block *ip_block)
>   
>   static void vpe_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (i == 0)
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				VPE_CMD_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count) {
> +		count--;
> +		amdgpu_ring_write(ring, nop | VPE_CMD_NOP_HEADER_COUNT(count));
> +		if (count > 1)
> +			amdgpu_ring_fill(ring, nop, count);
> +		else if (count)
> +			amdgpu_ring_write(ring, nop);
> +	}
>   }
>   
>   static uint64_t vpe_get_csa_mc_addr(struct amdgpu_ring *ring, uint32_t vmid)
> diff --git a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> index d9bd8f3f17e2..275494e6cc8e 100644
> --- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
> @@ -200,14 +200,15 @@ static void cik_sdma_ring_set_wptr(struct amdgpu_ring *ring)
>   static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -					  SDMA_NOP_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring, nop | SDMA_NOP_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 135c5099bfb8..7e1498a60f01 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -224,14 +224,16 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index c611328671ed..eae8310407eb 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -400,14 +400,16 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index b48d9c0b2e1c..68120baafbbd 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -784,14 +784,16 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index 56507ae919b0..076a07400db6 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -344,14 +344,16 @@ static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index b764550834a0..97079c63e462 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -436,14 +436,16 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index b1818e87889a..8913f3ed4e2f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -253,14 +253,16 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 1a023b45f0be..f2b04adfebdc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -238,14 +238,16 @@ static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /*
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index 9c17df2cf37b..ce2af42a9289 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -270,14 +270,16 @@ static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
>   static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
>   {
>   	struct amdgpu_sdma_instance *sdma = amdgpu_sdma_get_instance_from_ring(ring);
> -	int i;
> +	const u32 nop = ring->funcs->nop;
>   
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> +	if (count && sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring,
> +				  nop | SDMA_PKT_NOP_HEADER_COUNT(--count));
> +
> +	if (count > 1)
> +		amdgpu_ring_fill(ring, nop, count);
> +	else if (count)
> +		amdgpu_ring_write(ring, nop);
>   }
>   
>   /**


More information about the amd-gfx mailing list