[PATCH 04/12] drm/amdgpu: Consolidate a bunch of similar sdma insert nop vfuncs

Christian König ckoenig.leichtzumerken at gmail.com
Thu Jan 2 13:49:42 UTC 2025


Am 27.12.24 um 12:19 schrieb Tvrtko Ursulin:
> From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
>
> A lot of the hardware generations apparently use the same nop insertion
> logic, just with different masks and shifts.
>
> We can consolidate if we store those shifts and masks in the ring,
> which shrinks both the source and the binary.

The shift and mask should be identical for most HW generations anyway. 
Only SI might be a bit different.

So it's probably better to have a special function for SI and a general
one for all other HW generations.
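
Roughly something like this, I would think. Just a sketch, and it assumes
the SDMA_PKT_NOP_HEADER_COUNT() count encoding from the per generation
packet headers really is identical everywhere except SI and that one of
those headers can be used from the common code:

/* Common helper for all non-SI generations (sketch only). */
void amdgpu_sdma_ring_insert_nop(struct amdgpu_ring *ring, u32 count)
{
	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
	const u32 nop = ring->funcs->nop;
	u32 i;

	if (!count)
		return;

	/* First dword is a burst NOP covering the remaining padding dwords. */
	if (sdma && sdma->burst_nop)
		amdgpu_ring_write(ring, nop |
				  SDMA_PKT_NOP_HEADER_COUNT(--count));

	for (i = 0; i < count; i++)
		amdgpu_ring_write(ring, nop);
}

SI (or any generation whose NOP packet really differs) would then keep its
own small insert_nop implementation, and everything else points at the
common one.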

Regards,
Christian.

>
> Though it is a bit of a departure from the existing "duplicate
> everything" approach, so we will see how this is received.
>
> Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Sunil Khatri <sunil.khatri at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h |  5 +++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c | 18 ++++++++++++++++++
>   drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h |  1 +
>   drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c   | 19 ++++---------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c   | 19 ++++---------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c   | 23 +++++++----------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c | 23 +++++++----------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c   | 19 ++++---------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c   | 19 ++++---------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c   | 19 ++++---------------
>   drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c   | 19 ++++---------------
>   11 files changed, 62 insertions(+), 122 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index 429c77db920f..4c0861ebc77a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -303,6 +303,11 @@ struct amdgpu_ring {
>   	struct {
>   		struct amdgpu_sdma_instance *instance;
>   		int			     index;
> +
> +		struct {
> +			u32		mask;
> +			unsigned int	shift;
> +		} nop_pkt;
>   	} sdma;
>   
>   	/* used for mes */
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> index d43dfec82624..148413f01875 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
> @@ -34,6 +34,24 @@
>    * GPU SDMA IP block helpers function.
>    */
>   
> +void amdgpu_sdma_ring_insert_nop(struct amdgpu_ring *ring, u32 count)
> +{
> +	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> +	const u32 nop = ring->funcs->nop;
> +	u32 i;
> +
> +	if (!count)
> +		return;
> +
> +	if (sdma && sdma->burst_nop)
> +		amdgpu_ring_write(ring, nop |
> +					(--count & ring->sdma.nop_pkt.mask) <<
> +					ring->sdma.nop_pkt.shift);
> +
> +	for (i = 0; i < count; i++)
> +		amdgpu_ring_write(ring, nop);
> +}
> +
>   uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
>   				     unsigned int vmid)
>   {
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> index 7debf3ed0b46..d2642a9113ae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.h
> @@ -159,6 +159,7 @@ struct amdgpu_buffer_funcs {
>   #define amdgpu_emit_copy_buffer(adev, ib, s, d, b, t) (adev)->mman.buffer_funcs->emit_copy_buffer((ib),  (s), (d), (b), (t))
>   #define amdgpu_emit_fill_buffer(adev, ib, s, d, b) (adev)->mman.buffer_funcs->emit_fill_buffer((ib), (s), (d), (b))
>   
> +void amdgpu_sdma_ring_insert_nop(struct amdgpu_ring *ring, u32 count);
>   uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring, unsigned vmid);
>   int amdgpu_sdma_ras_late_init(struct amdgpu_device *adev,
>   			      struct ras_common_if *ras_block);
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> index 2e844dba4ad5..e0ed65eca431 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v2_4.c
> @@ -221,19 +221,6 @@ static void sdma_v2_4_ring_set_wptr(struct amdgpu_ring *ring)
>   	WREG32(mmSDMA0_GFX_RB_WPTR + sdma_offsets[ring->me], ring->wptr << 2);
>   }
>   
> -static void sdma_v2_4_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v2_4_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -252,7 +239,7 @@ static void sdma_v2_4_ring_emit_ib(struct amdgpu_ring *ring,
>   	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   	/* IB packet must end on a 8 DW boundary */
> -	sdma_v2_4_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -866,6 +853,8 @@ static int sdma_v2_4_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   	}
>   
>   	return r;
> @@ -1137,7 +1126,7 @@ static const struct amdgpu_ring_funcs sdma_v2_4_ring_funcs = {
>   	.emit_hdp_flush = sdma_v2_4_ring_emit_hdp_flush,
>   	.test_ring = sdma_v2_4_ring_test_ring,
>   	.test_ib = sdma_v2_4_ring_test_ib,
> -	.insert_nop = sdma_v2_4_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v2_4_ring_pad_ib,
>   	.emit_wreg = sdma_v2_4_ring_emit_wreg,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> index 104fd1214c4c..8a644ea28589 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c
> @@ -397,19 +397,6 @@ static void sdma_v3_0_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v3_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v3_0_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -428,7 +415,7 @@ static void sdma_v3_0_ring_emit_ib(struct amdgpu_ring *ring,
>   	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   	/* IB packet must end on a 8 DW boundary */
> -	sdma_v3_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1152,6 +1139,8 @@ static int sdma_v3_0_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   	}
>   
>   	return r;
> @@ -1579,7 +1568,7 @@ static const struct amdgpu_ring_funcs sdma_v3_0_ring_funcs = {
>   	.emit_hdp_flush = sdma_v3_0_ring_emit_hdp_flush,
>   	.test_ring = sdma_v3_0_ring_test_ring,
>   	.test_ib = sdma_v3_0_ring_test_ib,
> -	.insert_nop = sdma_v3_0_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v3_0_ring_pad_ib,
>   	.emit_wreg = sdma_v3_0_ring_emit_wreg,
>   };
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> index c91d05a4593e..0f0f05a03cd4 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_0.c
> @@ -781,19 +781,6 @@ static void sdma_v4_0_page_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v4_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v4_0_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -812,7 +799,7 @@ static void sdma_v4_0_ring_emit_ib(struct amdgpu_ring *ring,
>   	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   	/* IB packet must end on a 8 DW boundary */
> -	sdma_v4_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1876,6 +1863,8 @@ static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   
>   		if (adev->sdma.has_page_queue) {
>   			ring = &adev->sdma.instance[i].page;
> @@ -1917,6 +1906,8 @@ static int sdma_v4_0_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   			ring->sdma.instance = &adev->sdma.instance[i];
>   			ring->sdma.index = i;
> +			ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +			ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   		}
>   	}
>   
> @@ -2443,7 +2434,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_ring_funcs = {
>   	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
>   	.test_ring = sdma_v4_0_ring_test_ring,
>   	.test_ib = sdma_v4_0_ring_test_ib,
> -	.insert_nop = sdma_v4_0_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v4_0_ring_pad_ib,
>   	.emit_wreg = sdma_v4_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
> @@ -2475,7 +2466,7 @@ static const struct amdgpu_ring_funcs sdma_v4_0_page_ring_funcs = {
>   	.emit_hdp_flush = sdma_v4_0_ring_emit_hdp_flush,
>   	.test_ring = sdma_v4_0_ring_test_ring,
>   	.test_ib = sdma_v4_0_ring_test_ib,
> -	.insert_nop = sdma_v4_0_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v4_0_ring_pad_ib,
>   	.emit_wreg = sdma_v4_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v4_0_ring_emit_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> index d1d21a3951f8..f68b7f2e0a40 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v4_4_2.c
> @@ -341,19 +341,6 @@ static void sdma_v4_4_2_page_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v4_4_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v4_4_2_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -372,7 +359,7 @@ static void sdma_v4_4_2_ring_emit_ib(struct amdgpu_ring *ring,
>   	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
>   
>   	/* IB packet must end on a 8 DW boundary */
> -	sdma_v4_4_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1431,6 +1418,8 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   
>   		if (adev->sdma.has_page_queue) {
>   			ring = &adev->sdma.instance[i].page;
> @@ -1455,6 +1444,8 @@ static int sdma_v4_4_2_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   			ring->sdma.instance = &adev->sdma.instance[i];
>   			ring->sdma.index = i;
> +			ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +			ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   		}
>   	}
>   
> @@ -2013,7 +2004,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_ring_funcs = {
>   	.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
>   	.test_ring = sdma_v4_4_2_ring_test_ring,
>   	.test_ib = sdma_v4_4_2_ring_test_ib,
> -	.insert_nop = sdma_v4_4_2_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v4_4_2_ring_pad_ib,
>   	.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
> @@ -2045,7 +2036,7 @@ static const struct amdgpu_ring_funcs sdma_v4_4_2_page_ring_funcs = {
>   	.emit_hdp_flush = sdma_v4_4_2_ring_emit_hdp_flush,
>   	.test_ring = sdma_v4_4_2_ring_test_ring,
>   	.test_ib = sdma_v4_4_2_ring_test_ib,
> -	.insert_nop = sdma_v4_4_2_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v4_4_2_ring_pad_ib,
>   	.emit_wreg = sdma_v4_4_2_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v4_4_2_ring_emit_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> index 97536f82dfcd..bbf60bfa4b1b 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_0.c
> @@ -433,19 +433,6 @@ static void sdma_v5_0_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v5_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v5_0_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -472,7 +459,7 @@ static void sdma_v5_0_ring_emit_ib(struct amdgpu_ring *ring,
>   	 * (wptr + 6 + x) % 8 = 0.
>   	 * The expression below, is a solution of x.
>   	 */
> -	sdma_v5_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1453,6 +1440,8 @@ static int sdma_v5_0_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   	}
>   
>   	adev->sdma.supported_reset =
> @@ -1991,7 +1980,7 @@ static const struct amdgpu_ring_funcs sdma_v5_0_ring_funcs = {
>   	.emit_hdp_flush = sdma_v5_0_ring_emit_hdp_flush,
>   	.test_ring = sdma_v5_0_ring_test_ring,
>   	.test_ib = sdma_v5_0_ring_test_ib,
> -	.insert_nop = sdma_v5_0_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v5_0_ring_pad_ib,
>   	.emit_wreg = sdma_v5_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v5_0_ring_emit_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> index 8eaddee1d97d..69b88db32117 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
> @@ -250,19 +250,6 @@ static void sdma_v5_2_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v5_2_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v5_2_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -289,7 +276,7 @@ static void sdma_v5_2_ring_emit_ib(struct amdgpu_ring *ring,
>   	 * (wptr + 6 + x) % 8 = 0.
>   	 * The expression below, is a solution of x.
>   	 */
> -	sdma_v5_2_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1358,6 +1345,8 @@ static int sdma_v5_2_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   	}
>   
>   	adev->sdma.supported_reset =
> @@ -1986,7 +1975,7 @@ static const struct amdgpu_ring_funcs sdma_v5_2_ring_funcs = {
>   	.emit_hdp_flush = sdma_v5_2_ring_emit_hdp_flush,
>   	.test_ring = sdma_v5_2_ring_test_ring,
>   	.test_ib = sdma_v5_2_ring_test_ib,
> -	.insert_nop = sdma_v5_2_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v5_2_ring_pad_ib,
>   	.begin_use = sdma_v5_2_ring_begin_use,
>   	.end_use = sdma_v5_2_ring_end_use,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> index 3ead269eccdc..aa3992fd5313 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v6_0.c
> @@ -235,19 +235,6 @@ static void sdma_v6_0_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v6_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /*
>    * sdma_v6_0_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -274,7 +261,7 @@ static void sdma_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
>   	 * (wptr + 6 + x) % 8 = 0.
>   	 * The expression below, is a solution of x.
>   	 */
> -	sdma_v6_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1351,6 +1338,8 @@ static int sdma_v6_0_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   	}
>   
>   	adev->sdma.supported_reset =
> @@ -1707,7 +1696,7 @@ static const struct amdgpu_ring_funcs sdma_v6_0_ring_funcs = {
>   	.emit_hdp_flush = sdma_v6_0_ring_emit_hdp_flush,
>   	.test_ring = sdma_v6_0_ring_test_ring,
>   	.test_ib = sdma_v6_0_ring_test_ib,
> -	.insert_nop = sdma_v6_0_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v6_0_ring_pad_ib,
>   	.emit_wreg = sdma_v6_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v6_0_ring_emit_reg_wait,
> diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> index 5fadaf35a03a..4ccc00248a09 100644
> --- a/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v7_0.c
> @@ -267,19 +267,6 @@ static void sdma_v7_0_ring_set_wptr(struct amdgpu_ring *ring)
>   	}
>   }
>   
> -static void sdma_v7_0_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> -{
> -	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
> -	int i;
> -
> -	for (i = 0; i < count; i++)
> -		if (sdma && sdma->burst_nop && (i == 0))
> -			amdgpu_ring_write(ring, ring->funcs->nop |
> -				SDMA_PKT_NOP_HEADER_COUNT(count - 1));
> -		else
> -			amdgpu_ring_write(ring, ring->funcs->nop);
> -}
> -
>   /**
>    * sdma_v7_0_ring_emit_ib - Schedule an IB on the DMA engine
>    *
> @@ -306,7 +293,7 @@ static void sdma_v7_0_ring_emit_ib(struct amdgpu_ring *ring,
>   	 * (wptr + 6 + x) % 8 = 0.
>   	 * The expression below, is a solution of x.
>   	 */
> -	sdma_v7_0_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
> +	amdgpu_sdma_ring_insert_nop(ring, (2 - lower_32_bits(ring->wptr)) & 7);
>   
>   	amdgpu_ring_write(ring, SDMA_PKT_COPY_LINEAR_HEADER_OP(SDMA_OP_INDIRECT) |
>   			  SDMA_PKT_INDIRECT_HEADER_VMID(vmid & 0xf));
> @@ -1365,6 +1352,8 @@ static int sdma_v7_0_sw_init(struct amdgpu_ip_block *ip_block)
>   
>   		ring->sdma.instance = &adev->sdma.instance[i];
>   		ring->sdma.index = i;
> +		ring->sdma.nop_pkt.mask = SDMA_PKT_NOP_HEADER_count_mask;
> +		ring->sdma.nop_pkt.shift = SDMA_PKT_NOP_HEADER_count_shift;
>   	}
>   
>   	adev->sdma.supported_reset =
> @@ -1687,7 +1676,7 @@ static const struct amdgpu_ring_funcs sdma_v7_0_ring_funcs = {
>   	.emit_hdp_flush = sdma_v7_0_ring_emit_hdp_flush,
>   	.test_ring = sdma_v7_0_ring_test_ring,
>   	.test_ib = sdma_v7_0_ring_test_ib,
> -	.insert_nop = sdma_v7_0_ring_insert_nop,
> +	.insert_nop = amdgpu_sdma_ring_insert_nop,
>   	.pad_ib = sdma_v7_0_ring_pad_ib,
>   	.emit_wreg = sdma_v7_0_ring_emit_wreg,
>   	.emit_reg_wait = sdma_v7_0_ring_emit_reg_wait,


