[PATCH] drm/amdgpu: add support of burst nop for gfx10

Christian König christian.koenig at amd.com
Tue Jul 30 07:41:03 UTC 2024


Am 30.07.24 um 07:21 schrieb Sunil Khatri:
> Problem:
> Till now we are adding NOP packet one by one
> i.e if we need N nop packets for padding we are adding
> N NOP packets in the ring which does not use the HW
> efficiently.
>
> Solution:
> Use the data block of the NOP packet for NOP packets
> up to the max no of NOPS HW support. Using this HW would
> skip passing the information to CP and it skips over
> N packets assuming NOP packets.
>
> Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
> ---
>   drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c | 22 +++++++++++++++++++---
>   1 file changed, 19 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> index 853084a2ce7f..01f98e2401ae 100644
> --- a/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> +++ b/drivers/gpu/drm/amd/amdgpu/gfx_v10_0.c
> @@ -9397,6 +9397,22 @@ static void gfx_v10_0_emit_mem_sync(struct amdgpu_ring *ring)
>   	amdgpu_ring_write(ring, gcr_cntl); /* GCR_CNTL */
>   }
>   
> +static void amdgpu_gfx10_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
> +{
> +	int i;
> +
> +	if (count == 0 || count > 0x3fff) {
> +		DRM_ERROR("Invalid NOP's pkt count\n");
> +		return;
> +	}

Please drop that, we should not have parameter validation in the 
backend. That's the job of the frontend and middleware.

> +
> +	for (i = 0; i < count; i++)
> +		if (count == 1)

That looks incorrect to me. You should probably test (i == 0) here, or 
even better move that outside of the loop.

Regards,
Christian.

> +			amdgpu_ring_write(ring, ring->funcs->nop | PACKET3(PACKET3_NOP, count - 1));
> +		else
> +			amdgpu_ring_write(ring, ring->funcs->nop);
> +}
> +
>   static void gfx_v10_ip_print(void *handle, struct drm_printer *p)
>   {
>   	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
> @@ -9588,7 +9604,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_gfx = {
>   	.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
>   	.test_ring = gfx_v10_0_ring_test_ring,
>   	.test_ib = gfx_v10_0_ring_test_ib,
> -	.insert_nop = amdgpu_ring_insert_nop,
> +	.insert_nop = amdgpu_gfx10_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_switch_buffer = gfx_v10_0_ring_emit_sb,
>   	.emit_cntxcntl = gfx_v10_0_ring_emit_cntxcntl,
> @@ -9629,7 +9645,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_compute = {
>   	.emit_hdp_flush = gfx_v10_0_ring_emit_hdp_flush,
>   	.test_ring = gfx_v10_0_ring_test_ring,
>   	.test_ib = gfx_v10_0_ring_test_ib,
> -	.insert_nop = amdgpu_ring_insert_nop,
> +	.insert_nop = amdgpu_gfx10_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,
>   	.emit_reg_wait = gfx_v10_0_ring_emit_reg_wait,
> @@ -9659,7 +9675,7 @@ static const struct amdgpu_ring_funcs gfx_v10_0_ring_funcs_kiq = {
>   	.emit_fence = gfx_v10_0_ring_emit_fence_kiq,
>   	.test_ring = gfx_v10_0_ring_test_ring,
>   	.test_ib = gfx_v10_0_ring_test_ib,
> -	.insert_nop = amdgpu_ring_insert_nop,
> +	.insert_nop = amdgpu_gfx10_ring_insert_nop,
>   	.pad_ib = amdgpu_ring_generic_pad_ib,
>   	.emit_rreg = gfx_v10_0_ring_emit_rreg,
>   	.emit_wreg = gfx_v10_0_ring_emit_wreg,



More information about the amd-gfx mailing list