[PATCH v2 1/6] drm/amdgpu: optimize insert_nop using multi dwords

Sunil Khatri sunil.khatri at amd.com
Tue Oct 8 13:34:51 UTC 2024


Optimize the ring_insert_nop fn for n dwords in one
step rather then call to amdgpu_ring_write for each
nop packet. This avoid function call for each nop
packet and also wptr is updated once only.

Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 03bce2fa866a..910293664902 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -108,10 +108,26 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
  */
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	int i;
+	uint32_t occupied, chunk1, chunk2;
+	uint32_t *dst;
 
-	for (i = 0; i < count; i++)
-		amdgpu_ring_write(ring, ring->funcs->nop);
+	occupied = ring->wptr & ring->buf_mask;
+	dst = &ring->ring[occupied];
+	chunk1 = ring->buf_mask + 1 - occupied;
+	chunk1 = (chunk1 >= count) ? count : chunk1;
+	chunk2 = count - chunk1;
+
+	if (chunk1)
+		memset32(dst, ring->funcs->nop, chunk1);
+
+	if (chunk2) {
+		dst = ring->ring;
+		memset32(dst, ring->funcs->nop, chunk2);
+	}
+
+	ring->wptr += count;
+	ring->wptr &= ring->ptr_mask;
+	ring->count_dw -= count;
 }
 
 /**
-- 
2.34.1



More information about the amd-gfx mailing list