[PATCH 05/12] drm/amdgpu: Use memset32 for sdma insert nops

Tvrtko Ursulin tursulin at igalia.com
Fri Dec 27 11:19:31 UTC 2024


From: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>

Instead of open coding the NOP padding via the inefficient
amdgpu_ring_write() helper, which the compiler is not able to optimise
much, we can use the new amdgpu_ring_fill() helper, which pads using
memset32().

With SDMA this should have much less benefit than what was measured with
GFX (only SDMA v4.0 uses the 256-byte ring padding while the rest use 16),
but on the other hand it should do no harm and is at least more consistent.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
Cc: Christian König <christian.koenig at amd.com>
Cc: Sunil Khatri <sunil.khatri at amd.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 17 +---------------
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 26 ++++++++++++++++++++++++
 drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c |  4 +---
 3 files changed, 28 insertions(+), 19 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 8e4e9ec68262..a4ae09fed5c3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -108,22 +108,7 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
  */
 void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
 {
-	uint32_t occupied, chunk1, chunk2;
-
-	occupied = ring->wptr & ring->buf_mask;
-	chunk1 = ring->buf_mask + 1 - occupied;
-	chunk1 = (chunk1 >= count) ? count : chunk1;
-	chunk2 = count - chunk1;
-
-	if (chunk1)
-		memset32(&ring->ring[occupied], ring->funcs->nop, chunk1);
-
-	if (chunk2)
-		memset32(ring->ring, ring->funcs->nop, chunk2);
-
-	ring->wptr += count;
-	ring->wptr &= ring->ptr_mask;
-	ring->count_dw -= count;
+	amdgpu_ring_fill(ring, ring->funcs->nop, count);
 }
 
 /**
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index 4c0861ebc77a..0a59738fa1d4 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -415,6 +415,32 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 	ring->count_dw -= count_dw;
 }
 
+static inline void amdgpu_ring_fill(struct amdgpu_ring *ring,
+				    u32 val, u32 count)
+{
+	const u32 buf_mask = ring->buf_mask;
+	u32 occupied, chunk1, chunk2;
+	u64 wptr = ring->wptr;
+
+	if (count == 0)
+		return;
+
+	occupied = wptr & buf_mask;
+	chunk1 = buf_mask + 1 - occupied;
+	chunk1 = (chunk1 >= count) ? count : chunk1;
+	chunk2 = count - chunk1;
+
+	if (chunk1)
+		memset32(&ring->ring[occupied], val, chunk1);
+
+	if (chunk2)
+		memset32(ring->ring, val, chunk2);
+
+	wptr += count;
+	ring->wptr = wptr & ring->ptr_mask;
+	ring->count_dw -= count;
+}
+
 /**
  * amdgpu_ring_patch_cond_exec - patch dw count of conditional execute
  * @ring: amdgpu_ring structure
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
index 148413f01875..2d07fcbd21b7 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sdma.c
@@ -38,7 +38,6 @@ void amdgpu_sdma_ring_insert_nop(struct amdgpu_ring *ring, u32 count)
 {
 	struct amdgpu_sdma_instance *sdma = ring->sdma.instance;
 	const u32 nop = ring->funcs->nop;
-	u32 i;
 
 	if (!count)
 		return;
@@ -48,8 +47,7 @@ void amdgpu_sdma_ring_insert_nop(struct amdgpu_ring *ring, u32 count)
 					(--count & ring->sdma.nop_pkt.mask) <<
 					ring->sdma.nop_pkt.shift);
 
-	for (i = 0; i < count; i++)
-		amdgpu_ring_write(ring, nop);
+	amdgpu_ring_fill(ring, nop, count);
 }
 
 uint64_t amdgpu_sdma_get_csa_mc_addr(struct amdgpu_ring *ring,
-- 
2.47.1



More information about the amd-gfx mailing list