[PATCH] drm/amdgpu: Cache some values in ring emission helpers

Tvrtko Ursulin tvrtko.ursulin at igalia.com
Fri Jul 11 12:36:38 UTC 2025


Cache ring->wptr and, in amdgpu_ring_write_multiple(), ring->buf_mask in
local variables. This allows the compiler to emit more compact code,
because it no longer has to reload those values from the ring structure
on every use.

Before and after size comparisons:

     text	   data	    bss	     dec	    hex	filename
  10708384	 547307	 213512	11469203	 af0193	amdgpu.ko.0
  10688632	 547307	 213512	11449451	 aeb46b	amdgpu.ko.1

 add/remove: 0/0 grow/shrink: 3/340 up/down: 29/-20025 (-19996)
  Function                                     old     new   delta
   amdgpu_ring_write_multiple                   600     612     +12
   amdgpu_umsch_mm_submit_pkt                   196     207     +11
   amdgpu_ring_write_multiple.constprop         453     459      +6
   vcn_v2_0_enc_ring_insert_end                  69      64      -5
 ...
   jpeg_v4_0_3_dec_ring_emit_ib                1281    1045    -236
   jpeg_v2_0_dec_ring_emit_ib                  1402    1147    -255
   jpeg_v1_0_decode_ring_emit_fence            1788    1507    -281
 Total: Before=8949691, After=8929695, chg -0.22%

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
---
 drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index afaf951b0b78..d37e822ff46e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -467,8 +467,10 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
 
 static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
 {
-	ring->ring[ring->wptr++ & ring->buf_mask] = v;
-	ring->wptr &= ring->ptr_mask;
+	u64 wptr = ring->wptr;
+
+	ring->ring[wptr++ & ring->buf_mask] = v;
+	ring->wptr = wptr & ring->ptr_mask;
 	ring->count_dw--;
 }
 
@@ -476,9 +478,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 					      void *src, int count_dw)
 {
 	unsigned occupied, chunk1, chunk2;
+	u32 buf_mask = ring->buf_mask;
+	u64 wptr = ring->wptr;
 
-	occupied = ring->wptr & ring->buf_mask;
-	chunk1 = ring->buf_mask + 1 - occupied;
+	occupied = wptr & buf_mask;
+	chunk1 = buf_mask + 1 - occupied;
 	chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
 	chunk2 = count_dw - chunk1;
 	chunk1 <<= 2;
@@ -492,8 +496,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
 		memcpy(ring->ring, src, chunk2);
 	}
 
-	ring->wptr += count_dw;
-	ring->wptr &= ring->ptr_mask;
+	wptr += count_dw;
+	ring->wptr = wptr & ring->ptr_mask;
 	ring->count_dw -= count_dw;
 }
 
-- 
2.48.0



More information about the amd-gfx mailing list