[PATCH] drm/amdgpu: Cache some values in ring emission helpers
Tvrtko Ursulin
tvrtko.ursulin at igalia.com
Fri Jul 11 12:36:38 UTC 2025
By caching some values in local variables we allow the compiler to emit
more compact code, because it no longer has to reload those values from
memory on every use.
Before (amdgpu.ko.0) and after (amdgpu.ko.1) size comparison:
text data bss dec hex filename
10708384 547307 213512 11469203 af0193 amdgpu.ko.0
10688632 547307 213512 11449451 aeb46b amdgpu.ko.1
add/remove: 0/0 grow/shrink: 3/340 up/down: 29/-20025 (-19996)
Function old new delta
amdgpu_ring_write_multiple 600 612 +12
amdgpu_umsch_mm_submit_pkt 196 207 +11
amdgpu_ring_write_multiple.constprop 453 459 +6
vcn_v2_0_enc_ring_insert_end 69 64 -5
...
jpeg_v4_0_3_dec_ring_emit_ib 1281 1045 -236
jpeg_v2_0_dec_ring_emit_ib 1402 1147 -255
jpeg_v1_0_decode_ring_emit_fence 1788 1507 -281
Total: Before=8949691, After=8929695, chg -0.22%
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 16 ++++++++++------
1 file changed, 10 insertions(+), 6 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
index afaf951b0b78..d37e822ff46e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
@@ -467,8 +467,10 @@ static inline void amdgpu_ring_clear_ring(struct amdgpu_ring *ring)
static inline void amdgpu_ring_write(struct amdgpu_ring *ring, uint32_t v)
{
- ring->ring[ring->wptr++ & ring->buf_mask] = v;
- ring->wptr &= ring->ptr_mask;
+ u64 wptr = ring->wptr;
+
+ ring->ring[wptr++ & ring->buf_mask] = v;
+ ring->wptr = wptr & ring->ptr_mask;
ring->count_dw--;
}
@@ -476,9 +478,11 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
void *src, int count_dw)
{
unsigned occupied, chunk1, chunk2;
+ u32 buf_mask = ring->buf_mask;
+ u64 wptr = ring->wptr;
- occupied = ring->wptr & ring->buf_mask;
- chunk1 = ring->buf_mask + 1 - occupied;
+ occupied = wptr & buf_mask;
+ chunk1 = buf_mask + 1 - occupied;
chunk1 = (chunk1 >= count_dw) ? count_dw : chunk1;
chunk2 = count_dw - chunk1;
chunk1 <<= 2;
@@ -492,8 +496,8 @@ static inline void amdgpu_ring_write_multiple(struct amdgpu_ring *ring,
memcpy(ring->ring, src, chunk2);
}
- ring->wptr += count_dw;
- ring->wptr &= ring->ptr_mask;
+ wptr += count_dw;
+ ring->wptr = wptr & ring->ptr_mask;
ring->count_dw -= count_dw;
}
--
2.48.0
More information about the amd-gfx mailing list