[PATCH v2 1/6] drm/amdgpu: optimize insert_nop using multi dwords
Sunil Khatri
sunil.khatri at amd.com
Tue Oct 8 13:34:51 UTC 2024
Optimize the ring_insert_nop fn for n dwords in one
step rather then call to amdgpu_ring_write for each
nop packet. This avoid function call for each nop
packet and also wptr is updated once only.
Signed-off-by: Sunil Khatri <sunil.khatri at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c | 22 +++++++++++++++++++---
1 file changed, 19 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
index 03bce2fa866a..910293664902 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@@ -108,10 +108,26 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned int ndw)
*/
void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
{
- int i;
+ uint32_t occupied, chunk1, chunk2;
+ uint32_t *dst;
- for (i = 0; i < count; i++)
- amdgpu_ring_write(ring, ring->funcs->nop);
+ occupied = ring->wptr & ring->buf_mask;
+ dst = &ring->ring[occupied];
+ chunk1 = ring->buf_mask + 1 - occupied;
+ chunk1 = (chunk1 >= count) ? count : chunk1;
+ chunk2 = count - chunk1;
+
+ if (chunk1)
+ memset32(dst, ring->funcs->nop, chunk1);
+
+ if (chunk2) {
+ dst = ring->ring;
+ memset32(dst, ring->funcs->nop, chunk2);
+ }
+
+ ring->wptr += count;
+ ring->wptr &= ring->ptr_mask;
+ ring->count_dw -= count;
}
/**
--
2.34.1
More information about the amd-gfx
mailing list