[PATCH 4/4] drm/amdgpu: busywait KIQ register accessing

Liu, Monk Monk.Liu at amd.com
Fri Oct 13 10:02:25 UTC 2017


The busy waiting method somehow make the performance worse considering 16 VF and vf0 need to wait 15*time_slice when it try to access registers

BR

-----Original Message-----
From: Ding, Pixel 
Sent: 2017年10月13日 17:39
To: Liu, Monk <Monk.Liu at amd.com>; amd-gfx at lists.freedesktop.org
Cc: Li, Bingley <Bingley.Li at amd.com>
Subject: Re: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing

I’m afraid there’s racing issue if polling and IRQ use cases are mixed at the same time.

The original implementation is as your suggested. Is there any benefit to keep to sleepy version?
— 
Sincerely Yours,
Pixel








On 13/10/2017, 5:34 PM, "Liu, Monk" <Monk.Liu at amd.com> wrote:

>Pixel
>
>I don't think this will work well, my suggestion is you add a new function like:
>amdgpu_wreg_kiq_busy(),
>
>which will write registers through KIQ and use polling/busy wait, and the original amdgu_wreg_no_kiq() can be still there.
>
>When you need to disable sleep like in IRQ CONTEXT, you can call wreg_kiq_busy() or wreg_no_kiq(),
>
>But don't just change the original function 
>
>BR Monk
>
>-----Original Message-----
>From: amd-gfx [mailto:amd-gfx-bounces at lists.freedesktop.org] On Behalf Of Pixel Ding
>Sent: 2017年10月13日 16:26
>To: amd-gfx at lists.freedesktop.org
>Cc: Ding, Pixel <Pixel.Ding at amd.com>; Li, Bingley <Bingley.Li at amd.com>
>Subject: [PATCH 4/4] drm/amdgpu: busywait KIQ register accessing
>
>From: pding <Pixel.Ding at amd.com>
>
>Register accessing is performed when IRQ is disabled. Never sleep in this function.
>
>Known issue: dead sleep in many use cases of index/data registers.
>
>Signed-off-by: pding <Pixel.Ding at amd.com>
>---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h        |  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c |  8 ++---  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c  |  2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c    |  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h   |  1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c   | 55 ++++++++++++++++++------------
> 6 files changed, 40 insertions(+), 28 deletions(-)
>
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>index ca212ef..f9de756 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>@@ -886,6 +886,7 @@ struct amdgpu_kiq {
> 	u64			eop_gpu_addr;
> 	struct amdgpu_bo	*eop_obj;
> 	struct mutex		ring_mutex;
>+	spinlock_t              ring_lock;
> 	struct amdgpu_ring	ring;
> 	struct amdgpu_irq_src	irq;
> };
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>index ab8f0d6..1197274 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>@@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,  {
> 	uint32_t ret;
> 
>-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>-		BUG_ON(in_interrupt());
>+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
> 		return amdgpu_virt_kiq_rreg(adev, reg);
>-	}
> 
> 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
> 		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4)); @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
> 		adev->last_mm_index = v;
> 	}
> 
>-	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>-		BUG_ON(in_interrupt());
>+	if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
> 		return amdgpu_virt_kiq_wreg(adev, reg, v);
>-	}
> 
> 	if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
> 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4)); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>index 2044758..c6c7bf3 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>@@ -109,7 +109,7 @@ static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
>  * Reads a fence value from memory (all asics).
>  * Returns the value of the fence read from memory.
>  */
>-static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
>+u32 amdgpu_fence_read(struct amdgpu_ring *ring)
> {
> 	struct amdgpu_fence_driver *drv = &ring->fence_drv;
> 	u32 seq = 0;
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>index 4f6c68f..46fa88c 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>@@ -186,6 +186,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
> 	int r = 0;
> 
> 	mutex_init(&kiq->ring_mutex);
>+	spin_lock_init(&kiq->ring_lock);
> 
> 	r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
> 	if (r)
>diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>index af8e544..a4fa923 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>@@ -89,6 +89,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,  void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);  void amdgpu_fence_driver_resume(struct amdgpu_device *adev);  int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
>+u32 amdgpu_fence_read(struct amdgpu_ring *ring);
> void amdgpu_fence_process(struct amdgpu_ring *ring);  int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);  unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring); diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>index ab05121..9757df1 100644
>--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>@@ -22,7 +22,7 @@
>  */
> 
> #include "amdgpu.h"
>-#define MAX_KIQ_REG_WAIT	100000
>+#define MAX_KIQ_REG_WAIT	100000000 /* in usecs */
> 
> int amdgpu_allocate_static_csa(struct amdgpu_device *adev)  { @@ -113,28 +113,32 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
> 
> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)  {
>-	signed long r;
>-	uint32_t val;
>-	struct dma_fence *f;
>+	signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>+	unsigned long flags;
>+	uint32_t val, seq, wait_seq;
> 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> 	struct amdgpu_ring *ring = &kiq->ring;
> 
> 	BUG_ON(!ring->funcs->emit_rreg);
> 
>-	mutex_lock(&kiq->ring_mutex);
>+	spin_lock_irqsave(&kiq->ring_lock, flags);
> 	amdgpu_ring_alloc(ring, 32);
> 	amdgpu_ring_emit_rreg(ring, reg);
>-	amdgpu_fence_emit(ring, &f);
>+	wait_seq = ++ring->fence_drv.sync_seq;
>+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>+			       wait_seq, AMDGPU_FENCE_FLAG_INT);
> 	amdgpu_ring_commit(ring);
>-	mutex_unlock(&kiq->ring_mutex);
>-
>-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>-	dma_fence_put(f);
>-	if (r < 1) {
>-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>+	do {
>+		seq = amdgpu_fence_read(ring);
>+		udelay(5);
>+		timeout -= 5;
>+	} while (seq < wait_seq && timeout > 0);
>+
>+	if (timeout < 1) {
>+		DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
> 		return ~0;
> 	}
>-
> 	val = adev->wb.wb[adev->virt.reg_val_offs];
> 
> 	return val;
>@@ -142,24 +146,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> 
> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)  {
>-	signed long r;
>+	signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
>+	unsigned long flags;
>+	uint32_t seq, wait_seq;
> 	struct dma_fence *f;
> 	struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> 	struct amdgpu_ring *ring = &kiq->ring;
> 
> 	BUG_ON(!ring->funcs->emit_wreg);
> 
>-	mutex_lock(&kiq->ring_mutex);
>+	spin_lock_irqsave(&kiq->ring_lock, flags);
> 	amdgpu_ring_alloc(ring, 32);
> 	amdgpu_ring_emit_wreg(ring, reg, v);
>-	amdgpu_fence_emit(ring, &f);
>+	/* amdgpu_fence_emit(ring, &f); */
>+	wait_seq = ++ring->fence_drv.sync_seq;
>+	amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>+			       wait_seq, AMDGPU_FENCE_FLAG_INT);
> 	amdgpu_ring_commit(ring);
>-	mutex_unlock(&kiq->ring_mutex);
>+	spin_unlock_irqrestore(&kiq->ring_lock, flags);
>+
>+	do {
>+		seq = amdgpu_fence_read(ring);
>+		udelay(5);
>+		timeout -= 5;
>+	} while (seq < wait_seq && timeout > 0);
> 
>-	r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>-	if (r < 1)
>-		DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>-	dma_fence_put(f);
>+	if (timeout < 1)
>+		DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
> }
> 
> /**
>--
>2.9.5
>
>_______________________________________________
>amd-gfx mailing list
>amd-gfx at lists.freedesktop.org
>https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list