[PATCH 4/4] drm/amdgpu: busywait KIQ register accessing
Christian König
ckoenig.leichtzumerken at gmail.com
Fri Oct 13 09:16:58 UTC 2017
Am 13.10.2017 um 10:26 schrieb Pixel Ding:
> From: pding <Pixel.Ding at amd.com>
>
> Register accessing is performed when IRQ is disabled. Never sleep in
> this function.
>
> Known issue: dead sleep in many use cases of index/data registers.
>
> Signed-off-by: pding <Pixel.Ding at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 ++---
> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 2 +-
> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 1 +
> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 55 ++++++++++++++++++------------
> 6 files changed, 40 insertions(+), 28 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index ca212ef..f9de756 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -886,6 +886,7 @@ struct amdgpu_kiq {
> u64 eop_gpu_addr;
> struct amdgpu_bo *eop_obj;
> struct mutex ring_mutex;
You can remove the ring_mutex if you don't use it any more.
> + spinlock_t ring_lock;
> struct amdgpu_ring ring;
> struct amdgpu_irq_src irq;
> };
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> index ab8f0d6..1197274 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
> {
> uint32_t ret;
>
> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
> - BUG_ON(in_interrupt());
> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
> return amdgpu_virt_kiq_rreg(adev, reg);
> - }
>
> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
> adev->last_mm_index = v;
> }
>
> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
> - BUG_ON(in_interrupt());
> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
> return amdgpu_virt_kiq_wreg(adev, reg, v);
> - }
>
> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 2044758..c6c7bf3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -109,7 +109,7 @@ static void amdgpu_fence_write(struct amdgpu_ring *ring, u32 seq)
> * Reads a fence value from memory (all asics).
> * Returns the value of the fence read from memory.
> */
> -static u32 amdgpu_fence_read(struct amdgpu_ring *ring)
> +u32 amdgpu_fence_read(struct amdgpu_ring *ring)
> {
> struct amdgpu_fence_driver *drv = &ring->fence_drv;
> u32 seq = 0;
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> index 4f6c68f..46fa88c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
> @@ -186,6 +186,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
> int r = 0;
>
> mutex_init(&kiq->ring_mutex);
> + spin_lock_init(&kiq->ring_lock);
>
> r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
> if (r)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> index af8e544..a4fa923 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
> @@ -89,6 +89,7 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
> void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
> void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
> +u32 amdgpu_fence_read(struct amdgpu_ring *ring);
> void amdgpu_fence_process(struct amdgpu_ring *ring);
> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
> unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> index ab05121..9757df1 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
> @@ -22,7 +22,7 @@
> */
>
> #include "amdgpu.h"
> -#define MAX_KIQ_REG_WAIT 100000
> +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */
You want to busy wait 100 seconds for an result?
Forget it that is way to long you will certainly run into problems with
the NMI much earlier.
>
> int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
> {
> @@ -113,28 +113,32 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
>
> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
> {
> - signed long r;
> - uint32_t val;
> - struct dma_fence *f;
> + signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
> + unsigned long flags;
> + uint32_t val, seq, wait_seq;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> struct amdgpu_ring *ring = &kiq->ring;
>
> BUG_ON(!ring->funcs->emit_rreg);
>
> - mutex_lock(&kiq->ring_mutex);
> + spin_lock_irqsave(&kiq->ring_lock, flags);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_rreg(ring, reg);
> - amdgpu_fence_emit(ring, &f);
> + wait_seq = ++ring->fence_drv.sync_seq;
> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> + wait_seq, AMDGPU_FENCE_FLAG_INT);
Do you still need to send an interrupt when you busy wait? I don't think so.
> amdgpu_ring_commit(ring);
> - mutex_unlock(&kiq->ring_mutex);
> -
> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
> - dma_fence_put(f);
> - if (r < 1) {
> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> + do {
> + seq = amdgpu_fence_read(ring);
> + udelay(5);
> + timeout -= 5;
> + } while (seq < wait_seq && timeout > 0);
You need to correctly handle sequence wrap around here.
> +
> + if (timeout < 1) {
> + DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
> return ~0;
> }
> -
> val = adev->wb.wb[adev->virt.reg_val_offs];
>
> return val;
> @@ -142,24 +146,33 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>
> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
> {
> - signed long r;
> + signed long timeout = usecs_to_jiffies(MAX_KIQ_REG_WAIT);
> + unsigned long flags;
> + uint32_t seq, wait_seq;
> struct dma_fence *f;
> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
> struct amdgpu_ring *ring = &kiq->ring;
>
> BUG_ON(!ring->funcs->emit_wreg);
>
> - mutex_lock(&kiq->ring_mutex);
> + spin_lock_irqsave(&kiq->ring_lock, flags);
> amdgpu_ring_alloc(ring, 32);
> amdgpu_ring_emit_wreg(ring, reg, v);
> - amdgpu_fence_emit(ring, &f);
> + /* amdgpu_fence_emit(ring, &f); */
> + wait_seq = ++ring->fence_drv.sync_seq;
> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
> + wait_seq, AMDGPU_FENCE_FLAG_INT);
> amdgpu_ring_commit(ring);
> - mutex_unlock(&kiq->ring_mutex);
> + spin_unlock_irqrestore(&kiq->ring_lock, flags);
> +
> + do {
> + seq = amdgpu_fence_read(ring);
> + udelay(5);
> + timeout -= 5;
> + } while (seq < wait_seq && timeout > 0);
Same here. It would probably be better to have a function for this in
amdgpu_fence.c
Regards,
Christian.
>
> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
> - if (r < 1)
> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
> - dma_fence_put(f);
> + if (timeout < 1)
> + DRM_ERROR("wait for kiq fence error: %ld\n", timeout);
> }
>
> /**
More information about the amd-gfx
mailing list