[PATCH 3/3] drm/amdgpu: busywait KIQ register accessing v2
Ding, Pixel
Pixel.Ding at amd.com
Tue Oct 17 08:38:49 UTC 2017
On 17/10/2017, 4:20 PM, "Koenig, Christian" <Christian.Koenig at amd.com> wrote:
>Am 17.10.2017 um 08:37 schrieb Pixel Ding:
>> From: pding <Pixel.Ding at amd.com>
>>
>> Register accessing is performed when IRQ is disabled. Never sleep in
>> this function.
>>
>> Known issue: dead sleep in many use cases of index/data registers.
>>
>> v2: wrap polling fence functions. don't trigger IRQ for polling in
>> case of wrongly fence signal.
>>
>> Signed-off-by: pding <Pixel.Ding at amd.com>
>> ---
>> drivers/gpu/drm/amd/amdgpu/amdgpu.h | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c | 4 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c | 4 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 8 +---
>> drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c | 53 +++++++++++++++++++++++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c | 2 +-
>> drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h | 4 ++
>> drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c | 30 ++++++-------
>> 8 files changed, 78 insertions(+), 29 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index ca212ef..cc06e62 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -885,7 +885,7 @@ struct amdgpu_mec {
>> struct amdgpu_kiq {
>> u64 eop_gpu_addr;
>> struct amdgpu_bo *eop_obj;
>> - struct mutex ring_mutex;
>> + spinlock_t ring_lock;
>> struct amdgpu_ring ring;
>> struct amdgpu_irq_src irq;
>> };
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> index 9d9965d..6d83573 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v8.c
>> @@ -788,7 +788,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
>> struct dma_fence *f;
>> struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>
>> - mutex_lock(&adev->gfx.kiq.ring_mutex);
>> + spin_lock(&adev->gfx.kiq.ring_lock);
>> amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
>> amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>> amdgpu_ring_write(ring,
>> @@ -796,7 +796,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
>> PACKET3_INVALIDATE_TLBS_PASID(pasid));
>> amdgpu_fence_emit(ring, &f);
>> amdgpu_ring_commit(ring);
>> - mutex_unlock(&adev->gfx.kiq.ring_mutex);
>> + spin_unlock(&adev->gfx.kiq.ring_lock);
>>
>> r = dma_fence_wait(f, false);
>> if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> index edbae19..c92217f 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gfx_v9.c
>> @@ -973,7 +973,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
>> struct dma_fence *f;
>> struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
>>
>> - mutex_lock(&adev->gfx.kiq.ring_mutex);
>> + spin_lock(&adev->gfx.kiq.ring_lock);
>> amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
>> amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
>> amdgpu_ring_write(ring,
>> @@ -983,7 +983,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
>> PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(2));
>> amdgpu_fence_emit(ring, &f);
>> amdgpu_ring_commit(ring);
>> - mutex_unlock(&adev->gfx.kiq.ring_mutex);
>> + spin_unlock(&adev->gfx.kiq.ring_lock);
>>
>> r = dma_fence_wait(f, false);
>> if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> index ab8f0d6..1197274 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
>> @@ -109,10 +109,8 @@ uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
>> {
>> uint32_t ret;
>>
>> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>> - BUG_ON(in_interrupt());
>> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>> return amdgpu_virt_kiq_rreg(adev, reg);
>> - }
>>
>> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>> ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
>> @@ -137,10 +135,8 @@ void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
>> adev->last_mm_index = v;
>> }
>>
>> - if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev)) {
>> - BUG_ON(in_interrupt());
>> + if (!(acc_flags & AMDGPU_REGS_NO_KIQ) && amdgpu_sriov_runtime(adev))
>> return amdgpu_virt_kiq_wreg(adev, reg, v);
>> - }
>>
>> if ((reg * 4) < adev->rmmio_size && !(acc_flags & AMDGPU_REGS_IDX))
>> writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> index 688740e..68a5e90 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
>> @@ -169,6 +169,32 @@ int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **f)
>> }
>>
>> /**
>> + * amdgpu_fence_emit_polling - emit a fence on the requeste ring
>> + *
>> + * @ring: ring the fence is associated with
>> + * @s: resulting sequence number
>> + *
>> + * Emits a fence command on the requested ring (all asics).
>> + * Used For polling fence.
>> + * Returns 0 on success, -ENOMEM on failure.
>> + */
>> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s)
>> +{
>> + uint32_t seq;
>> +
>> + if (!s)
>> + return -EINVAL;
>> +
>> + seq = ++ring->fence_drv.sync_seq;
>> + amdgpu_ring_emit_fence(ring, ring->fence_drv.gpu_addr,
>> + seq, AMDGPU_FENCE_FLAG_INT);
>> +
>> + *s = seq;
>> +
>> + return 0;
>> +}
>> +
>> +/**
>> * amdgpu_fence_schedule_fallback - schedule fallback check
>> *
>> * @ring: pointer to struct amdgpu_ring
>> @@ -281,6 +307,33 @@ int amdgpu_fence_wait_empty(struct amdgpu_ring *ring)
>> return r;
>> }
>>
>
>Please add some documentation here that timeout is in usec.
[Pixel] sure.
>
>> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
>> + uint32_t wait_seq,
>> + signed long timeout)
>> +{
>> + uint32_t seq, last_seq;
>> + struct amdgpu_fence_driver *drv = &ring->fence_drv;
>> +
>> + last_seq = atomic_read(&ring->fence_drv.last_seq);
>> +
>> + do {
>> + seq = amdgpu_fence_read(ring);
>> +
>> + if (unlikely(seq == last_seq))
>> + break;
>
>That doesn't look correct to me, it will abort the busy wait as soon as
>we see the last value we have seen.
>[pixel]you’re right. My intention is to use the “seq" as latest submitted one(sync_seq), then it means all submitted seqs are notified. This condition could be removed.
>> + if (seq >= wait_seq && wait_seq >= last_seq)
>> + break;
>> + if (seq <= last_seq &&
>> + (wait_seq <= seq || wait_seq >= last_seq))
>> + break;
>
>Why not just "(int32_t)(wait_seq - seq) > 0" ? IIRC that should be
>sufficient for a wrap around test.
>[pixel] seq could be larger (executed) or smaller (not yet) than wait_seq even without a wrap round, I think you mean "(int32_t)(last_seq - seq) > 0”
>it actually can know there’s a wrap around, but it still doesn't know if the waited seq is in the range between or not, we still need other conditions. Code here is to identify cases to break as:
===last_seq=====wait_seq====seq===
===wait_seq==seq========last_seq==
==seq==========last_seq==wait_seq=
Does it make sense?
Regards.
Pixel
>
>> + udelay(5);
>> + timeout -= 5;
>> + } while (timeout > 0);
>> +
>> + atomic_cmpxchg(&drv->last_seq, last_seq, wait_seq);
>> +
>> + return timeout;
>> +}
>> /**
>> * amdgpu_fence_count_emitted - get the count of emitted fences
>> *
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> index 4f6c68f..e5a9077 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
>> @@ -185,7 +185,7 @@ int amdgpu_gfx_kiq_init_ring(struct amdgpu_device *adev,
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> int r = 0;
>>
>> - mutex_init(&kiq->ring_mutex);
>> + spin_lock_init(&kiq->ring_lock);
>>
>> r = amdgpu_wb_get(adev, &adev->virt.reg_val_offs);
>> if (r)
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> index af8e544..9de89ea 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.h
>> @@ -89,8 +89,12 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
>> void amdgpu_fence_driver_suspend(struct amdgpu_device *adev);
>> void amdgpu_fence_driver_resume(struct amdgpu_device *adev);
>> int amdgpu_fence_emit(struct amdgpu_ring *ring, struct dma_fence **fence);
>> +int amdgpu_fence_emit_polling(struct amdgpu_ring *ring, uint32_t *s);
>> void amdgpu_fence_process(struct amdgpu_ring *ring);
>> int amdgpu_fence_wait_empty(struct amdgpu_ring *ring);
>> +signed long amdgpu_fence_wait_polling(struct amdgpu_ring *ring,
>> + uint32_t wait_seq,
>> + signed long timeout);
>> unsigned amdgpu_fence_count_emitted(struct amdgpu_ring *ring);
>>
>> /*
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> index ab05121..177fe10 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_virt.c
>> @@ -22,7 +22,7 @@
>> */
>>
>> #include "amdgpu.h"
>> -#define MAX_KIQ_REG_WAIT 100000
>> +#define MAX_KIQ_REG_WAIT 100000000 /* in usecs */
>>
>> int amdgpu_allocate_static_csa(struct amdgpu_device *adev)
>> {
>> @@ -114,27 +114,24 @@ void amdgpu_virt_init_setting(struct amdgpu_device *adev)
>> uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>> {
>> signed long r;
>> - uint32_t val;
>> - struct dma_fence *f;
>> + uint32_t val, seq;
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> struct amdgpu_ring *ring = &kiq->ring;
>>
>> BUG_ON(!ring->funcs->emit_rreg);
>>
>> - mutex_lock(&kiq->ring_mutex);
>> + spin_lock(&kiq->ring_lock);
>> amdgpu_ring_alloc(ring, 32);
>> amdgpu_ring_emit_rreg(ring, reg);
>> - amdgpu_fence_emit(ring, &f);
>> + amdgpu_fence_emit_polling(ring, &seq);
>> amdgpu_ring_commit(ring);
>> - mutex_unlock(&kiq->ring_mutex);
>> + spin_unlock(&kiq->ring_lock);
>>
>> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>> - dma_fence_put(f);
>> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>> if (r < 1) {
>> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> + DRM_ERROR("wait for kiq fence error: %ld\n", r);
>> return ~0;
>> }
>> -
>> val = adev->wb.wb[adev->virt.reg_val_offs];
>>
>> return val;
>> @@ -143,23 +140,22 @@ uint32_t amdgpu_virt_kiq_rreg(struct amdgpu_device *adev, uint32_t reg)
>> void amdgpu_virt_kiq_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v)
>> {
>> signed long r;
>> - struct dma_fence *f;
>> + uint32_t seq;
>> struct amdgpu_kiq *kiq = &adev->gfx.kiq;
>> struct amdgpu_ring *ring = &kiq->ring;
>>
>> BUG_ON(!ring->funcs->emit_wreg);
>>
>> - mutex_lock(&kiq->ring_mutex);
>> + spin_lock(&kiq->ring_lock);
>> amdgpu_ring_alloc(ring, 32);
>> amdgpu_ring_emit_wreg(ring, reg, v);
>> - amdgpu_fence_emit(ring, &f);
>> + amdgpu_fence_emit_polling(ring, &seq);
>> amdgpu_ring_commit(ring);
>> - mutex_unlock(&kiq->ring_mutex);
>> + spin_unlock(&kiq->ring_lock);
>>
>> - r = dma_fence_wait_timeout(f, false, msecs_to_jiffies(MAX_KIQ_REG_WAIT));
>> + r = amdgpu_fence_wait_polling(ring, seq, MAX_KIQ_REG_WAIT);
>> if (r < 1)
>> - DRM_ERROR("wait for kiq fence error: %ld.\n", r);
>> - dma_fence_put(f);
>> + DRM_ERROR("wait for kiq fence error: %ld\n", r);
>> }
>>
>> /**
>
>
More information about the amd-gfx
mailing list