From: Rob Clark robdclark@chromium.org
I noticed while looking at some traces, that we could miss calls to msm_update_fence(), as the irq could have raced with retire_submits() which could have already popped the last submit on a ring out of the queue of in-flight submits. But walking the list of submits in the irq handler isn't really needed, as dma_fence_is_signaled() will dtrt. So lets just drop it entirely.
Reported-by: Steev Klimaszewski steev@kali.org Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/msm_gpu.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index e59a757578df..b61078f0cd0f 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -176,24 +176,6 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) return ret; }
-static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring, - uint32_t fence) -{ - struct msm_gem_submit *submit; - unsigned long flags; - - spin_lock_irqsave(&ring->submit_lock, flags); - list_for_each_entry(submit, &ring->submits, node) { - if (fence_after(submit->seqno, fence)) - break; - - msm_update_fence(submit->ring->fctx, - submit->hw_fence->seqno); - dma_fence_signal(submit->hw_fence); - } - spin_unlock_irqrestore(&ring->submit_lock, flags); -} - #ifdef CONFIG_DEV_COREDUMP static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen) @@ -450,7 +432,7 @@ static void recover_worker(struct kthread_work *work) if (ring == cur_ring) fence++;
- update_fences(gpu, ring, fence); + msm_update_fence(ring->fctx, fence); }
if (msm_gpu_active(gpu)) { @@ -753,7 +735,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) int i;
for (i = 0; i < gpu->nr_rings; i++) - update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence); + msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence);
kthread_queue_work(gpu->worker, &gpu->retire_work); update_sw_cntrs(gpu);
From: Rob Clark robdclark@chromium.org
Prior to the last commit, this could result in setting the GPU written fence value back to an older value, if we had missed updating completed_fence prior to suspend. This was mostly harmless as the GPU would eventually overwrite it again with the correct value. But we should just not do this. Instead just leave a sanity check that the fence looks plausible (in case the GPU scribbled on memory).
Reported-by: Steev Klimaszewski steev@kali.org Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark robdclark@chromium.org --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 11 ++++++++--- drivers/gpu/drm/msm/msm_gpu.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index e1aef4875e2f..dd044d557c7c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -498,10 +498,15 @@ int adreno_hw_init(struct msm_gpu *gpu)
ring->cur = ring->start; ring->next = ring->start; - - /* reset completed fence seqno: */ - ring->memptrs->fence = ring->fctx->completed_fence; ring->memptrs->rptr = 0; + + /* Detect and clean up an impossible fence, ie. if GPU managed + * to scribble something invalid, we don't want that to confuse + * us into mistakingly believing that submits have completed. + */ + if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) { + ring->memptrs->fence = ring->fctx->last_fence; + } }
return 0; diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index b61078f0cd0f..8c00f9187c03 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -430,7 +430,7 @@ static void recover_worker(struct kthread_work *work) * one more to clear the faulting submit */ if (ring == cur_ring) - fence++; + ring->memptrs->fence = ++fence;
msm_update_fence(ring->fctx, fence); }
On 6/15/22 10:01 AM, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
Prior to the last commit, this could result in setting the GPU written fence value back to an older value, if we had missed updating completed_fence prior to suspend. This was mostly harmless as the GPU would eventually overwrite it again with the correct value. But we should just not do this. Instead just leave a sanity check that the fence looks plausible (in case the GPU scribbled on memory).
Reported-by: Steev Klimaszewski steev@kali.org Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark robdclark@chromium.org
drivers/gpu/drm/msm/adreno/adreno_gpu.c | 11 ++++++++--- drivers/gpu/drm/msm/msm_gpu.c | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index e1aef4875e2f..dd044d557c7c 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -498,10 +498,15 @@ int adreno_hw_init(struct msm_gpu *gpu)
ring->cur = ring->start; ring->next = ring->start;
/* reset completed fence seqno: */
ring->memptrs->rptr = 0;ring->memptrs->fence = ring->fctx->completed_fence;
/* Detect and clean up an impossible fence, ie. if GPU managed
* to scribble something invalid, we don't want that to confuse
* us into mistakingly believing that submits have completed.
*/
if (fence_before(ring->fctx->last_fence, ring->memptrs->fence)) {
ring->memptrs->fence = ring->fctx->last_fence;
}
}
return 0;
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index b61078f0cd0f..8c00f9187c03 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -430,7 +430,7 @@ static void recover_worker(struct kthread_work *work) * one more to clear the faulting submit */ if (ring == cur_ring)
fence++;
ring->memptrs->fence = ++fence;
msm_update_fence(ring->fctx, fence); }
Tested on the Lenovo Yoga C630
Tested-by: Steev Klimaszewski steev@kali.org
On 6/15/22 10:01 AM, Rob Clark wrote:
From: Rob Clark robdclark@chromium.org
I noticed while looking at some traces, that we could miss calls to msm_update_fence(), as the irq could have raced with retire_submits() which could have already popped the last submit on a ring out of the queue of in-flight submits. But walking the list of submits in the irq handler isn't really needed, as dma_fence_is_signaled() will dtrt. So lets just drop it entirely.
Reported-by: Steev Klimaszewski steev@kali.org Fixes: 95d1deb02a9c ("drm/msm/gem: Add fenced vma unpin") Signed-off-by: Rob Clark robdclark@chromium.org
drivers/gpu/drm/msm/msm_gpu.c | 22 ++-------------------- 1 file changed, 2 insertions(+), 20 deletions(-)
diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c index e59a757578df..b61078f0cd0f 100644 --- a/drivers/gpu/drm/msm/msm_gpu.c +++ b/drivers/gpu/drm/msm/msm_gpu.c @@ -176,24 +176,6 @@ int msm_gpu_hw_init(struct msm_gpu *gpu) return ret; }
-static void update_fences(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
uint32_t fence)
-{
- struct msm_gem_submit *submit;
- unsigned long flags;
- spin_lock_irqsave(&ring->submit_lock, flags);
- list_for_each_entry(submit, &ring->submits, node) {
if (fence_after(submit->seqno, fence))
break;
msm_update_fence(submit->ring->fctx,
submit->hw_fence->seqno);
dma_fence_signal(submit->hw_fence);
- }
- spin_unlock_irqrestore(&ring->submit_lock, flags);
-}
- #ifdef CONFIG_DEV_COREDUMP static ssize_t msm_gpu_devcoredump_read(char *buffer, loff_t offset, size_t count, void *data, size_t datalen)
@@ -450,7 +432,7 @@ static void recover_worker(struct kthread_work *work) if (ring == cur_ring) fence++;
update_fences(gpu, ring, fence);
msm_update_fence(ring->fctx, fence);
}
if (msm_gpu_active(gpu)) {
@@ -753,7 +735,7 @@ void msm_gpu_retire(struct msm_gpu *gpu) int i;
for (i = 0; i < gpu->nr_rings; i++)
update_fences(gpu, gpu->rb[i], gpu->rb[i]->memptrs->fence);
msm_update_fence(gpu->rb[i]->fctx, gpu->rb[i]->memptrs->fence);
kthread_queue_work(gpu->worker, &gpu->retire_work); update_sw_cntrs(gpu);
Tested on the Lenovo Yoga C630
Tested-by: Steev Klimaszewski steev@kali.org
dri-devel@lists.freedesktop.org