[PATCH 1/2] drm/msm: Handle ringbuffer overflow
Dmitry Baryshkov
dmitry.baryshkov at linaro.org
Thu Nov 25 07:36:42 UTC 2021
On 28/04/2021 22:36, Rob Clark wrote:
> From: Rob Clark <robdclark at chromium.org>
>
> Currently if userspace manages to fill up the ring faster than the GPU
> can consume we (a) spin for up to 1sec, and then (b) overwrite the
> ringbuffer contents from previous submits that the GPU is still busy
> executing. Which predictably goes rather badly.
>
> Instead, just skip flushing (updating WPTR) and reset ring->next back to
> where it was before we tried writing the submit into the ringbuffer, and
> return an error to userspace (which can then try again).
>
> Signed-off-by: Rob Clark <robdclark at chromium.org>
Rob, you've posted this patch, but never merged it. Should it be merged
at some point?
> ---
> drivers/gpu/drm/msm/adreno/a5xx_gpu.c | 3 +++
> drivers/gpu/drm/msm/adreno/a6xx_gpu.c | 3 +++
> drivers/gpu/drm/msm/adreno/adreno_gpu.c | 24 +++++++++++++++++-
> drivers/gpu/drm/msm/msm_gem_submit.c | 7 +++++-
> drivers/gpu/drm/msm/msm_gpu.c | 33 +++++++++++++++++++++++--
> drivers/gpu/drm/msm/msm_gpu.h | 2 +-
> drivers/gpu/drm/msm/msm_ringbuffer.h | 5 ++++
> 7 files changed, 72 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> index ce13d49e615b..0c8faad3b328 100644
> --- a/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a5xx_gpu.c
> @@ -36,6 +36,9 @@ void a5xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring,
> OUT_RING(ring, upper_32_bits(shadowptr(a5xx_gpu, ring)));
> }
>
> + if (unlikely(ring->overflow))
> + return;
> +
> spin_lock_irqsave(&ring->preempt_lock, flags);
>
> /* Copy the shadow to the actual register */
> diff --git a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> index d553f62f4eeb..4a4728a774c0 100644
> --- a/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/a6xx_gpu.c
> @@ -68,6 +68,9 @@ static void a6xx_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> OUT_RING(ring, upper_32_bits(shadowptr(a6xx_gpu, ring)));
> }
>
> + if (unlikely(ring->overflow))
> + return;
> +
> spin_lock_irqsave(&ring->preempt_lock, flags);
>
> /* Copy the shadow to the actual register */
> diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> index 0f184c3dd9d9..a658777e07b1 100644
> --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c
> @@ -467,6 +467,9 @@ void adreno_flush(struct msm_gpu *gpu, struct msm_ringbuffer *ring, u32 reg)
> {
> uint32_t wptr;
>
> + if (unlikely(ring->overflow))
> + return;
> +
> /* Copy the shadow to the actual register */
> ring->cur = ring->next;
>
> @@ -788,12 +791,31 @@ static uint32_t ring_freewords(struct msm_ringbuffer *ring)
> return (rptr + (size - 1) - wptr) % size;
> }
>
> +static bool space_avail(struct msm_ringbuffer *ring, uint32_t ndwords)
> +{
> + if (ring_freewords(ring) >= ndwords)
> + return true;
> +
> + /* We don't have a good way to know in general when the RPTR has
> + * advanced.. newer things that use CP_WHERE_AM_I to update the
> + * shadow rptr could possibly insert a packet to generate an irq.
> + * But that doesn't cover older GPUs. But if the ringbuffer is
> + * full, it could take a while before it is empty again, so just
> + * insert a blind sleep to avoid a busy loop.
> + */
> + msleep(1);
> +
> + return false;
> +}
> +
> void adreno_wait_ring(struct msm_ringbuffer *ring, uint32_t ndwords)
> {
> - if (spin_until(ring_freewords(ring) >= ndwords))
> + if (spin_until(space_avail(ring, ndwords))) {
> DRM_DEV_ERROR(ring->gpu->dev->dev,
> "timeout waiting for space in ringbuffer %d\n",
> ring->id);
> + ring->overflow = true;
> + }
> }
>
> /* Get legacy powerlevels from qcom,gpu-pwrlevels and populate the opp table */
> diff --git a/drivers/gpu/drm/msm/msm_gem_submit.c b/drivers/gpu/drm/msm/msm_gem_submit.c
> index 5480852bdeda..4bc669460fda 100644
> --- a/drivers/gpu/drm/msm/msm_gem_submit.c
> +++ b/drivers/gpu/drm/msm/msm_gem_submit.c
> @@ -683,6 +683,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> submitid = atomic_inc_return(&ident) - 1;
>
> ring = gpu->rb[queue->prio];
> +
> + GEM_WARN_ON(ring->overflow);
> +
> trace_msm_gpu_submit(pid_nr(pid), ring->id, submitid,
> args->nr_bos, args->nr_cmds);
>
> @@ -829,7 +832,9 @@ int msm_ioctl_gem_submit(struct drm_device *dev, void *data,
> }
> }
>
> - msm_gpu_submit(gpu, submit);
> + ret = msm_gpu_submit(gpu, submit);
> + if (ret)
> + goto out;
>
> args->fence = submit->fence->seqno;
>
> diff --git a/drivers/gpu/drm/msm/msm_gpu.c b/drivers/gpu/drm/msm/msm_gpu.c
> index ab7c167b0623..7655ad9108c8 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -787,7 +787,7 @@ void msm_gpu_retire(struct msm_gpu *gpu)
> }
>
> /* add bo's to gpu's ring, and kick gpu: */
> -void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
> +int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
> {
> struct drm_device *dev = gpu->dev;
> struct msm_drm_private *priv = dev->dev_private;
> @@ -834,9 +834,38 @@ void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
> spin_unlock(&ring->submit_lock);
>
> gpu->funcs->submit(gpu, submit);
> - priv->lastctx = submit->queue->ctx;
>
> hangcheck_timer_reset(gpu);
> +
> + if (unlikely(ring->overflow)) {
> + /*
> + * Reset the ptr back to before the submit, so the GPU
> + * doesn't see a partial submit:
> + */
> + ring->next = ring->cur;
> +
> + /*
> + * Clear the overflow flag, hopefully the next submit on
> + * the ring actually fits
> + */
> + ring->overflow = false;
> +
> + /*
> + * One might be tempted to remove the submit from the
> + * submits list, and drop it's reference (and drop the
> + * active reference for all the bos). But we can't
> + * really signal the fence attached to obj->resv without
> + * disturbing other fences on the timeline. So instead
> + * just leave it and let it retire normally when a
> + * later submit completes.
> + */
> +
> + return -ENOSPC;
> + }
> +
> + priv->lastctx = submit->queue->ctx;
> +
> + return 0;
> }
>
> /*
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index d7cd02cd2109..2dd2ef1f8328 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -302,7 +302,7 @@ int msm_gpu_perfcntr_sample(struct msm_gpu *gpu, uint32_t *activetime,
> uint32_t *totaltime, uint32_t ncntrs, uint32_t *cntrs);
>
> void msm_gpu_retire(struct msm_gpu *gpu);
> -void msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);
> +int msm_gpu_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit);
>
> int msm_gpu_init(struct drm_device *drm, struct platform_device *pdev,
> struct msm_gpu *gpu, const struct msm_gpu_funcs *funcs,
> diff --git a/drivers/gpu/drm/msm/msm_ringbuffer.h b/drivers/gpu/drm/msm/msm_ringbuffer.h
> index fe55d4a1aa16..d8ad9818c389 100644
> --- a/drivers/gpu/drm/msm/msm_ringbuffer.h
> +++ b/drivers/gpu/drm/msm/msm_ringbuffer.h
> @@ -40,6 +40,8 @@ struct msm_ringbuffer {
> struct drm_gem_object *bo;
> uint32_t *start, *end, *cur, *next;
>
> + bool overflow;
> +
> /*
> * List of in-flight submits on this ring. Protected by submit_lock.
> */
> @@ -69,6 +71,9 @@ void msm_ringbuffer_destroy(struct msm_ringbuffer *ring);
> static inline void
> OUT_RING(struct msm_ringbuffer *ring, uint32_t data)
> {
> + if (ring->overflow)
> + return;
> +
> /*
> * ring->next points to the current command being written - it won't be
> * committed as ring->cur until the flush
>
--
With best wishes
Dmitry
More information about the dri-devel
mailing list