[PATCH v2 2/2] drm/msm: Hangcheck progress detection
Hillf Danton
hdanton at sina.com
Wed Nov 2 02:09:17 UTC 2022
On 1 Nov 2022 15:33:10 -0700 Rob Clark <robdclark at chromium.org>
> --- a/drivers/gpu/drm/msm/msm_gpu.c
> +++ b/drivers/gpu/drm/msm/msm_gpu.c
> @@ -500,6 +500,21 @@ static void hangcheck_timer_reset(struct msm_gpu *gpu)
> round_jiffies_up(jiffies + msecs_to_jiffies(priv->hangcheck_period)));
> }
>
> +static bool made_progress(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
> +{
> + if (ring->hangcheck_progress_retries >= DRM_MSM_HANGCHECK_PROGRESS_RETRIES)
> + return false;
> +
> + if (!gpu->funcs->progress)
> + return false;
Retrying cannot make a difference when no progress callback is provided.
> +
> + if (!gpu->funcs->progress(gpu, ring))
> + return false;
> +
> + ring->hangcheck_progress_retries++;
> + return true;
> +}
> +
> static void hangcheck_handler(struct timer_list *t)
> {
> struct msm_gpu *gpu = from_timer(gpu, t, hangcheck_timer);
> @@ -511,9 +526,12 @@ static void hangcheck_handler(struct timer_list *t)
> if (fence != ring->hangcheck_fence) {
> /* some progress has been made.. ya! */
> ring->hangcheck_fence = fence;
> - } else if (fence_before(fence, ring->fctx->last_fence)) {
> + ring->hangcheck_progress_retries = 0;
> + } else if (fence_before(fence, ring->fctx->last_fence) &&
> + !made_progress(gpu, ring)) {
> /* no progress and not done.. hung! */
> ring->hangcheck_fence = fence;
> + ring->hangcheck_progress_retries = 0;
> DRM_DEV_ERROR(dev->dev, "%s: hangcheck detected gpu lockup rb %d!\n",
> gpu->name, ring->id);
> DRM_DEV_ERROR(dev->dev, "%s: completed fence: %u\n",
Cutting DRM_MSM_HANGCHECK_DEFAULT_PERIOD down to 250ms leads to false hang
reports when ->progress is not implemented.
> diff --git a/drivers/gpu/drm/msm/msm_gpu.h b/drivers/gpu/drm/msm/msm_gpu.h
> index 585fd9c8d45a..d8f355e9f0b2 100644
> --- a/drivers/gpu/drm/msm/msm_gpu.h
> +++ b/drivers/gpu/drm/msm/msm_gpu.h
> @@ -78,6 +78,8 @@ struct msm_gpu_funcs {
> struct msm_gem_address_space *(*create_private_address_space)
> (struct msm_gpu *gpu);
> uint32_t (*get_rptr)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
> +
> + bool (*progress)(struct msm_gpu *gpu, struct msm_ringbuffer *ring);
> };
>
> /* Additional state for iommu faults: */
> @@ -236,7 +238,8 @@ struct msm_gpu {
> */
> #define DRM_MSM_INACTIVE_PERIOD 66 /* in ms (roughly four frames) */
>
> -#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 500 /* in ms */
> +#define DRM_MSM_HANGCHECK_DEFAULT_PERIOD 250 /* in ms */
> +#define DRM_MSM_HANGCHECK_PROGRESS_RETRIES 3
More information about the dri-devel
mailing list