[PATCH v3 5/8] drm/v3d: Use DRM_GPU_SCHED_STAT_NO_HANG to skip the reset

Tue Jun 24 09:14:30 UTC 2025

On 18/06/2025 15:47, Maíra Canal wrote:
> When a CL/CSD job times out, we check if the GPU has made any progress
> since the last timeout. If so, instead of resetting the hardware, we skip
> the reset and allow the timer to be rearmed. This gives long-running jobs
> a chance to complete.
> 
> Instead of manipulating the scheduler's internals, inform the scheduler,
> through the new status code DRM_GPU_SCHED_STAT_NO_HANG, that the job did
> not actually time out and that no reset was performed.
> 
> Signed-off-by: Maíra Canal <mcanal at igalia.com>
> ---
>   drivers/gpu/drm/v3d/v3d_sched.c | 16 ++--------------
>   1 file changed, 2 insertions(+), 14 deletions(-)
> 
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index e2b7f24d528e773968daea0f5b31c869584bb692..cc85f1b19ac405146a2a516f335a46376684bc91 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -744,16 +744,6 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job)
>   	return DRM_GPU_SCHED_STAT_RESET;
>   }
>   
> -static void
> -v3d_sched_skip_reset(struct drm_sched_job *sched_job)
> -{
> -	struct drm_gpu_scheduler *sched = sched_job->sched;
> -
> -	spin_lock(&sched->job_list_lock);
> -	list_add(&sched_job->list, &sched->pending_list);
> -	spin_unlock(&sched->job_list_lock);
> -}
> -
>   static enum drm_gpu_sched_stat
>   v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
>   		    u32 *timedout_ctca, u32 *timedout_ctra)
> @@ -772,8 +762,7 @@ v3d_cl_job_timedout(struct drm_sched_job *sched_job, enum v3d_queue q,
>   		*timedout_ctca = ctca;
>   		*timedout_ctra = ctra;
>   
> -		v3d_sched_skip_reset(sched_job);
> -		return DRM_GPU_SCHED_STAT_RESET;
> +		return DRM_GPU_SCHED_STAT_NO_HANG;
>   	}
>   
>   	return v3d_gpu_reset_for_timeout(v3d, sched_job);
> @@ -818,8 +807,7 @@ v3d_csd_job_timedout(struct drm_sched_job *sched_job)
>   	if (job->timedout_batches != batches) {
>   		job->timedout_batches = batches;
>   
> -		v3d_sched_skip_reset(sched_job);
> -		return DRM_GPU_SCHED_STAT_RESET;
> +		return DRM_GPU_SCHED_STAT_NO_HANG;
>   	}
>   
>   	return v3d_gpu_reset_for_timeout(v3d, sched_job);
> 

This one is easy; the behaviour looks the same before and after, so:

Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at igalia.com>

Regards,

Tvrtko