[PATCH v5 02/16] drm/sched: Allow using a dedicated workqueue for the timeout/fault tdr

Daniel Vetter daniel at ffwll.ch
Tue Jun 29 08:50:36 UTC 2021


On Tue, Jun 29, 2021 at 09:34:56AM +0200, Boris Brezillon wrote:
> Mali Midgard/Bifrost GPUs have 3 hardware queues but only a global GPU
> reset. This leads to extra complexity when we need to synchronize the
> timeout work of each queue with the reset work. One solution to address
> that is to have an ordered workqueue at the driver level that the
> different schedulers will use to queue their timeout work. Thanks to
> the serialization provided by the ordered workqueue, we are guaranteed
> that timeout handlers are executed sequentially, and can thus easily
> reset the GPU from the timeout handler without extra synchronization.
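
A minimal sketch of the driver-side setup described above, using the new
drm_sched_init() signature from this patch (the my_gpu_* names are made
up for illustration, error unwinding omitted for brevity):

  struct my_gpu_device {
          struct workqueue_struct *reset_wq;
          struct drm_gpu_scheduler sched[3];
  };

  static int my_gpu_scheds_init(struct my_gpu_device *gpu)
  {
          int i, ret;

          /* One ordered wq shared by all schedulers: their timeout
           * handlers can never run concurrently. */
          gpu->reset_wq = alloc_ordered_workqueue("my-gpu-reset", 0);
          if (!gpu->reset_wq)
                  return -ENOMEM;

          for (i = 0; i < 3; i++) {
                  ret = drm_sched_init(&gpu->sched[i], &my_gpu_sched_ops,
                                       1, 0, msecs_to_jiffies(500),
                                       gpu->reset_wq, /* timeout_wq */
                                       NULL, "my-gpu-queue");
                  if (ret)
                          return ret;
          }

          return 0;
  }
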
> 
> v5:
> * Add a new paragraph to the timedout_job() documentation
> 
> v4:
> * Actually use the timeout_wq to queue the timeout work
> 
> v3:
> * New patch
> 
> Signed-off-by: Boris Brezillon <boris.brezillon at collabora.com>
> Reviewed-by: Steven Price <steven.price at arm.com>
> Reviewed-by: Lucas Stach <l.stach at pengutronix.de>
> Cc: Qiang Yu <yuq825 at gmail.com>
> Cc: Emma Anholt <emma at anholt.net>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Cc: "Christian König" <christian.koenig at amd.com>

Acked-by: Daniel Vetter <daniel.vetter at ffwll.ch>

Also, since I'm occasionally blinded by my own pride, add Suggested-by: me?
I did spend quite a bit of time pondering how to untangle your various
lockdep splats in the tdr handler :-)

Cheers, Daniel

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c |  2 +-
>  drivers/gpu/drm/etnaviv/etnaviv_sched.c   |  3 ++-
>  drivers/gpu/drm/lima/lima_sched.c         |  3 ++-
>  drivers/gpu/drm/panfrost/panfrost_job.c   |  3 ++-
>  drivers/gpu/drm/scheduler/sched_main.c    | 14 +++++++++-----
>  drivers/gpu/drm/v3d/v3d_sched.c           | 10 +++++-----
>  include/drm/gpu_scheduler.h               | 23 ++++++++++++++++++++++-
>  7 files changed, 43 insertions(+), 15 deletions(-)
> 
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> index 47ea46859618..532636ea20bc 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
> @@ -488,7 +488,7 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
>  
>  	r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
>  			   num_hw_submission, amdgpu_job_hang_limit,
> -			   timeout, sched_score, ring->name);
> +			   timeout, NULL, sched_score, ring->name);
>  	if (r) {
>  		DRM_ERROR("Failed to create scheduler on ring %s.\n",
>  			  ring->name);
> diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> index 19826e504efc..feb6da1b6ceb 100644
> --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
> @@ -190,7 +190,8 @@ int etnaviv_sched_init(struct etnaviv_gpu *gpu)
>  
>  	ret = drm_sched_init(&gpu->sched, &etnaviv_sched_ops,
>  			     etnaviv_hw_jobs_limit, etnaviv_job_hang_limit,
> -			     msecs_to_jiffies(500), NULL, dev_name(gpu->dev));
> +			     msecs_to_jiffies(500), NULL, NULL,
> +			     dev_name(gpu->dev));
>  	if (ret)
>  		return ret;
>  
> diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c
> index ecf3267334ff..dba8329937a3 100644
> --- a/drivers/gpu/drm/lima/lima_sched.c
> +++ b/drivers/gpu/drm/lima/lima_sched.c
> @@ -508,7 +508,8 @@ int lima_sched_pipe_init(struct lima_sched_pipe *pipe, const char *name)
>  	INIT_WORK(&pipe->recover_work, lima_sched_recover_work);
>  
>  	return drm_sched_init(&pipe->base, &lima_sched_ops, 1,
> -			      lima_job_hang_limit, msecs_to_jiffies(timeout),
> +			      lima_job_hang_limit,
> +			      msecs_to_jiffies(timeout), NULL,
>  			      NULL, name);
>  }
>  
> diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c
> index 682f2161b999..8ff79fd49577 100644
> --- a/drivers/gpu/drm/panfrost/panfrost_job.c
> +++ b/drivers/gpu/drm/panfrost/panfrost_job.c
> @@ -626,7 +626,8 @@ int panfrost_job_init(struct panfrost_device *pfdev)
>  
>  		ret = drm_sched_init(&js->queue[j].sched,
>  				     &panfrost_sched_ops,
> -				     1, 0, msecs_to_jiffies(JOB_TIMEOUT_MS),
> +				     1, 0,
> +				     msecs_to_jiffies(JOB_TIMEOUT_MS), NULL,
>  				     NULL, "pan_js");
>  		if (ret) {
>  			dev_err(pfdev->dev, "Failed to create scheduler: %d.", ret);
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index c0a2f8f8d472..3e180f0d4305 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -232,7 +232,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
>  {
>  	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>  	    !list_empty(&sched->pending_list))
> -		schedule_delayed_work(&sched->work_tdr, sched->timeout);
> +		queue_delayed_work(sched->timeout_wq, &sched->work_tdr, sched->timeout);
>  }
>  
>  /**
> @@ -244,7 +244,7 @@ static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
>   */
>  void drm_sched_fault(struct drm_gpu_scheduler *sched)
>  {
> -	mod_delayed_work(system_wq, &sched->work_tdr, 0);
> +	mod_delayed_work(sched->timeout_wq, &sched->work_tdr, 0);
>  }
>  EXPORT_SYMBOL(drm_sched_fault);
>  
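
With the wq now stored on the scheduler, drm_sched_fault() also benefits
from the ordered-wq serialization. A hypothetical fault IRQ handler built
on top of it (again, my_gpu_* names invented for illustration):

  static irqreturn_t my_gpu_fault_irq(int irq, void *data)
  {
          struct my_gpu_device *gpu = data;

          /* Queues work_tdr with a 0 delay on the driver's ordered wq,
           * so the fault is handled with the same serialization
           * guarantees as a timeout. */
          drm_sched_fault(&gpu->sched[0]);

          return IRQ_HANDLED;
  }
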
> @@ -270,7 +270,7 @@ unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
>  	 * Modify the timeout to an arbitrarily large value. This also prevents
>  	 * the timeout to be restarted when new submissions arrive
>  	 */
> -	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
> +	if (mod_delayed_work(sched->timeout_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
>  			&& time_after(sched_timeout, now))
>  		return sched_timeout - now;
>  	else
> @@ -294,7 +294,7 @@ void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
>  	if (list_empty(&sched->pending_list))
>  		cancel_delayed_work(&sched->work_tdr);
>  	else
> -		mod_delayed_work(system_wq, &sched->work_tdr, remaining);
> +		mod_delayed_work(sched->timeout_wq, &sched->work_tdr, remaining);
>  
>  	spin_unlock(&sched->job_list_lock);
>  }
> @@ -837,6 +837,8 @@ static int drm_sched_main(void *param)
>   * @hw_submission: number of hw submissions that can be in flight
>   * @hang_limit: number of times to allow a job to hang before dropping it
>   * @timeout: timeout value in jiffies for the scheduler
> + * @timeout_wq: workqueue to use for timeout work. If NULL, the system_wq is
> + *		used
>   * @score: optional score atomic shared with other schedulers
>   * @name: name used for debugging
>   *
> @@ -844,7 +846,8 @@ static int drm_sched_main(void *param)
>   */
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		   const struct drm_sched_backend_ops *ops,
> -		   unsigned hw_submission, unsigned hang_limit, long timeout,
> +		   unsigned hw_submission, unsigned hang_limit,
> +		   long timeout, struct workqueue_struct *timeout_wq,
>  		   atomic_t *score, const char *name)
>  {
>  	int i, ret;
> @@ -852,6 +855,7 @@ int drm_sched_init(struct drm_gpu_scheduler *sched,
>  	sched->hw_submission_limit = hw_submission;
>  	sched->name = name;
>  	sched->timeout = timeout;
> +	sched->timeout_wq = timeout_wq ? : system_wq;
>  	sched->hang_limit = hang_limit;
>  	sched->score = score ? score : &sched->_score;
>  	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
> diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c
> index 8992480c88fa..a39bdd5cfc4f 100644
> --- a/drivers/gpu/drm/v3d/v3d_sched.c
> +++ b/drivers/gpu/drm/v3d/v3d_sched.c
> @@ -402,7 +402,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_BIN].sched,
>  			     &v3d_bin_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_bin");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create bin scheduler: %d.", ret);
> @@ -412,7 +412,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_RENDER].sched,
>  			     &v3d_render_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_render");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create render scheduler: %d.",
> @@ -424,7 +424,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  	ret = drm_sched_init(&v3d->queue[V3D_TFU].sched,
>  			     &v3d_tfu_sched_ops,
>  			     hw_jobs_limit, job_hang_limit,
> -			     msecs_to_jiffies(hang_limit_ms),
> +			     msecs_to_jiffies(hang_limit_ms), NULL,
>  			     NULL, "v3d_tfu");
>  	if (ret) {
>  		dev_err(v3d->drm.dev, "Failed to create TFU scheduler: %d.",
> @@ -437,7 +437,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  		ret = drm_sched_init(&v3d->queue[V3D_CSD].sched,
>  				     &v3d_csd_sched_ops,
>  				     hw_jobs_limit, job_hang_limit,
> -				     msecs_to_jiffies(hang_limit_ms),
> +				     msecs_to_jiffies(hang_limit_ms), NULL,
>  				     NULL, "v3d_csd");
>  		if (ret) {
>  			dev_err(v3d->drm.dev, "Failed to create CSD scheduler: %d.",
> @@ -449,7 +449,7 @@ v3d_sched_init(struct v3d_dev *v3d)
>  		ret = drm_sched_init(&v3d->queue[V3D_CACHE_CLEAN].sched,
>  				     &v3d_cache_clean_sched_ops,
>  				     hw_jobs_limit, job_hang_limit,
> -				     msecs_to_jiffies(hang_limit_ms),
> +				     msecs_to_jiffies(hang_limit_ms), NULL,
>  				     NULL, "v3d_cache_clean");
>  		if (ret) {
>  			dev_err(v3d->drm.dev, "Failed to create CACHE_CLEAN scheduler: %d.",
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 65700511e074..9c41904ffd83 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -253,6 +253,24 @@ struct drm_sched_backend_ops {
>  	 * 5. Restart the scheduler using drm_sched_start(). At that point, new
>  	 *    jobs can be queued, and the scheduler thread is unblocked
>  	 *
> +	 * Note that some GPUs have distinct hardware queues but need to reset
> +	 * the GPU globally, which requires extra synchronization between the
> +	 * timeout handler of the different &drm_gpu_scheduler. One way to
> +	 * achieve this synchronization is to create an ordered workqueue
> +	 * (using alloc_ordered_workqueue()) at the driver level, and pass this
> +	 * queue to drm_sched_init(), to guarantee that timeout handlers are
> +	 * executed sequentially. The above workflow needs to be slightly
> +	 * adjusted in that case:
> +	 *
> +	 * 1. Stop all schedulers impacted by the reset using drm_sched_stop()
> +	 * 2. Try to gracefully stop non-faulty jobs on all queues impacted by
> +	 *    the reset (optional)
> +	 * 3. Issue a GPU reset on all faulty queues (driver-specific)
> +	 * 4. Re-submit jobs on all schedulers impacted by the reset using
> +	 *    drm_sched_resubmit_jobs()
> +	 * 5. Restart all schedulers that were stopped in step #1 using
> +	 *    drm_sched_start()
> +	 *
>  	 * Return DRM_GPU_SCHED_STAT_NOMINAL, when all is normal,
>  	 * and the underlying driver has started or completed recovery.
>  	 *
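
To make the adjusted workflow above concrete, here is a hedged sketch of
a timedout_job() handler for a GPU with a global reset, following those
five steps (my_gpu_* helpers are invented; a real driver will differ):

  static enum drm_gpu_sched_stat
  my_gpu_timedout_job(struct drm_sched_job *bad)
  {
          struct my_gpu_device *gpu = to_my_gpu_device(bad->sched);
          int i;

          /* The ordered timeout_wq guarantees no other queue's timeout
           * handler runs concurrently with this one. */

          /* 1. Stop all schedulers impacted by the reset. */
          for (i = 0; i < 3; i++)
                  drm_sched_stop(&gpu->sched[i],
                                 bad->sched == &gpu->sched[i] ? bad : NULL);

          /* 2. (optional) Gracefully stop non-faulty jobs. */

          /* 3. Global GPU reset (driver-specific). */
          my_gpu_reset(gpu);

          /* 4. Re-submit the jobs that were pending at reset time. */
          for (i = 0; i < 3; i++)
                  drm_sched_resubmit_jobs(&gpu->sched[i]);

          /* 5. Restart all schedulers stopped in step #1. */
          for (i = 0; i < 3; i++)
                  drm_sched_start(&gpu->sched[i], true);

          return DRM_GPU_SCHED_STAT_NOMINAL;
  }
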
> @@ -283,6 +301,7 @@ struct drm_sched_backend_ops {
>   *                 finished.
>   * @hw_rq_count: the number of jobs currently in the hardware queue.
>   * @job_id_count: used to assign unique id to the each job.
> + * @timeout_wq: workqueue used to queue @work_tdr
>   * @work_tdr: schedules a delayed call to @drm_sched_job_timedout after the
>   *            timeout interval is over.
>   * @thread: the kthread on which the scheduler which run.
> @@ -307,6 +326,7 @@ struct drm_gpu_scheduler {
>  	wait_queue_head_t		job_scheduled;
>  	atomic_t			hw_rq_count;
>  	atomic64_t			job_id_count;
> +	struct workqueue_struct		*timeout_wq;
>  	struct delayed_work		work_tdr;
>  	struct task_struct		*thread;
>  	struct list_head		pending_list;
> @@ -320,7 +340,8 @@ struct drm_gpu_scheduler {
>  
>  int drm_sched_init(struct drm_gpu_scheduler *sched,
>  		   const struct drm_sched_backend_ops *ops,
> -		   uint32_t hw_submission, unsigned hang_limit, long timeout,
> +		   uint32_t hw_submission, unsigned hang_limit,
> +		   long timeout, struct workqueue_struct *timeout_wq,
>  		   atomic_t *score, const char *name);
>  
>  void drm_sched_fini(struct drm_gpu_scheduler *sched);
> -- 
> 2.31.1
> 

-- 
Daniel Vetter
Software Engineer, Intel Corporation
http://blog.ffwll.ch

