[PATCH v3] drm/sched: Fix kernel NULL pointer dereference error

Christian König christian.koenig at amd.com
Mon Oct 17 14:50:39 UTC 2022


Am 17.10.22 um 16:30 schrieb Arvind Yadav:
> - This is purely a timing issue: sometimes the job is freed
> before the job is done.
> To fix this issue, move the 'dma_fence_cb' callback from the
> job (struct drm_sched_job) to the scheduler fence (struct drm_sched_fence).
>
> - Added drm_sched_fence_set_parent() function (and others *_parent_cb)
> in sched_fence.c. Moved parent fence initialization and callback
> installation into this (this is just cleanup).
>
>
> BUG: kernel NULL pointer dereference, address: 0000000000000088
>   #PF: supervisor read access in kernel mode
>   #PF: error_code(0x0000) - not-present page
>   PGD 0 P4D 0
>   Oops: 0000 [#1] PREEMPT SMP NOPTI
>   CPU: 2 PID: 0 Comm: swapper/2 Not tainted 6.0.0-rc2-custom #1
>   Arvind : [dma_fence_default_wait _START] timeout = -1
>   Hardware name: AMD Dibbler/Dibbler, BIOS RDB1107CC 09/26/2018
>   RIP: 0010:drm_sched_job_done.isra.0+0x11/0x140 [gpu_sched]
>   Code: 8b fe ff ff be 03 00 00 00 e8 7b da b7 e3 e9 d4 fe ff ff 66 0f 1f 44 00 00 0f 1f 44 00 00 55 48 89 e5 41 55 41 54 49 89 fc 53 <48> 8b 9f 88 00 00 00 f0 ff 8b f0 00 00 00 48 8b 83 80 01 00 00 f0
>   RSP: 0018:ffffb1b1801d4d38 EFLAGS: 00010087
>   RAX: ffffffffc0aa48b0 RBX: ffffb1b1801d4d70 RCX: 0000000000000018
>   RDX: 000036c70afb7c1d RSI: ffff8a45ca413c60 RDI: 0000000000000000
>   RBP: ffffb1b1801d4d50 R08: 00000000000000b5 R09: 0000000000000000
>   R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000000
>   R13: ffffb1b1801d4d70 R14: ffff8a45c4160000 R15: ffff8a45c416a708
>   FS:  0000000000000000(0000) GS:ffff8a48a0a80000(0000) knlGS:0000000000000000
>   CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
>   CR2: 0000000000000088 CR3: 000000014ad50000 CR4: 00000000003506e0
>   Call Trace:
>    <IRQ>
>    drm_sched_job_done_cb+0x12/0x20 [gpu_sched]
>    dma_fence_signal_timestamp_locked+0x7e/0x110
>    dma_fence_signal+0x31/0x60
>    amdgpu_fence_process+0xc4/0x140 [amdgpu]
>    gfx_v9_0_eop_irq+0x9d/0xd0 [amdgpu]
>    amdgpu_irq_dispatch+0xb7/0x210 [amdgpu]
>    amdgpu_ih_process+0x86/0x100 [amdgpu]
>    amdgpu_irq_handler+0x24/0x60 [amdgpu]
>    __handle_irq_event_percpu+0x4b/0x190
>    handle_irq_event_percpu+0x15/0x50
>    handle_irq_event+0x39/0x60
>    handle_edge_irq+0xaf/0x210
>    __common_interrupt+0x6e/0x110
>    common_interrupt+0xc1/0xe0
>    </IRQ>
>    <TASK>
>
> Signed-off-by: Arvind Yadav <Arvind.Yadav at amd.com>
> ---
>
> Changes in v2: Moving 'dma_fence_cb' callback from
> job(struct drm_sched_job) to scheduler fence(struct drm_sched_fence)
> instead of adding NULL check for s_fence.
>
> Changes in v3: Added drm_sched_fence_set_parent() function (and others *_parent_cb)
> in sched_fence.c. Moved parent fence initialization and callback
> installation into this (this is just cleanup).
>
> ---
>   drivers/gpu/drm/scheduler/sched_fence.c | 53 +++++++++++++++++++++++++
>   drivers/gpu/drm/scheduler/sched_main.c  | 38 +++++-------------
>   include/drm/gpu_scheduler.h             | 12 +++++-
>   3 files changed, 72 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/scheduler/sched_fence.c b/drivers/gpu/drm/scheduler/sched_fence.c
> index 7fd869520ef2..f6808f363261 100644
> --- a/drivers/gpu/drm/scheduler/sched_fence.c
> +++ b/drivers/gpu/drm/scheduler/sched_fence.c
> @@ -77,6 +77,59 @@ static void drm_sched_fence_free_rcu(struct rcu_head *rcu)
>   	if (!WARN_ON_ONCE(!fence))
>   		kmem_cache_free(sched_fence_slab, fence);
>   }

Please add an empty line here.

> +/**
> + * drm_sched_job_done_cb - the callback for a done job
> + * @f: fence
> + * @cb: fence callbacks
> + */
> +static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)

Probably best to rename this to something like drm_sched_fence_parent_cb().

> +{
> +	struct drm_sched_fence *s_fence = container_of(cb, struct drm_sched_fence,
> +						       cb);
> +	struct drm_gpu_scheduler *sched = s_fence->sched;
> +
> +	atomic_dec(&sched->hw_rq_count);
> +	atomic_dec(sched->score);
> +
> +	dma_fence_get(&s_fence->finished);

We should probably make sure that this reference is taken before 
installing the callback.

> +	drm_sched_fence_finished(s_fence);
> +	dma_fence_put(&s_fence->finished);
> +	wake_up_interruptible(&sched->wake_up_worker);
> +}
> +
> +int drm_sched_fence_add_parent_cb(struct dma_fence *fence,
> +				  struct drm_sched_fence *s_fence)
> +{
> +	return dma_fence_add_callback(fence, &s_fence->cb,
> +				      drm_sched_job_done_cb);
> +}
> +
> +bool drm_sched_fence_remove_parent_cb(struct drm_sched_fence *s_fence)
> +{
> +	return dma_fence_remove_callback(s_fence->parent,
> +					 &s_fence->cb);
> +}

Do we really need separate functions for that?

> +
> +/**
> + * drm_sched_fence_set_parent - set the parent fence and add the callback
> + * fence: pointer to the hw fence
> + * @s_fence: pointer to the fence

Reverse the parameter order, s_fence is the object we work on.

> + *
> + * Set the parent fence and intall the callback for a done job.

You need to document that we take over the caller's reference to the parent fence.

> + */
> +int drm_sched_fence_set_parent(struct dma_fence *fence,
> +			       struct drm_sched_fence *s_fence)
> +{
> +	if (s_fence->parent &&
> +	   dma_fence_remove_callback(s_fence->parent, &s_fence->cb))
> +		dma_fence_put(s_fence->parent);
> +
> +	s_fence->parent = dma_fence_get(fence);
> +	/* Drop for original kref_init of the fence */
> +	dma_fence_put(fence);

This leaks the reference to the old parent and the get/put dance is not 
optimal either.

Better do something like this:

/* We keep the reference of the parent fence here. */
swap(s_fence->parent, fence);
dma_fence_put(fence);


> +	return dma_fence_add_callback(fence, &s_fence->cb,
> +				      drm_sched_job_done_cb);
> +}

When installing the callback fails, we usually call the callback function 
instead of returning the error.



>   
>   /**
>    * drm_sched_fence_free - free up an uninitialized fence
> diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c
> index 4cc59bae38dd..cfb52e15f5b0 100644
> --- a/drivers/gpu/drm/scheduler/sched_main.c
> +++ b/drivers/gpu/drm/scheduler/sched_main.c
> @@ -253,13 +253,12 @@ drm_sched_rq_select_entity_fifo(struct drm_sched_rq *rq)
>   
>   /**
>    * drm_sched_job_done - complete a job
> - * @s_job: pointer to the job which is done
> + * @s_fence: pointer to the fence of a done job
>    *
>    * Finish the job's fence and wake up the worker thread.
>    */
> -static void drm_sched_job_done(struct drm_sched_job *s_job)
> +static void drm_sched_job_done(struct drm_sched_fence *s_fence)
>   {
> -	struct drm_sched_fence *s_fence = s_job->s_fence;
>   	struct drm_gpu_scheduler *sched = s_fence->sched;
>   
>   	atomic_dec(&sched->hw_rq_count);
> @@ -273,18 +272,6 @@ static void drm_sched_job_done(struct drm_sched_job *s_job)
>   	wake_up_interruptible(&sched->wake_up_worker);
>   }
>   
> -/**
> - * drm_sched_job_done_cb - the callback for a done job
> - * @f: fence
> - * @cb: fence callbacks
> - */
> -static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
> -{
> -	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);
> -
> -	drm_sched_job_done(s_job);
> -}
> -
>   /**
>    * drm_sched_dependency_optimized - test if the dependency can be optimized
>    *
> @@ -505,8 +492,7 @@ void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
>   	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
>   					 list) {
>   		if (s_job->s_fence->parent &&
> -		    dma_fence_remove_callback(s_job->s_fence->parent,
> -					      &s_job->cb)) {
> +		    drm_sched_fence_remove_parent_cb(s_job->s_fence)) {
>   			dma_fence_put(s_job->s_fence->parent);
>   			s_job->s_fence->parent = NULL;

Better just call drm_sched_fence_set_parent() with NULL here to clear 
the currently installed parent.

This moves all this dance into the scheduler fence code.

>   			atomic_dec(&sched->hw_rq_count);
> @@ -576,15 +562,14 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
>   			continue;
>   
>   		if (fence) {
> -			r = dma_fence_add_callback(fence, &s_job->cb,
> -						   drm_sched_job_done_cb);
> +			r = drm_sched_fence_add_parent_cb(fence, s_job->s_fence);
>   			if (r == -ENOENT)
> -				drm_sched_job_done(s_job);
> +				drm_sched_job_done(s_job->s_fence);
>   			else if (r)
>   				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",

Completely nuke that here. All of this should be done in the single 
drm_sched_fence_set_parent() function.

And an error message is completely superfluous. We just need to handle 
the case that the callback can't be installed because the fence is 
already signaled.

Regards,
Christian.

>   					  r);
>   		} else
> -			drm_sched_job_done(s_job);
> +			drm_sched_job_done(s_job->s_fence);
>   	}
>   
>   	if (full_recovery) {
> @@ -1049,14 +1034,9 @@ static int drm_sched_main(void *param)
>   		drm_sched_fence_scheduled(s_fence);
>   
>   		if (!IS_ERR_OR_NULL(fence)) {
> -			s_fence->parent = dma_fence_get(fence);
> -			/* Drop for original kref_init of the fence */
> -			dma_fence_put(fence);
> -
> -			r = dma_fence_add_callback(fence, &sched_job->cb,
> -						   drm_sched_job_done_cb);
> +			r = drm_sched_fence_set_parent(fence, s_fence);
>   			if (r == -ENOENT)
> -				drm_sched_job_done(sched_job);
> +				drm_sched_job_done(s_fence);
>   			else if (r)
>   				DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n",
>   					  r);
> @@ -1064,7 +1044,7 @@ static int drm_sched_main(void *param)
>   			if (IS_ERR(fence))
>   				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));
>   
> -			drm_sched_job_done(sched_job);
> +			drm_sched_job_done(s_fence);
>   		}
>   
>   		wake_up(&sched->job_scheduled);
> diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h
> index 1f7d9dd1a444..7258e2fa195f 100644
> --- a/include/drm/gpu_scheduler.h
> +++ b/include/drm/gpu_scheduler.h
> @@ -281,6 +281,10 @@ struct drm_sched_fence {
>            * @owner: job owner for debugging
>            */
>   	void				*owner;
> +	/**
> +	 * @cb: callback
> +	 */
> +	struct dma_fence_cb cb;
>   };
>   
>   struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
> @@ -300,7 +304,6 @@ struct drm_sched_fence *to_drm_sched_fence(struct dma_fence *f);
>    *         be scheduled further.
>    * @s_priority: the priority of the job.
>    * @entity: the entity to which this job belongs.
> - * @cb: the callback for the parent fence in s_fence.
>    *
>    * A job is created by the driver using drm_sched_job_init(), and
>    * should call drm_sched_entity_push_job() once it wants the scheduler
> @@ -325,7 +328,6 @@ struct drm_sched_job {
>   	atomic_t			karma;
>   	enum drm_sched_priority		s_priority;
>   	struct drm_sched_entity         *entity;
> -	struct dma_fence_cb		cb;
>   	/**
>   	 * @dependencies:
>   	 *
> @@ -559,6 +561,12 @@ void drm_sched_fence_free(struct drm_sched_fence *fence);
>   void drm_sched_fence_scheduled(struct drm_sched_fence *fence);
>   void drm_sched_fence_finished(struct drm_sched_fence *fence);
>   
> +int drm_sched_fence_add_parent_cb(struct dma_fence *fence,
> +				  struct drm_sched_fence *s_fence);
> +bool drm_sched_fence_remove_parent_cb(struct drm_sched_fence *s_fence);
> +int drm_sched_fence_set_parent(struct dma_fence *fence,
> +			       struct drm_sched_fence *s_fence);
> +
>   unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched);
>   void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
>   		                unsigned long remaining);



More information about the dri-devel mailing list