Re: 回复: [PATCH] drm/amdgpu: fix a bug NULL pointer dereference

Thu Feb 20 15:15:31 UTC 2020

On 2/20/20 2:35 PM, Liu, Monk wrote:
> Sorry, my previous idea still leaves RQ NULL; please check whether the method below works:
>
> 129 static struct drm_sched_rq *
> 130 drm_sched_entity_get_free_sched(struct drm_sched_entity *entity)
> 131 {
> 132     struct drm_sched_rq *rq = NULL;
> 133     unsigned int min_jobs = UINT_MAX, num_jobs;
> 134     int i;
>
> 135
> 		While (!mutex_trylock(....))
> 			Sleep()
We can't do that: drm_sched_entity_get_free_sched is in another
module (the DRM scheduler) that is independent of amdgpu.
> 136     for (i = 0; i < entity->num_rq_list; ++i) {
> 137         struct drm_gpu_scheduler *sched = entity->rq_list[i]->sched;
> 138
> 139         if (!entity->rq_list[i]->sched->ready) {    //we take the gpu reset mutex lock, so now sched->ready won't be set to "not ready"
> 140             DRM_WARN("sched%s is not ready, skipping", sched->name);				
> 141             continue;
> 142         }
> 143
> 144         num_jobs = atomic_read(&sched->num_jobs);
> 145         if (num_jobs < min_jobs) {
> 146             min_jobs = num_jobs;
> 147             rq = entity->rq_list[i];
> 148         }
> 149     }
>
> 		Mutex_unlock(...)
>
> 150
> 151     return rq;
> 152 }
>