[PATCH 1/2] drm/amdgpu: change job_list_lock to mutex

Tue Jun 28 09:33:02 UTC 2016


On 2016年06月28日 17:36, Christian König wrote:
> Am 28.06.2016 um 09:27 schrieb Huang Rui:
>> On Tue, Jun 28, 2016 at 03:04:18PM +0800, Chunming Zhou wrote:
>>> ring_mirror_list is only used kthread context, no need to spinlock.
>>> otherwise deadlock happens when kthread_park.
>>>
>> Yes, in process context, we prefer to use mutex, because it avoids to
>> grab the CPU all the time.
>>
>> Reviewed-by: Huang Rui <ray.huang at amd.com>
>
> NAK, the patch won't apply because I've changed the irq save spin lock 
> to a normal one quite a while ago. But, I'm not sure if Alex picked up 
> that patch yet.
>
> You shouldn't use a mutex here when you don't have a reason to do so. 
> Spin locks have less overhead and we won't expect any contention here.
>
> Additional to that how should this cause a deadlock with kthread_park?
you can apply another patch to drop hw ring, and then run glxgears, and 
hang the gfx with the attached, then you will find that deadlock info.

Regards,
David Zhou
>
> Regards,
> Christian.
>
>>
>>> Change-Id: I906022297015faf14a0ddb5f62a728af3e5f9448
>>> Signed-off-by: Chunming Zhou <David1.Zhou at amd.com>
>>> ---
>>>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.c | 12 +++++-------
>>>   drivers/gpu/drm/amd/scheduler/gpu_scheduler.h |  2 +-
>>>   2 files changed, 6 insertions(+), 8 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c 
>>> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
>>> index b53cf58..3373d97 100644
>>> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
>>> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.c
>>> @@ -331,10 +331,9 @@ static void amd_sched_job_finish(struct 
>>> work_struct *work)
>>>       struct amd_sched_job *s_job = container_of(work, struct 
>>> amd_sched_job,
>>>                              finish_work);
>>>       struct amd_gpu_scheduler *sched = s_job->sched;
>>> -    unsigned long flags;
>>>         /* remove job from ring_mirror_list */
>>> -    spin_lock_irqsave(&sched->job_list_lock, flags);
>>> +    mutex_lock(&sched->job_list_lock);
>>>       list_del_init(&s_job->node);
>>>       if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
>>>           struct amd_sched_job *next;
>>> @@ -348,7 +347,7 @@ static void amd_sched_job_finish(struct 
>>> work_struct *work)
>>>           if (next)
>>>               schedule_delayed_work(&next->work_tdr, sched->timeout);
>>>       }
>>> -    spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> +    mutex_unlock(&sched->job_list_lock);
>>>       sched->ops->free_job(s_job);
>>>   }
>>>   @@ -362,15 +361,14 @@ static void amd_sched_job_finish_cb(struct 
>>> fence *f, struct fence_cb *cb)
>>>   static void amd_sched_job_begin(struct amd_sched_job *s_job)
>>>   {
>>>       struct amd_gpu_scheduler *sched = s_job->sched;
>>> -    unsigned long flags;
>>>   -    spin_lock_irqsave(&sched->job_list_lock, flags);
>>> +    mutex_lock(&sched->job_list_lock);
>>>       list_add_tail(&s_job->node, &sched->ring_mirror_list);
>>>       if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
>>> list_first_entry_or_null(&sched->ring_mirror_list,
>>>                        struct amd_sched_job, node) == s_job)
>>>           schedule_delayed_work(&s_job->work_tdr, sched->timeout);
>>> -    spin_unlock_irqrestore(&sched->job_list_lock, flags);
>>> +    mutex_unlock(&sched->job_list_lock);
>>>   }
>>>     static void amd_sched_job_timedout(struct work_struct *work)
>>> @@ -564,7 +562,7 @@ int amd_sched_init(struct amd_gpu_scheduler *sched,
>>>       init_waitqueue_head(&sched->wake_up_worker);
>>>       init_waitqueue_head(&sched->job_scheduled);
>>>       INIT_LIST_HEAD(&sched->ring_mirror_list);
>>> -    spin_lock_init(&sched->job_list_lock);
>>> +    mutex_init(&sched->job_list_lock);
>>>       atomic_set(&sched->hw_rq_count, 0);
>>>       if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
>>>           sched_fence_slab = kmem_cache_create(
>>> diff --git a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h 
>>> b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> index 221a515..5675906 100644
>>> --- a/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> +++ b/drivers/gpu/drm/amd/scheduler/gpu_scheduler.h
>>> @@ -132,7 +132,7 @@ struct amd_gpu_scheduler {
>>>       atomic_t            hw_rq_count;
>>>       struct task_struct        *thread;
>>>       struct list_head    ring_mirror_list;
>>> -    spinlock_t            job_list_lock;
>>> +    struct mutex            job_list_lock;
>>>   };
>>>     int amd_sched_init(struct amd_gpu_scheduler *sched,
>>> -- 
>>> 1.9.1
>>>
>>> _______________________________________________
>>> amd-gfx mailing list
>>> amd-gfx at lists.freedesktop.org
>>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx

-------------- next part --------------
A non-text attachment was scrubbed...
Name: 0001-drm-amdgpu-add-gfx-hang-debugfs.patch
Type: text/x-patch
Size: 4965 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/amd-gfx/attachments/20160628/abb497e7/attachment-0001.bin>