[PATCH] drm/amdgpu: Optimize mutex usage (v3)

axie axie at amd.com
Thu Jul 20 01:54:12 UTC 2017


Hi Dave,

Is this backtrace easy to reproduce? If so, would you mind giving 
this patch a try?

 From 3c83e1f05352f4795ddc2a8c9acca65b4b58ded9 Mon Sep 17 00:00:00 2001
From: Alex Xie <AlexBin.Xie at amd.com>
Date: Wed, 19 Jul 2017 21:51:56 -0400
Subject: [PATCH] drm/amdgpu: Fix a warning on suspicious RCU usage

 From Dave Airlie

[  141.965723] =============================
[  141.965724] WARNING: suspicious RCU usage
[  141.965726] 4.12.0-rc7 #221 Not tainted
[  141.965727] -----------------------------
[  141.965728] 
/home/airlied/devel/kernel/linux-2.6/include/linux/rcupdate.h:531
Illegal context switch in RCU read-side critical section!
[  141.965730]
                other info that might help us debug this:

[  141.965731]
                rcu_scheduler_active = 2, debug_locks = 0
[  141.965732] 1 lock held by amdgpu_cs:0/1332:
[  141.965733]  #0:  (rcu_read_lock){......}, at: [<ffffffffa01a0d07>]
amdgpu_bo_list_get+0x0/0x109 [amdgpu]
[  141.965774]
                stack backtrace:
[  141.965776] CPU: 6 PID: 1332 Comm: amdgpu_cs:0 Not tainted 4.12.0-rc7 
#221
[  141.965777] Hardware name: To be filled by O.E.M. To be filled by
O.E.M./M5A97 R2.0, BIOS 2603 06/26/2015
[  141.965778] Call Trace:
[  141.965782]  dump_stack+0x68/0x92
[  141.965785]  lockdep_rcu_suspicious+0xf7/0x100
[  141.965788]  ___might_sleep+0x56/0x1fc
[  141.965790]  __might_sleep+0x68/0x6f
[  141.965793]  __mutex_lock+0x4e/0x7b5
[  141.965817]  ? amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
[  141.965820]  ? lock_acquire+0x125/0x1b9
[  141.965844]  ? amdgpu_bo_list_set+0x464/0x464 [amdgpu]
[  141.965846]  mutex_lock_nested+0x16/0x18
[  141.965848]  ? mutex_lock_nested+0x16/0x18
[  141.965872]  amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
[  141.965895]  amdgpu_cs_ioctl+0x4a0/0x17dd [amdgpu]
[  141.965898]  ? radix_tree_node_alloc.constprop.11+0x77/0xab
[  141.965916]  drm_ioctl+0x264/0x393 [drm]
[  141.965939]  ? amdgpu_cs_find_mapping+0x83/0x83 [amdgpu]
[  141.965942]  ? trace_hardirqs_on_caller+0x16a/0x186

Signed-off-by: Alex Xie <AlexBin.Xie at amd.com>
---
  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 9 ++++++---
  1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c 
b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
index dc87962..565ca90 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@@ -198,12 +198,15 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
      result = idr_find(&fpriv->bo_list_handles, id);

      if (result) {
-        if (kref_get_unless_zero(&result->refcount))
+        if (kref_get_unless_zero(&result->refcount)) {
+            rcu_read_unlock();
              mutex_lock(&result->lock);
-        else
+        }
+        else {
+            rcu_read_unlock();
              result = NULL;
+        }
      }
-    rcu_read_unlock();

      return result;
  }
-- 
2.7.4

Thanks,

Alex Bin Xie



On 2017-07-19 09:21 PM, Dave Airlie wrote:
> On 16 June 2017 at 23:08, Alex Xie <AlexBin.Xie at amd.com> wrote:
>> In the original function amdgpu_bo_list_get, the wait
>> for result->lock can be quite long while the mutex
>> bo_list_lock is held. This can make other tasks
>> wait on bo_list_lock for a long period.
>>
>> Secondly, this patch allows several tasks (readers of the idr)
>> to proceed at the same time.
>>
>> v2: use rcu and kref (Dave Airlie and Christian König)
>> v3: update v1 commit message (Michel Dänzer)
> Got this, I'm guessing due to this patch.
>
> Dave.
>
> [  141.965723] =============================
> [  141.965724] WARNING: suspicious RCU usage
> [  141.965726] 4.12.0-rc7 #221 Not tainted
> [  141.965727] -----------------------------
> [  141.965728] /home/airlied/devel/kernel/linux-2.6/include/linux/rcupdate.h:531
> Illegal context switch in RCU read-side critical section!
> [  141.965730]
>                 other info that might help us debug this:
>
> [  141.965731]
>                 rcu_scheduler_active = 2, debug_locks = 0
> [  141.965732] 1 lock held by amdgpu_cs:0/1332:
> [  141.965733]  #0:  (rcu_read_lock){......}, at: [<ffffffffa01a0d07>]
> amdgpu_bo_list_get+0x0/0x109 [amdgpu]
> [  141.965774]
>                 stack backtrace:
> [  141.965776] CPU: 6 PID: 1332 Comm: amdgpu_cs:0 Not tainted 4.12.0-rc7 #221
> [  141.965777] Hardware name: To be filled by O.E.M. To be filled by
> O.E.M./M5A97 R2.0, BIOS 2603 06/26/2015
> [  141.965778] Call Trace:
> [  141.965782]  dump_stack+0x68/0x92
> [  141.965785]  lockdep_rcu_suspicious+0xf7/0x100
> [  141.965788]  ___might_sleep+0x56/0x1fc
> [  141.965790]  __might_sleep+0x68/0x6f
> [  141.965793]  __mutex_lock+0x4e/0x7b5
> [  141.965817]  ? amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
> [  141.965820]  ? lock_acquire+0x125/0x1b9
> [  141.965844]  ? amdgpu_bo_list_set+0x464/0x464 [amdgpu]
> [  141.965846]  mutex_lock_nested+0x16/0x18
> [  141.965848]  ? mutex_lock_nested+0x16/0x18
> [  141.965872]  amdgpu_bo_list_get+0xa4/0x109 [amdgpu]
> [  141.965895]  amdgpu_cs_ioctl+0x4a0/0x17dd [amdgpu]
> [  141.965898]  ? radix_tree_node_alloc.constprop.11+0x77/0xab
> [  141.965916]  drm_ioctl+0x264/0x393 [drm]
> [  141.965939]  ? amdgpu_cs_find_mapping+0x83/0x83 [amdgpu]
> [  141.965942]  ? trace_hardirqs_on_caller+0x16a/0x186
>
>
>> Signed-off-by: Alex Xie <AlexBin.Xie at amd.com>
>> ---
>>   drivers/gpu/drm/amd/amdgpu/amdgpu.h         |  2 ++
>>   drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c | 40 ++++++++++++++++++++---------
>>   2 files changed, 30 insertions(+), 12 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> index 063fc73..e9b3981 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
>> @@ -871,6 +871,8 @@ struct amdgpu_fpriv {
>>
>>   struct amdgpu_bo_list {
>>          struct mutex lock;
>> +       struct rcu_head rhead;
>> +       struct kref refcount;
>>          struct amdgpu_bo *gds_obj;
>>          struct amdgpu_bo *gws_obj;
>>          struct amdgpu_bo *oa_obj;
>> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> index 5af956f..efa6903 100644
>> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
>> @@ -41,6 +41,20 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
>>                                       struct drm_amdgpu_bo_list_entry *info,
>>                                       unsigned num_entries);
>>
>> +static void amdgpu_bo_list_release_rcu(struct kref *ref)
>> +{
>> +       unsigned i;
>> +       struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
>> +                                                  refcount);
>> +
>> +       for (i = 0; i < list->num_entries; ++i)
>> +               amdgpu_bo_unref(&list->array[i].robj);
>> +
>> +       mutex_destroy(&list->lock);
>> +       drm_free_large(list->array);
>> +       kfree_rcu(list, rhead);
>> +}
>> +
>>   static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>                                   struct drm_file *filp,
>>                                   struct drm_amdgpu_bo_list_entry *info,
>> @@ -57,7 +71,7 @@ static int amdgpu_bo_list_create(struct amdgpu_device *adev,
>>
>>          /* initialize bo list*/
>>          mutex_init(&list->lock);
>> -
>> +       kref_init(&list->refcount);
>>          r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
>>          if (r) {
>>                  kfree(list);
>> @@ -83,14 +97,9 @@ static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
>>
>>          mutex_lock(&fpriv->bo_list_lock);
>>          list = idr_remove(&fpriv->bo_list_handles, id);
>> -       if (list) {
>> -               /* Another user may have a reference to this list still */
>> -               mutex_lock(&list->lock);
>> -               mutex_unlock(&list->lock);
>> -               amdgpu_bo_list_free(list);
>> -       }
>> -
>>          mutex_unlock(&fpriv->bo_list_lock);
>> +       if (list)
>> +               kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
>>   }
>>
>>   static int amdgpu_bo_list_set(struct amdgpu_device *adev,
>> @@ -185,11 +194,17 @@ amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
>>   {
>>          struct amdgpu_bo_list *result;
>>
>> -       mutex_lock(&fpriv->bo_list_lock);
>> +       rcu_read_lock();
>>          result = idr_find(&fpriv->bo_list_handles, id);
>> -       if (result)
>> -               mutex_lock(&result->lock);
>> -       mutex_unlock(&fpriv->bo_list_lock);
>> +
>> +       if (result) {
>> +               if (kref_get_unless_zero(&result->refcount))
>> +                       mutex_lock(&result->lock);
>> +               else
>> +                       result = NULL;
>> +       }
>> +       rcu_read_unlock();
>> +
>>          return result;
>>   }
>>
>> @@ -227,6 +242,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
>>   void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
>>   {
>>          mutex_unlock(&list->lock);
>> +       kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
>>   }
>>
>>   void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
>> --
>> 2.7.4
>>
>> _______________________________________________
>> amd-gfx mailing list
>> amd-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/amd-gfx



More information about the amd-gfx mailing list