<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<style type="text/css" style="display:none;"> P {margin-top:0;margin-bottom:0;} </style>
</head>
<body dir="ltr">
<p style="font-family:Arial;font-size:10pt;color:#0078D7;margin:15pt;" align="Left">
[AMD Official Use Only - Internal Distribution Only]<br>
</p>
<br>
<div>
<div style="font-family: Calibri, Arial, Helvetica, sans-serif; font-size: 12pt; color: rgb(0, 0, 0);">
<br>
</div>
<div id="appendonsend"></div>
<div style="font-family:Calibri,Arial,Helvetica,sans-serif; font-size:12pt; color:rgb(0,0,0)">
<br>
</div>
<hr tabindex="-1" style="display:inline-block; width:98%">
<div id="divRplyFwdMsg" dir="ltr"><font face="Calibri, sans-serif" color="#000000" style="font-size:11pt"><b>From:</b> Alex Deucher <alexdeucher@gmail.com><br>
<b>Sent:</b> Friday, May 22, 2020 11:16 PM<br>
<b>To:</b> Wang, Kevin(Yang) <Kevin1.Wang@amd.com><br>
<b>Cc:</b> amd-gfx list <amd-gfx@lists.freedesktop.org>; Deucher, Alexander <Alexander.Deucher@amd.com>; Zhang, Hawking <Hawking.Zhang@amd.com><br>
<b>Subject:</b> Re: [PATCH] drm/amdgpu: fix device attribute node create failed with multi gpu</font>
<div> </div>
</div>
<div class="BodyFragment"><font size="2"><span style="font-size:11pt">
<div class="PlainText">On Fri, May 22, 2020 at 10:57 AM Kevin Wang <kevin1.wang@amd.com> wrote:<br>
><br>
> the origin design will use varible of "attr->states" to save node<br>
> supported states on current gpu device, but for multi gpu device, when<br>
> probe second gpu device, the driver will check attribute node states<br>
> from previous gpu device wthether to create attribute node.<br>
> it will cause other gpu device create attribute node faild.<br>
><br>
> 1. add member attr_list into amdgpu_device to link supported device attribute node.<br>
> 2. add new structure "struct amdgpu_device_attr_entry{}" to track device attribute state.<br>
><br>
> fix:<br>
> drm/amdgpu: optimize amdgpu device attribute code<br>
><br>
> Signed-off-by: Kevin Wang <kevin1.wang@amd.com><br>
> ---<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h    |  1 +<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c | 75 +++++++++++++++-----------<br>
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h | 13 +++--<br>
>  3 files changed, 52 insertions(+), 37 deletions(-)<br>
><br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> index bfce0931f9c1..b84146339ea3 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h<br>
> @@ -995,6 +995,7 @@ struct amdgpu_device {<br>
>         char                            serial[16];<br>
><br>
>         struct amdgpu_autodump          autodump;<br>
> +       struct list_head                attr_list;<br>
<br>
Might want to call this pm_attr_list or even move this to the<br>
amdgpu_pm struct, but either way, assuming you've tested this with<br>
multiple GPUs:<br>
Reviewed-by: Alex Deucher <alexander.deucher@amd.com><br>
<br>
<br>
[kevin]:</div>
<div class="PlainText">thanks,  the patch v2 will fix it. </div>
<div class="PlainText">and this patch is test passed on local witl multi gpu.</div>
<div class="PlainText"><br>
</div>
<div class="PlainText">>  };<br>
><br>
>  static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c<br>
> index 55815b899942..ac99aa0a16a8 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c<br>
> @@ -1725,7 +1725,7 @@ static struct amdgpu_device_attr amdgpu_device_attrs[] = {<br>
>  };<br>
><br>
>  static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,<br>
> -                              uint32_t mask)<br>
> +                              uint32_t mask, enum amdgpu_device_attr_states *states)<br>
>  {<br>
>         struct device_attribute *dev_attr = &attr->dev_attr;<br>
>         const char *attr_name = dev_attr->attr.name;<br>
> @@ -1733,7 +1733,7 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_<br>
>         enum amd_asic_type asic_type = adev->asic_type;<br>
><br>
>         if (!(attr->flags & mask)) {<br>
> -               attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +               *states = ATTR_STATE_UNSUPPORTED;<br>
>                 return 0;<br>
>         }<br>
><br>
> @@ -1741,34 +1741,34 @@ static int default_attr_update(struct amdgpu_device *adev, struct amdgpu_device_<br>
><br>
>         if (DEVICE_ATTR_IS(pp_dpm_socclk)) {<br>
>                 if (asic_type <= CHIP_VEGA10)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(pp_dpm_dcefclk)) {<br>
>                 if (asic_type <= CHIP_VEGA10 || asic_type == CHIP_ARCTURUS)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(pp_dpm_fclk)) {<br>
>                 if (asic_type < CHIP_VEGA20)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(pp_dpm_pcie)) {<br>
>                 if (asic_type == CHIP_ARCTURUS)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(pp_od_clk_voltage)) {<br>
> -               attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +               *states = ATTR_STATE_UNSUPPORTED;<br>
>                 if ((is_support_sw_smu(adev) && adev->smu.od_enabled) ||<br>
>                     (!is_support_sw_smu(adev) && hwmgr->od_enabled))<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(mem_busy_percent)) {<br>
>                 if (adev->flags & AMD_IS_APU || asic_type == CHIP_VEGA10)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(pcie_bw)) {<br>
>                 /* PCIe Perf counters won't work on APU nodes */<br>
>                 if (adev->flags & AMD_IS_APU)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(unique_id)) {<br>
>                 if (!adev->unique_id)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         } else if (DEVICE_ATTR_IS(pp_features)) {<br>
>                 if (adev->flags & AMD_IS_APU || asic_type <= CHIP_VEGA10)<br>
> -                       attr->states = ATTR_STATE_UNSUPPORTED;<br>
> +                       *states = ATTR_STATE_UNSUPPORTED;<br>
>         }<br>
><br>
>         if (asic_type == CHIP_ARCTURUS) {<br>
> @@ -1794,22 +1794,24 @@ static int amdgpu_device_attr_create(struct amdgpu_device *adev,<br>
>         int ret = 0;<br>
>         struct device_attribute *dev_attr = &attr->dev_attr;<br>
>         const char *name = dev_attr->attr.name;<br>
> +       enum amdgpu_device_attr_states attr_states = ATTR_STATE_SUPPORTED;<br>
> +       struct amdgpu_device_attr_entry *attr_entry;<br>
> +<br>
>         int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,<br>
> -                          uint32_t mask) = default_attr_update;<br>
> +                          uint32_t mask, enum amdgpu_device_attr_states *states) = default_attr_update;<br>
><br>
>         BUG_ON(!attr);<br>
><br>
>         attr_update = attr->attr_update ? attr_update : default_attr_update;<br>
><br>
> -       ret = attr_update(adev, attr, mask);<br>
> +       ret = attr_update(adev, attr, mask, &attr_states);<br>
>         if (ret) {<br>
>                 dev_err(adev->dev, "failed to update device file %s, ret = %d\n",<br>
>                         name, ret);<br>
>                 return ret;<br>
>         }<br>
><br>
> -       /* the attr->states maybe changed after call attr->attr_update function */<br>
> -       if (attr->states == ATTR_STATE_UNSUPPORTED)<br>
> +       if (attr_states == ATTR_STATE_UNSUPPORTED)<br>
>                 return 0;<br>
><br>
>         ret = device_create_file(adev->dev, dev_attr);<br>
> @@ -1818,7 +1820,14 @@ static int amdgpu_device_attr_create(struct amdgpu_device *adev,<br>
>                         name, ret);<br>
>         }<br>
><br>
> -       attr->states = ATTR_STATE_SUPPORTED;<br>
> +       attr_entry = kmalloc(sizeof(*attr_entry), GFP_KERNEL);<br>
> +       if (!attr_entry)<br>
> +               return -ENOMEM;<br>
> +<br>
> +       attr_entry->attr = attr;<br>
> +       INIT_LIST_HEAD(&attr_entry->entry);<br>
> +<br>
> +       list_add_tail(&attr_entry->entry, &adev->attr_list);<br>
><br>
>         return ret;<br>
>  }<br>
> @@ -1827,14 +1836,12 @@ static void amdgpu_device_attr_remove(struct amdgpu_device *adev, struct amdgpu_<br>
>  {<br>
>         struct device_attribute *dev_attr = &attr->dev_attr;<br>
><br>
> -       if (attr->states == ATTR_STATE_UNSUPPORTED)<br>
> -               return;<br>
> -<br>
>         device_remove_file(adev->dev, dev_attr);<br>
> -<br>
> -       attr->states = ATTR_STATE_UNSUPPORTED;<br>
>  }<br>
><br>
> +static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev,<br>
> +                                            struct list_head *attr_list);<br>
> +<br>
>  static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev,<br>
>                                             struct amdgpu_device_attr *attrs,<br>
>                                             uint32_t counts,<br>
> @@ -1852,20 +1859,24 @@ static int amdgpu_device_attr_create_groups(struct amdgpu_device *adev,<br>
>         return 0;<br>
><br>
>  failed:<br>
> -       while (i--)<br>
> -               amdgpu_device_attr_remove(adev, &attrs[i]);<br>
> +       amdgpu_device_attr_remove_groups(adev, &adev->attr_list);<br>
><br>
>         return ret;<br>
>  }<br>
><br>
>  static void amdgpu_device_attr_remove_groups(struct amdgpu_device *adev,<br>
> -                                            struct amdgpu_device_attr *attrs,<br>
> -                                            uint32_t counts)<br>
> +                                            struct list_head *attr_list)<br>
>  {<br>
> -       uint32_t i = 0;<br>
> +       struct amdgpu_device_attr_entry *entry, *entry_tmp;<br>
><br>
> -       for (i = 0; i < counts; i++)<br>
> -               amdgpu_device_attr_remove(adev, &attrs[i]);<br>
> +       if (list_empty(&adev->attr_list))<br>
> +               return ;<br>
> +<br>
> +       list_for_each_entry_safe(entry, entry_tmp, attr_list, entry) {<br>
> +               amdgpu_device_attr_remove(adev, entry->attr);<br>
> +               list_del(&entry->entry);<br>
> +               kfree(entry);<br>
> +       }<br>
>  }<br>
><br>
>  static ssize_t amdgpu_hwmon_show_temp(struct device *dev,<br>
> @@ -3276,6 +3287,8 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)<br>
>         if (adev->pm.dpm_enabled == 0)<br>
>                 return 0;<br>
><br>
> +       INIT_LIST_HEAD(&adev->attr_list);<br>
> +<br>
>         adev->pm.int_hwmon_dev = hwmon_device_register_with_groups(adev->dev,<br>
>                                                                    DRIVER_NAME, adev,<br>
>                                                                    hwmon_groups);<br>
> @@ -3319,9 +3332,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)<br>
>         if (adev->pm.int_hwmon_dev)<br>
>                 hwmon_device_unregister(adev->pm.int_hwmon_dev);<br>
><br>
> -       amdgpu_device_attr_remove_groups(adev,<br>
> -                                        amdgpu_device_attrs,<br>
> -                                        ARRAY_SIZE(amdgpu_device_attrs));<br>
> +       amdgpu_device_attr_remove_groups(adev, &adev->attr_list);<br>
>  }<br>
><br>
>  void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)<br>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h<br>
> index 48e8086baf33..d9ae2b49a402 100644<br>
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h<br>
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.h<br>
> @@ -47,10 +47,14 @@ enum amdgpu_device_attr_states {<br>
>  struct amdgpu_device_attr {<br>
>         struct device_attribute dev_attr;<br>
>         enum amdgpu_device_attr_flags flags;<br>
> -       enum amdgpu_device_attr_states states;<br>
> -       int (*attr_update)(struct amdgpu_device *adev,<br>
> -                          struct amdgpu_device_attr* attr,<br>
> -                          uint32_t mask);<br>
> +       int (*attr_update)(struct amdgpu_device *adev, struct amdgpu_device_attr *attr,<br>
> +                          uint32_t mask, enum amdgpu_device_attr_states *states);<br>
> +<br>
> +};<br>
> +<br>
> +struct amdgpu_device_attr_entry {<br>
> +       struct list_head entry;<br>
> +       struct amdgpu_device_attr *attr;<br>
>  };<br>
><br>
>  #define to_amdgpu_device_attr(_dev_attr) \<br>
> @@ -59,7 +63,6 @@ struct amdgpu_device_attr {<br>
>  #define __AMDGPU_DEVICE_ATTR(_name, _mode, _show, _store, _flags, ...) \<br>
>         { .dev_attr = __ATTR(_name, _mode, _show, _store),              \<br>
>           .flags = _flags,                                              \<br>
> -         .states = ATTR_STATE_SUPPORTED,                                       \<br>
>           ##__VA_ARGS__, }<br>
><br>
>  #define AMDGPU_DEVICE_ATTR(_name, _mode, _flags, ...)                  \<br>
> --<br>
> 2.17.1<br>
><br>
> _______________________________________________<br>
> amd-gfx mailing list<br>
> amd-gfx@lists.freedesktop.org<br>
> <a href="https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Ckevin1.wang%40amd.com%7Cdd2ce58bbf7f4cbc34c408d7fe6317d8%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637257573884409063&amp;sdata=%2FYecffjaMNwPS%2BU0QM%2BWkQ%2BmVBDFP9pw3eYE%2FBgfU60%3D&amp;reserved=0">
https://nam11.safelinks.protection.outlook.com/?url=https%3A%2F%2Flists.freedesktop.org%2Fmailman%2Flistinfo%2Famd-gfx&amp;data=02%7C01%7Ckevin1.wang%40amd.com%7Cdd2ce58bbf7f4cbc34c408d7fe6317d8%7C3dd8961fe4884e608e11a82d994e183d%7C0%7C0%7C637257573884409063&amp;sdata=%2FYecffjaMNwPS%2BU0QM%2BWkQ%2BmVBDFP9pw3eYE%2FBgfU60%3D&amp;reserved=0</a><br>
</div>
</span></font></div>
</div>
</body>
</html>