[PATCH 1/2] drm/amdgpu: Move racy global PMU list into device

Alex Deucher alexdeucher at gmail.com
Tue Nov 8 16:11:12 UTC 2022


On Fri, Oct 28, 2022 at 6:48 PM Brian Norris <briannorris at chromium.org> wrote:
>
> If there are multiple amdgpu devices, this list processing can be racy.
>
> We're really treating this like a per-device list, so make that explicit
> and remove the global list.
>
> Signed-off-by: Brian Norris <briannorris at chromium.org>

@Kuehling, Felix and @Kim, Jonathan: can you take a look at this patch?

Thanks,

Alex


> ---
>
>  drivers/gpu/drm/amd/amdgpu/amdgpu.h     |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c | 12 +++++-------
>  2 files changed, 9 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu.h b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> index 0e6ddf05c23c..e968b7f2417c 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
> @@ -1063,6 +1063,10 @@ struct amdgpu_device {
>         struct work_struct              reset_work;
>
>         bool                            job_hang;
> +
> +#if IS_ENABLED(CONFIG_PERF_EVENTS)
> +       struct list_head pmu_list;
> +#endif
>  };
>
>  static inline struct amdgpu_device *drm_to_adev(struct drm_device *ddev)
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> index 71ee361d0972..24f2055a2f23 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pmu.c
> @@ -23,6 +23,7 @@
>
>  #include <linux/perf_event.h>
>  #include <linux/init.h>
> +#include <linux/list.h>
>  #include "amdgpu.h"
>  #include "amdgpu_pmu.h"
>
> @@ -72,9 +73,6 @@ static ssize_t amdgpu_pmu_event_show(struct device *dev,
>                         amdgpu_pmu_attr->event_str, amdgpu_pmu_attr->type);
>  }
>
> -static LIST_HEAD(amdgpu_pmu_list);
> -
> -
>  struct amdgpu_pmu_attr {
>         const char *name;
>         const char *config;
> @@ -558,7 +556,7 @@ static int init_pmu_entry_by_type_and_add(struct amdgpu_pmu_entry *pmu_entry,
>                 pr_info("Detected AMDGPU %d Perf Events.\n", total_num_events);
>
>
> -       list_add_tail(&pmu_entry->entry, &amdgpu_pmu_list);
> +       list_add_tail(&pmu_entry->entry, &pmu_entry->adev->pmu_list);
>
>         return 0;
>  err_register:
> @@ -579,9 +577,7 @@ void amdgpu_pmu_fini(struct amdgpu_device *adev)
>  {
>         struct amdgpu_pmu_entry *pe, *temp;
>
> -       list_for_each_entry_safe(pe, temp, &amdgpu_pmu_list, entry) {
> -               if (pe->adev != adev)
> -                       continue;
> +       list_for_each_entry_safe(pe, temp, &adev->pmu_list, entry) {
>                 list_del(&pe->entry);
>                 perf_pmu_unregister(&pe->pmu);
>                 kfree(pe->pmu.attr_groups);
> @@ -623,6 +619,8 @@ int amdgpu_pmu_init(struct amdgpu_device *adev)
>         int ret = 0;
>         struct amdgpu_pmu_entry *pmu_entry, *pmu_entry_df;
>
> +       INIT_LIST_HEAD(&adev->pmu_list);
> +
>         switch (adev->asic_type) {
>         case CHIP_VEGA20:
>                 pmu_entry_df = create_pmu_entry(adev, AMDGPU_PMU_PERF_TYPE_DF,
> --
> 2.38.1.273.g43a17bfeac-goog
>


More information about the amd-gfx mailing list