[PATCH 02/10] drm/amdgpu: Protect the amdgpu_bo_list list with a mutex v2

Alex Deucher alexdeucher at gmail.com
Thu Jul 14 15:10:23 UTC 2022


On Thu, Jul 14, 2022 at 6:39 AM Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
>
> From: Luben Tuikov <luben.tuikov at amd.com>
>
> Protect the struct amdgpu_bo_list with a mutex. This is used during command
> submission in order to avoid buffer object corruption as recorded in
> the link below.
>
> v2 (chk): Keep the mutex locked for the whole CS to avoid using the
>           list from multiple CS threads at the same time.
>
> Suggested-by: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <Alexander.Deucher at amd.com>
> Cc: Andrey Grodzovsky <Andrey.Grodzovsky at amd.com>
> Cc: Vitaly Prosyak <Vitaly.Prosyak at amd.com>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2048
> Signed-off-by: Luben Tuikov <luben.tuikov at amd.com>
> Signed-off-by: Christian König <christian.koenig at amd.com>

I think this is a valid bug fix on its own for stable.
Reviewed-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c |  3 ++-
>  drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h |  4 ++++
>  drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c      | 16 +++++++++++++---
>  3 files changed, 19 insertions(+), 4 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> index 714178f1b6c6..2168163aad2d 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
> @@ -40,7 +40,7 @@ static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
>  {
>         struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
>                                                    rhead);
> -
> +       mutex_destroy(&list->bo_list_mutex);
>         kvfree(list);
>  }
>
> @@ -136,6 +136,7 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
>
>         trace_amdgpu_cs_bo_status(list->num_entries, total_size);
>
> +       mutex_init(&list->bo_list_mutex);
>         *result = list;
>         return 0;
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> index 529d52a204cf..9caea1688fc3 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.h
> @@ -47,6 +47,10 @@ struct amdgpu_bo_list {
>         struct amdgpu_bo *oa_obj;
>         unsigned first_userptr;
>         unsigned num_entries;
> +
> +       /* Protect access during command submission.
> +        */
> +       struct mutex bo_list_mutex;
>  };
>
>  int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> index b28af04b0c3e..d8f1335bc68f 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
> @@ -519,6 +519,8 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>                         return r;
>         }
>
> +       mutex_lock(&p->bo_list->bo_list_mutex);
> +
>         /* One for TTM and one for the CS job */
>         amdgpu_bo_list_for_each_entry(e, p->bo_list)
>                 e->tv.num_shared = 2;
> @@ -651,6 +653,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
>                         kvfree(e->user_pages);
>                         e->user_pages = NULL;
>                 }
> +               mutex_unlock(&p->bo_list->bo_list_mutex);
>         }
>         return r;
>  }
> @@ -690,9 +693,11 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
>  {
>         unsigned i;
>
> -       if (error && backoff)
> +       if (error && backoff) {
>                 ttm_eu_backoff_reservation(&parser->ticket,
>                                            &parser->validated);
> +               mutex_unlock(&parser->bo_list->bo_list_mutex);
> +       }
>
>         for (i = 0; i < parser->num_post_deps; i++) {
>                 drm_syncobj_put(parser->post_deps[i].syncobj);
> @@ -832,12 +837,16 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
>                         continue;
>
>                 r = amdgpu_vm_bo_update(adev, bo_va, false);
> -               if (r)
> +               if (r) {
> +                       mutex_unlock(&p->bo_list->bo_list_mutex);
>                         return r;
> +               }
>
>                 r = amdgpu_sync_fence(&p->job->sync, bo_va->last_pt_update);
> -               if (r)
> +               if (r) {
> +                       mutex_unlock(&p->bo_list->bo_list_mutex);
>                         return r;
> +               }
>         }
>
>         r = amdgpu_vm_handle_moved(adev, vm);
> @@ -1278,6 +1287,7 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
>
>         ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
>         mutex_unlock(&p->adev->notifier_lock);
> +       mutex_unlock(&p->bo_list->bo_list_mutex);
>
>         return 0;
>
> --
> 2.25.1
>


More information about the amd-gfx mailing list