[PATCH] drm/amdgpu: fix TTM move entity init order

Alex Deucher alexdeucher at gmail.com
Thu Jul 12 15:01:41 UTC 2018


On Thu, Jul 12, 2018 at 8:33 AM, Christian König
<ckoenig.leichtzumerken at gmail.com> wrote:
> We are initializing the entity before the scheduler is actually
> initialized.
>
> This can lead to all kinds of problems, especially NULL pointer
> dereferences because of Nayan's scheduler work.
>
> Signed-off-by: Christian König <christian.koenig at amd.com>

Acked-by: Alex Deucher <alexander.deucher at amd.com>

> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 36 ++++++++++++++++++---------------
>  1 file changed, 20 insertions(+), 16 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> index 2a0e1b4752ff..11a12483c995 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
> @@ -104,8 +104,6 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
>  static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
>  {
>         struct drm_global_reference *global_ref;
> -       struct amdgpu_ring *ring;
> -       struct drm_sched_rq *rq;
>         int r;
>
>         /* ensure reference is false in case init fails */
> @@ -138,21 +136,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
>
>         mutex_init(&adev->mman.gtt_window_lock);
>
> -       ring = adev->mman.buffer_funcs_ring;
> -       rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
> -       r = drm_sched_entity_init(&adev->mman.entity,
> -                                 &rq, 1, NULL);
> -       if (r) {
> -               DRM_ERROR("Failed setting up TTM BO move run queue.\n");
> -               goto error_entity;
> -       }
> -
>         adev->mman.mem_global_referenced = true;
>
>         return 0;
>
> -error_entity:
> -       drm_global_item_unref(&adev->mman.bo_global_ref.ref);
>  error_bo:
>         drm_global_item_unref(&adev->mman.mem_global_ref);
>  error_mem:
> @@ -162,8 +149,6 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
>  static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
>  {
>         if (adev->mman.mem_global_referenced) {
> -               drm_sched_entity_destroy(adev->mman.entity.sched,
> -                                     &adev->mman.entity);
>                 mutex_destroy(&adev->mman.gtt_window_lock);
>                 drm_global_item_unref(&adev->mman.bo_global_ref.ref);
>                 drm_global_item_unref(&adev->mman.mem_global_ref);
> @@ -1921,10 +1906,29 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
>  {
>         struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
>         uint64_t size;
> +       int r;
>
> -       if (!adev->mman.initialized || adev->in_gpu_reset)
> +       if (!adev->mman.initialized || adev->in_gpu_reset ||
> +           adev->mman.buffer_funcs_enabled == enable)
>                 return;
>
> +       if (enable) {
> +               struct amdgpu_ring *ring;
> +               struct drm_sched_rq *rq;
> +
> +               ring = adev->mman.buffer_funcs_ring;
> +               rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
> +               r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
> +               if (r) {
> +                       DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
> +                                 r);
> +                       return;
> +               }
> +       } else {
> +               drm_sched_entity_destroy(adev->mman.entity.sched,
> +                                        &adev->mman.entity);
> +       }
> +
>         /* this just adjusts TTM size idea, which sets lpfn to the correct value */
>         if (enable)
>                 size = adev->gmc.real_vram_size;
> --
> 2.14.1
>
> _______________________________________________
> amd-gfx mailing list
> amd-gfx at lists.freedesktop.org
> https://lists.freedesktop.org/mailman/listinfo/amd-gfx


More information about the amd-gfx mailing list