[1/2] drm/amdgpu: fix slab-out-of-bounds issue in amdgpu_vm_pt_create

Chen, Guchun Guchun.Chen at amd.com
Mon Jul 17 01:16:36 UTC 2023


[AMD Official Use Only - General]

> -----Original Message-----
> From: Limonciello, Mario <Mario.Limonciello at amd.com>
> Sent: Monday, July 17, 2023 8:39 AM
> To: Chen, Guchun <Guchun.Chen at amd.com>; amd-
> gfx at lists.freedesktop.org; Deucher, Alexander
> <Alexander.Deucher at amd.com>; Zhang, Hawking
> <Hawking.Zhang at amd.com>; Koenig, Christian
> <Christian.Koenig at amd.com>; Yang, Philip <Philip.Yang at amd.com>;
> Kuehling, Felix <Felix.Kuehling at amd.com>; mikhail.v.gavrilov at gmail.com
> Subject: Re: [1/2] drm/amdgpu: fix slab-out-of-bounds issue in
> amdgpu_vm_pt_create
>
>
> On 7/14/2023 6:05 AM, Guchun Chen wrote:
> > Recent code set xcp_id stored from file private data when opening
> > device to amdgpu bo for accounting memory usage etc, but not all VMs
> > are attached to this fpriv structure like the vm cases in
> > amdgpu_mes_self_test, otherwise, KASAN will complain below out of
> > bound access. And more importantly, VM code should not touch fpriv
> > structure, so drop fpriv code handling from amdgpu_vm_pt.
> >
> > [   77.292314] BUG: KASAN: slab-out-of-bounds in
> amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> > [   77.293845] Read of size 4 at addr ffff888102c48a48 by task
> modprobe/1069
> > [   77.294146] Call Trace:
> > [   77.294178]  <TASK>
> > [   77.294208]  dump_stack_lvl+0x49/0x63
> > [   77.294260]  print_report+0x16f/0x4a6
> > [   77.294307]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> > [   77.295979]  ? kasan_complete_mode_report_info+0x3c/0x200
> > [   77.296057]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> > [   77.297556]  kasan_report+0xb4/0x130
> > [   77.297609]  ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> > [   77.299202]  __asan_load4+0x6f/0x90
> > [   77.299272]  amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
> > [   77.300796]  ? amdgpu_init+0x6e/0x1000 [amdgpu]
> > [   77.302222]  ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
> > [   77.303721]  ? preempt_count_sub+0x18/0xc0
> > [   77.303786]  amdgpu_vm_init+0x39e/0x870 [amdgpu]
> > [   77.305186]  ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
> > [   77.306683]  ? kasan_set_track+0x25/0x30
> > [   77.306737]  ? kasan_save_alloc_info+0x1b/0x30
> > [   77.306795]  ? __kasan_kmalloc+0x87/0xa0
> > [   77.306852]  amdgpu_mes_self_test+0x169/0x620 [amdgpu]
> >
> > Fixes: ffc6deb773f7 ("drm/amdkfd: Store xcp partition id to amdgpu
> > bo")
> > Signed-off-by: Guchun Chen <guchun.chen at amd.com>
> > Reviewed-by: Christian König <christian.koenig at amd.com>
>
> This bug was also reported in Gitlab.  Here's some more tags.
>
> Link: https://gitlab.freedesktop.org/drm/amd/-/issues/2686
> |Tested-by: Mikhail Gavrilov <mikhail.v.gavrilov at gmail.com>|

Thanks for the info, Mario. Will add it in new patch set.

Regards,
Guchun

> > ---
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c   |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c   |  2 +-
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c    |  5 +++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h    |  5 +++--
> >   drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 11 ++++++-----
> >   5 files changed, 14 insertions(+), 11 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > index 85a0d5f419c8..9a5aa4318cad 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
> > @@ -1232,7 +1232,7 @@ int amdgpu_driver_open_kms(struct drm_device
> *dev, struct drm_file *file_priv)
> >             pasid = 0;
> >     }
> >
> > -   r = amdgpu_vm_init(adev, &fpriv->vm);
> > +   r = amdgpu_vm_init(adev, &fpriv->vm, fpriv->xcp_id);
> >     if (r)
> >             goto error_pasid;
> >
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > index e9091ebfe230..cac1d1b227f5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
> > @@ -1382,7 +1382,7 @@ int amdgpu_mes_self_test(struct amdgpu_device
> *adev)
> >             goto error_pasid;
> >     }
> >
> > -   r = amdgpu_vm_init(adev, vm);
> > +   r = amdgpu_vm_init(adev, vm, 0);
> >     if (r) {
> >             DRM_ERROR("failed to initialize vm\n");
> >             goto error_pasid;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > index 32adc31c093d..74380b21e7a5 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
> > @@ -2121,13 +2121,14 @@ long amdgpu_vm_wait_idle(struct
> amdgpu_vm *vm, long timeout)
> >    *
> >    * @adev: amdgpu_device pointer
> >    * @vm: requested vm
> > + * @xcp_id: GPU partition selection id
> >    *
> >    * Init @vm fields.
> >    *
> >    * Returns:
> >    * 0 for success, error for failure.
> >    */
> > -int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
> > +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> > +int32_t xcp_id)
> >   {
> >     struct amdgpu_bo *root_bo;
> >     struct amdgpu_bo_vm *root;
> > @@ -2177,7 +2178,7 @@ int amdgpu_vm_init(struct amdgpu_device
> *adev, struct amdgpu_vm *vm)
> >     vm->evicting = false;
> >
> >     r = amdgpu_vm_pt_create(adev, vm, adev->vm_manager.root_level,
> > -                           false, &root);
> > +                           false, &root, xcp_id);
> >     if (r)
> >             goto error_free_delayed;
> >     root_bo = &root->bo;
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > index 88ee4507f6b6..bca258c38919 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
> > @@ -398,7 +398,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
> >                     u32 pasid);
> >
> >   long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout); -int
> > amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
> > +int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
> > +int32_t xcp_id);
> >   int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct
> amdgpu_vm *vm);
> >   void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct
> amdgpu_vm *vm);
> >   void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm
> > *vm); @@ -481,7 +481,8 @@ void amdgpu_vm_get_memory(struct
> amdgpu_vm *vm,
> >   int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm
> *vm,
> >                    struct amdgpu_bo_vm *vmbo, bool immediate);
> >   int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct
> amdgpu_vm *vm,
> > -                   int level, bool immediate, struct amdgpu_bo_vm
> **vmbo);
> > +                   int level, bool immediate, struct amdgpu_bo_vm
> **vmbo,
> > +                   int32_t xcp_id);
> >   void amdgpu_vm_pt_free_root(struct amdgpu_device *adev, struct
> amdgpu_vm *vm);
> >   bool amdgpu_vm_pt_is_root_clean(struct amdgpu_device *adev,
> >                             struct amdgpu_vm *vm);
> > diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > index 70fc5856a5b9..eb52dfe64948 100644
> > --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
> > @@ -498,11 +498,12 @@ int amdgpu_vm_pt_clear(struct amdgpu_device
> *adev, struct amdgpu_vm *vm,
> >    * @level: the page table level
> >    * @immediate: use a immediate update
> >    * @vmbo: pointer to the buffer object pointer
> > + * @xcp_id: GPU partition id
> >    */
> >   int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct
> amdgpu_vm *vm,
> > -                   int level, bool immediate, struct amdgpu_bo_vm
> **vmbo)
> > +                   int level, bool immediate, struct amdgpu_bo_vm
> **vmbo,
> > +                   int32_t xcp_id)
> >   {
> > -   struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv,
> vm);
> >     struct amdgpu_bo_param bp;
> >     struct amdgpu_bo *bo;
> >     struct dma_resv *resv;
> > @@ -535,7 +536,7 @@ int amdgpu_vm_pt_create(struct amdgpu_device
> > *adev, struct amdgpu_vm *vm,
> >
> >     bp.type = ttm_bo_type_kernel;
> >     bp.no_wait_gpu = immediate;
> > -   bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> > +   bp.xcp_id_plus1 = xcp_id + 1;
> >
> >     if (vm->root.bo)
> >             bp.resv = vm->root.bo->tbo.base.resv; @@ -561,7 +562,7
> @@ int
> > amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm
> *vm,
> >     bp.type = ttm_bo_type_kernel;
> >     bp.resv = bo->tbo.base.resv;
> >     bp.bo_ptr_size = sizeof(struct amdgpu_bo);
> > -   bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
> > +   bp.xcp_id_plus1 = xcp_id + 1;
> >
> >     r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
> >
> > @@ -606,7 +607,7 @@ static int amdgpu_vm_pt_alloc(struct
> amdgpu_device *adev,
> >             return 0;
> >
> >     amdgpu_vm_eviction_unlock(vm);
> > -   r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt);
> > +   r = amdgpu_vm_pt_create(adev, vm, cursor->level, immediate, &pt,
> 0);
> >     amdgpu_vm_eviction_lock(vm);
> >     if (r)
> >             return r;


More information about the amd-gfx mailing list