[PATCH] drm/amdgpu: add a flag to indicate if a VM is attached to fpriv
Guchun Chen
guchun.chen at amd.com
Wed May 24 09:23:26 UTC 2023
Recent code stores xcp_id to amdgpu bo for accounting memory
usage or find correct KFD node, and this xcp_id is from file
private data after opening device. However, not all VMs are
attached to this fpriv structure like the case in amdgpu_mes_self_test.
So add a flag to differentiate the cases. Otherwise, KASAN will
complain out of bound access.
[ 77.292314] BUG: KASAN: slab-out-of-bounds in amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.293845] Read of size 4 at addr ffff888102c48a48 by task modprobe/1069
[ 77.294146] Call Trace:
[ 77.294178] <TASK>
[ 77.294208] dump_stack_lvl+0x49/0x63
[ 77.294260] print_report+0x16f/0x4a6
[ 77.294307] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.295979] ? kasan_complete_mode_report_info+0x3c/0x200
[ 77.296057] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.297556] kasan_report+0xb4/0x130
[ 77.297609] ? amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.299202] __asan_load4+0x6f/0x90
[ 77.299272] amdgpu_vm_pt_create+0x17e/0x4b0 [amdgpu]
[ 77.300796] ? amdgpu_init+0x6e/0x1000 [amdgpu]
[ 77.302222] ? amdgpu_vm_pt_clear+0x750/0x750 [amdgpu]
[ 77.303721] ? preempt_count_sub+0x18/0xc0
[ 77.303786] amdgpu_vm_init+0x39e/0x870 [amdgpu]
[ 77.305186] ? amdgpu_vm_wait_idle+0x90/0x90 [amdgpu]
[ 77.306683] ? kasan_set_track+0x25/0x30
[ 77.306737] ? kasan_save_alloc_info+0x1b/0x30
[ 77.306795] ? __kasan_kmalloc+0x87/0xa0
[ 77.306852] amdgpu_mes_self_test+0x169/0x620 [amdgpu]
Fixes: ffc6deb773f7("drm/amdkfd: Store xcp partition id to amdgpu bo")
Signed-off-by: Guchun Chen <guchun.chen at amd.com>
---
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c | 2 +-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 5 ++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h | 5 ++++-
drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c | 12 +++++++++---
5 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 41d047e5de69..79b80f9233db 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -1229,7 +1229,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
pasid = 0;
}
- r = amdgpu_vm_init(adev, &fpriv->vm);
+ r = amdgpu_vm_init(adev, &fpriv->vm, true);
if (r)
goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
index 49bb6c03d606..3be5219edf88 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_mes.c
@@ -1345,7 +1345,7 @@ int amdgpu_mes_self_test(struct amdgpu_device *adev)
goto error_pasid;
}
- r = amdgpu_vm_init(adev, vm);
+ r = amdgpu_vm_init(adev, vm, false);
if (r) {
DRM_ERROR("failed to initialize vm\n");
goto error_pasid;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 37b9d8a8dbec..47ffaa1526a0 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -2099,13 +2099,15 @@ long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout)
*
* @adev: amdgpu_device pointer
* @vm: requested vm
+ * @vm_attach_to_fpriv: flag to tell if vm is attached to file private data
*
* Init @vm fields.
*
* Returns:
* 0 for success, error for failure.
*/
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+ bool vm_attach_to_fpriv)
{
struct amdgpu_bo *root_bo;
struct amdgpu_bo_vm *root;
@@ -2131,6 +2133,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pte_support_ats = false;
vm->is_compute_context = false;
+ vm->vm_attach_to_fpriv = vm_attach_to_fpriv;
vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode &
AMDGPU_VM_USE_CPU_FOR_GFX);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
index d551fca1780e..62ed14b1fc16 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.h
@@ -333,6 +333,9 @@ struct amdgpu_vm {
/* Flag to indicate if VM is used for compute */
bool is_compute_context;
+ /* Flag to tell if VM is attached to file private data */
+ bool vm_attach_to_fpriv;
+
/* Memory partition number, -1 means any partition */
int8_t mem_id;
};
@@ -392,7 +395,7 @@ int amdgpu_vm_set_pasid(struct amdgpu_device *adev, struct amdgpu_vm *vm,
u32 pasid);
long amdgpu_vm_wait_idle(struct amdgpu_vm *vm, long timeout);
-int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm);
+int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, bool vm_attach_to_fpriv);
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_release_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm);
void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
index cc3b1b596e56..16b3350aa896 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm_pt.c
@@ -502,7 +502,7 @@ int amdgpu_vm_pt_clear(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int level, bool immediate, struct amdgpu_bo_vm **vmbo)
{
- struct amdgpu_fpriv *fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+ struct amdgpu_fpriv *fpriv;
struct amdgpu_bo_param bp;
struct amdgpu_bo *bo;
struct dma_resv *resv;
@@ -535,7 +535,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
bp.no_wait_gpu = immediate;
- bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+ if (vm->vm_attach_to_fpriv) {
+ fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+ bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+ }
if (vm->root.bo)
bp.resv = vm->root.bo->tbo.base.resv;
@@ -561,7 +564,10 @@ int amdgpu_vm_pt_create(struct amdgpu_device *adev, struct amdgpu_vm *vm,
bp.type = ttm_bo_type_kernel;
bp.resv = bo->tbo.base.resv;
bp.bo_ptr_size = sizeof(struct amdgpu_bo);
- bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+ if (vm->vm_attach_to_fpriv) {
+ fpriv = container_of(vm, struct amdgpu_fpriv, vm);
+ bp.xcp_id_plus1 = fpriv->xcp_id == ~0 ? 0 : fpriv->xcp_id + 1;
+ }
r = amdgpu_bo_create(adev, &bp, &(*vmbo)->shadow);
--
2.25.1
More information about the amd-gfx
mailing list