[PATCH] drm/amdgpu: Fix use after free in trace_amdgpu_bo_move
Tvrtko Ursulin
tursulin at igalia.com
Wed Mar 20 14:40:38 UTC 2024
From: Tvrtko Ursulin <tursulin at ursulin.net>
Pipelined object migration will free up the old bo->resource, meaning
the tracepoint added in 94aeb4117343 ("drm/amdgpu: fix ftrace event
amdgpu_bo_move always move on same heap") will trigger an use after free
when it dereferences the cached old_mem.
Fix it by caching the memory type locally, which is the only thing
tracepoint wants to know about.
While at it convert the whole function to use the cached memory types for
consistency.
Signed-off-by: Tvrtko Ursulin <tursulin at ursulin.net>
Fixes: 94aeb4117343 ("drm/amdgpu: fix ftrace event amdgpu_bo_move always move on same heap")
Closes: https://gitlab.freedesktop.org/drm/amd/-/issues/3171
Cc: Beyond Wang <Wang.Beyond at amd.com>
Cc: Christian König <christian.koenig at amd.com>
Cc: Alex Deucher <alexander.deucher at amd.com>
---
Beware this is a speculative fix for now based only on source code
analysis and backtraces from 3171. It is also a bit on the churny side so
I am happy to minimize it. But most importantly, given how I don't have
any experience in amdgpu, I am looking for domain experts to either
confirm or disprove my analysis.
---
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c | 47 ++++++++++++-------------
1 file changed, 22 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index 8722beba494e..e38d2945dbf3 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -452,10 +452,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
struct amdgpu_device *adev;
struct amdgpu_bo *abo;
struct ttm_resource *old_mem = bo->resource;
+ uint32_t new_mem_type = new_mem->type;
+ uint32_t old_mem_type;
int r;
- if (new_mem->mem_type == TTM_PL_TT ||
- new_mem->mem_type == AMDGPU_PL_PREEMPT) {
+ if (new_mem_type == TTM_PL_TT || new_mem_type == AMDGPU_PL_PREEMPT) {
r = amdgpu_ttm_backend_bind(bo->bdev, bo->ttm, new_mem);
if (r)
return r;
@@ -464,20 +465,18 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
abo = ttm_to_amdgpu_bo(bo);
adev = amdgpu_ttm_adev(bo->bdev);
- if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM &&
- bo->ttm == NULL)) {
+ if (!old_mem || (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL)) {
ttm_bo_move_null(bo, new_mem);
goto out;
}
- if (old_mem->mem_type == TTM_PL_SYSTEM &&
- (new_mem->mem_type == TTM_PL_TT ||
- new_mem->mem_type == AMDGPU_PL_PREEMPT)) {
+ old_mem_type = old_mem->mem_type;
+ if (old_mem_type == TTM_PL_SYSTEM &&
+ (new_mem_type == TTM_PL_TT || new_mem_type == AMDGPU_PL_PREEMPT)) {
ttm_bo_move_null(bo, new_mem);
goto out;
}
- if ((old_mem->mem_type == TTM_PL_TT ||
- old_mem->mem_type == AMDGPU_PL_PREEMPT) &&
- new_mem->mem_type == TTM_PL_SYSTEM) {
+ if ((old_mem_type == TTM_PL_TT || old_mem_type == AMDGPU_PL_PREEMPT) &&
+ new_mem_type == TTM_PL_SYSTEM) {
r = ttm_bo_wait_ctx(bo, ctx);
if (r)
return r;
@@ -488,22 +487,22 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
goto out;
}
- if (old_mem->mem_type == AMDGPU_PL_GDS ||
- old_mem->mem_type == AMDGPU_PL_GWS ||
- old_mem->mem_type == AMDGPU_PL_OA ||
- old_mem->mem_type == AMDGPU_PL_DOORBELL ||
- new_mem->mem_type == AMDGPU_PL_GDS ||
- new_mem->mem_type == AMDGPU_PL_GWS ||
- new_mem->mem_type == AMDGPU_PL_OA ||
- new_mem->mem_type == AMDGPU_PL_DOORBELL) {
+ if (old_mem_type == AMDGPU_PL_GDS ||
+ old_mem_type == AMDGPU_PL_GWS ||
+ old_mem_type == AMDGPU_PL_OA ||
+ old_mem_type == AMDGPU_PL_DOORBELL ||
+ new_mem_type == AMDGPU_PL_GDS ||
+ new_mem_type == AMDGPU_PL_GWS ||
+ new_mem_type == AMDGPU_PL_OA ||
+ new_mem_type == AMDGPU_PL_DOORBELL) {
/* Nothing to save here */
ttm_bo_move_null(bo, new_mem);
goto out;
}
if (bo->type == ttm_bo_type_device &&
- new_mem->mem_type == TTM_PL_VRAM &&
- old_mem->mem_type != TTM_PL_VRAM) {
+ new_mem_type == TTM_PL_VRAM &&
+ old_mem_type != TTM_PL_VRAM) {
/* amdgpu_bo_fault_reserve_notify will re-set this if the CPU
* accesses the BO after it's moved.
*/
@@ -511,10 +510,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
}
if (adev->mman.buffer_funcs_enabled) {
- if (((old_mem->mem_type == TTM_PL_SYSTEM &&
- new_mem->mem_type == TTM_PL_VRAM) ||
- (old_mem->mem_type == TTM_PL_VRAM &&
- new_mem->mem_type == TTM_PL_SYSTEM))) {
+ if (((old_mem_type == TTM_PL_SYSTEM && new_mem_type == TTM_PL_VRAM) ||
+ (old_mem_type == TTM_PL_VRAM && new_mem_type == TTM_PL_SYSTEM))) {
hop->fpfn = 0;
hop->lpfn = 0;
hop->mem_type = TTM_PL_TT;
@@ -540,7 +537,7 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo, bool evict,
return r;
}
- trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
+ trace_amdgpu_bo_move(abo, new_mem_type, old_mem_type);
out:
/* update statistics */
atomic64_add(bo->base.size, &adev->num_bytes_moved);
--
2.44.0
More information about the amd-gfx
mailing list