[PATCH v2] drm/amdgpu: Add NULL check for 'bo_va' in update_bo_mapping v2
Christian König
christian.koenig at amd.com
Tue Apr 22 13:34:58 UTC 2025
Am 22.04.25 um 15:17 schrieb Srinivasan Shanmugam:
> This change adds a check to ensure that 'bo_va' is not null before
> dereferencing it. If 'bo_va' is null, the function returns early,
> preventing any potential crashes or undefined behavior
That commit message doesn't reflect the changes any more.
>
> Fixes the below:
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c:139 amdgpu_gem_update_bo_mapping()
> error: we previously assumed 'bo_va' could be null (see line 124)
>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> 115 static void
> 116 amdgpu_gem_update_bo_mapping(struct drm_file *filp,
> 117 struct amdgpu_bo_va *bo_va,
> 118 uint32_t operation,
> 119 uint64_t point,
> 120 struct dma_fence *fence,
> 121 struct drm_syncobj *syncobj,
> 122 struct dma_fence_chain *chain)
> 123 {
> 124 struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
> ^^^^^^^^^^ If bo_va is NULL then bo is also NULL
>
> ...
> 135 case AMDGPU_VA_OP_REPLACE:
> 136 if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
> ^^
>
> 137 last_update = vm->last_update;
> 138 else
> --> 139 last_update = bo_va->last_pt_update;
> ^^^^^ This pointer is dereferenced without being checked.
>
> 140 break;
Please completely drop that. This conclusion is actually incorrect.
BO might be NULL here because bo_va->base.bo is NULL and *not* because bo_va is NULL.
@Dan your script seems to reports false positives here.
>
> v2: Refactor `amdgpu_gem_update_bo_mapping()` to move the last update
> fence logic to `amdgpu_gem_va_update_vm()`. (Christian)
>
> Fixes: 16856d135622 ("drm/amdgpu: update userqueue BOs and PDs")
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Reported-by: Dan Carpenter <dan.carpenter at linaro.org>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
> ---
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 128 ++++++++++++------------
> 1 file changed, 64 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index f03fc3cf4d50..32d80acfe65a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -112,6 +112,63 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp,
> return 0;
> }
>
> +/**
> + * amdgpu_gem_va_update_vm -update the bo_va in its VM
> + *
> + * @adev: amdgpu_device pointer
> + * @vm: vm to update
> + * @bo_va: bo_va to update
> + * @operation: map, unmap or clear
> + * @last_update: a pointer to a pointer where the last update fence will
> + * be stored, reflecting the most recent mapping or update operation.
> + *
> + * Update the bo_va directly after setting its address. Errors are not
> + * vital here, so they are not reported back to userspace.
> + *
> + * Returns resulting fence if freed BO(s) got cleared from the PT.
> + * otherwise stub fence in case of error.
> + */
> +static struct dma_fence *
> +amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> + struct amdgpu_vm *vm,
> + struct amdgpu_bo_va *bo_va,
> + u32 operation,
> + struct dma_fence **last_update)
> +{
> + struct dma_fence *fence = dma_fence_get_stub();
> + int r;
> +
> + if (!amdgpu_vm_ready(vm))
> + return fence;
> +
> + r = amdgpu_vm_clear_freed(adev, vm, &fence);
> + if (r)
> + goto error;
> +
> + if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) {
> + r = amdgpu_vm_bo_update(adev, bo_va, false);
> + if (r)
> + goto error;
> +
> + /* Set the last_update fence based on the operation */
> + if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo))
> + last_update = &vm->last_update; /* Use VM's last update fence */
> + else
> + last_update = &bo_va->last_pt_update; /* Use buffer object's last update fence */
> +
> + } else if (operation == AMDGPU_VA_OP_UNMAP || operation == AMDGPU_VA_OP_CLEAR) {
> + *last_update = fence; /* Assigning the temporary fence for unmap/clear */
> + }
> +
> + r = amdgpu_vm_update_pdes(adev, vm, false);
> +
> +error:
> + if (r && r != -ERESTARTSYS)
> + DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
> +
> + return fence;
> +}
> +
Well that doesn't seem to make any sense at all.
> static void
> amdgpu_gem_update_bo_mapping(struct drm_file *filp,
> struct amdgpu_bo_va *bo_va,
> @@ -121,30 +178,17 @@ amdgpu_gem_update_bo_mapping(struct drm_file *filp,
> struct drm_syncobj *syncobj,
> struct dma_fence_chain *chain)
As far as I can see this function here should be completely removed.
> {
> - struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
> struct amdgpu_fpriv *fpriv = filp->driver_priv;
> struct amdgpu_vm *vm = &fpriv->vm;
> - struct dma_fence *last_update;
> + struct amdgpu_device *adev = drm_to_adev(filp->minor->dev);
> + struct dma_fence *last_update = NULL;
>
> if (!syncobj)
> return;
>
> /* Find the last update fence */
> - switch (operation) {
> - case AMDGPU_VA_OP_MAP:
> - case AMDGPU_VA_OP_REPLACE:
> - if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
> - last_update = vm->last_update;
> - else
> - last_update = bo_va->last_pt_update;
> - break;
> - case AMDGPU_VA_OP_UNMAP:
> - case AMDGPU_VA_OP_CLEAR:
> - last_update = fence;
> - break;
> - default:
> - return;
> - }
> + /* Call to update the BO VA and retrieve the resulting fence */
> + (void)amdgpu_gem_va_update_vm(adev, vm, bo_va, operation, &last_update);
>
> /* Add fence to timeline */
> if (!point)
> @@ -744,52 +788,6 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
> return r;
> }
>
> -/**
> - * amdgpu_gem_va_update_vm -update the bo_va in its VM
> - *
> - * @adev: amdgpu_device pointer
> - * @vm: vm to update
> - * @bo_va: bo_va to update
> - * @operation: map, unmap or clear
> - *
> - * Update the bo_va directly after setting its address. Errors are not
> - * vital here, so they are not reported back to userspace.
> - *
> - * Returns resulting fence if freed BO(s) got cleared from the PT.
> - * otherwise stub fence in case of error.
> - */
> -static struct dma_fence *
> -amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> - struct amdgpu_vm *vm,
> - struct amdgpu_bo_va *bo_va,
> - uint32_t operation)
> -{
> - struct dma_fence *fence = dma_fence_get_stub();
> - int r;
> -
> - if (!amdgpu_vm_ready(vm))
> - return fence;
> -
> - r = amdgpu_vm_clear_freed(adev, vm, &fence);
> - if (r)
> - goto error;
> -
> - if (operation == AMDGPU_VA_OP_MAP ||
> - operation == AMDGPU_VA_OP_REPLACE) {
> - r = amdgpu_vm_bo_update(adev, bo_va, false);
> - if (r)
> - goto error;
> - }
> -
> - r = amdgpu_vm_update_pdes(adev, vm, false);
> -
> -error:
> - if (r && r != -ERESTARTSYS)
> - DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
> -
> - return fence;
> -}
> -
And that function should absolutely stay where it is.
Regards,
Christian.
> /**
> * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
> *
> @@ -839,6 +837,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> struct drm_syncobj *timeline_syncobj = NULL;
> struct dma_fence_chain *timeline_chain = NULL;
> struct dma_fence *fence;
> + struct dma_fence *last_update_fence = NULL;
> struct drm_exec exec;
> uint64_t va_flags;
> uint64_t vm_size;
> @@ -968,8 +967,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
> break;
> }
> if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
> + /* Call to update VM and retrieve the resulting fence */
> fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> - args->operation);
> + args->operation, &last_update_fence);
>
> if (timeline_syncobj)
> amdgpu_gem_update_bo_mapping(filp, bo_va,
More information about the amd-gfx
mailing list