[PATCH v2] drm/amdgpu: Add NULL check for 'bo_va' in update_bo_mapping v2

Tue Apr 22 13:34:58 UTC 2025

Am 22.04.25 um 15:17 schrieb Srinivasan Shanmugam:
> This change adds a check to ensure that 'bo_va' is not null before
> dereferencing it. If 'bo_va' is null, the function returns early,
> preventing any potential crashes or undefined behavior

That commit message doesn't reflect the changes any more.

>
> Fixes the below:
> 	drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c:139 amdgpu_gem_update_bo_mapping()
> 	error: we previously assumed 'bo_va' could be null (see line 124)
>
> drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
>     115 static void
>     116 amdgpu_gem_update_bo_mapping(struct drm_file *filp,
>     117                              struct amdgpu_bo_va *bo_va,
>     118                              uint32_t operation,
>     119                              uint64_t point,
>     120                              struct dma_fence *fence,
>     121                              struct drm_syncobj *syncobj,
>     122                              struct dma_fence_chain *chain)
>     123 {
>     124         struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
>                                   ^^^^^^^^^^ If bo_va is NULL then bo is also NULL
>
> 	...
>     135         case AMDGPU_VA_OP_REPLACE:
>     136                 if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
>                             ^^
>
>     137                         last_update = vm->last_update;
>     138                 else
> --> 139                         last_update = bo_va->last_pt_update;
>                                               ^^^^^ This pointer is dereferenced without being checked.
>
>     140                 break;

Please completely drop that. This conclusion is actually incorrect.

BO might be NULL here because bo_va->base.bo is NULL and *not* because bo_va is NULL.

@Dan your script seems to reports false positives here.

>
> v2: Refactor `amdgpu_gem_update_bo_mapping()` to move the last update
>     fence logic to `amdgpu_gem_va_update_vm()`. (Christian)
>
> Fixes: 16856d135622 ("drm/amdgpu: update userqueue BOs and PDs")
> Cc: Christian König <christian.koenig at amd.com>
> Cc: Alex Deucher <alexander.deucher at amd.com>
> Reported-by: Dan Carpenter <dan.carpenter at linaro.org>
> Signed-off-by: Srinivasan Shanmugam <srinivasan.shanmugam at amd.com>
> ---
>  drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c | 128 ++++++++++++------------
>  1 file changed, 64 insertions(+), 64 deletions(-)
>
> diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> index f03fc3cf4d50..32d80acfe65a 100644
> --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
> @@ -112,6 +112,63 @@ amdgpu_gem_update_timeline_node(struct drm_file *filp,
>  	return 0;
>  }
>  
> +/**
> + * amdgpu_gem_va_update_vm -update the bo_va in its VM
> + *
> + * @adev: amdgpu_device pointer
> + * @vm: vm to update
> + * @bo_va: bo_va to update
> + * @operation: map, unmap or clear
> + * @last_update: a pointer to a pointer where the last update fence will
> + * be stored, reflecting the most recent mapping or update operation.
> + *
> + * Update the bo_va directly after setting its address. Errors are not
> + * vital here, so they are not reported back to userspace.
> + *
> + * Returns resulting fence if freed BO(s) got cleared from the PT.
> + * otherwise stub fence in case of error.
> + */
> +static struct dma_fence *
> +amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> +			struct amdgpu_vm *vm,
> +			struct amdgpu_bo_va *bo_va,
> +			u32 operation,
> +			struct dma_fence **last_update)
> +{
> +	struct dma_fence *fence = dma_fence_get_stub();
> +	int r;
> +
> +	if (!amdgpu_vm_ready(vm))
> +		return fence;
> +
> +	r = amdgpu_vm_clear_freed(adev, vm, &fence);
> +	if (r)
> +		goto error;
> +
> +	if (operation == AMDGPU_VA_OP_MAP || operation == AMDGPU_VA_OP_REPLACE) {
> +		r = amdgpu_vm_bo_update(adev, bo_va, false);
> +		if (r)
> +			goto error;
> +
> +		/* Set the last_update fence based on the operation */
> +		if (amdgpu_vm_is_bo_always_valid(vm, bo_va->base.bo))
> +			last_update = &vm->last_update; /* Use VM's last update fence */
> +		else
> +			last_update = &bo_va->last_pt_update; /* Use buffer object's last update fence */
> +
> +	} else if (operation == AMDGPU_VA_OP_UNMAP || operation == AMDGPU_VA_OP_CLEAR) {
> +		*last_update = fence; /* Assigning the temporary fence for unmap/clear */
> +	}
> +
> +	r = amdgpu_vm_update_pdes(adev, vm, false);
> +
> +error:
> +	if (r && r != -ERESTARTSYS)
> +		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
> +
> +	return fence;
> +}
> +

Well that doesn't seem to make any sense at all.

>  static void
>  amdgpu_gem_update_bo_mapping(struct drm_file *filp,
>  			     struct amdgpu_bo_va *bo_va,
> @@ -121,30 +178,17 @@ amdgpu_gem_update_bo_mapping(struct drm_file *filp,
>  			     struct drm_syncobj *syncobj,
>  			     struct dma_fence_chain *chain)

As far as I can see this function here should be completely removed.

>  {
> -	struct amdgpu_bo *bo = bo_va ? bo_va->base.bo : NULL;
>  	struct amdgpu_fpriv *fpriv = filp->driver_priv;
>  	struct amdgpu_vm *vm = &fpriv->vm;
> -	struct dma_fence *last_update;
> +	struct amdgpu_device *adev = drm_to_adev(filp->minor->dev);
> +	struct dma_fence *last_update = NULL;
>  
>  	if (!syncobj)
>  		return;
>  
>  	/* Find the last update fence */
> -	switch (operation) {
> -	case AMDGPU_VA_OP_MAP:
> -	case AMDGPU_VA_OP_REPLACE:
> -		if (bo && (bo->tbo.base.resv == vm->root.bo->tbo.base.resv))
> -			last_update = vm->last_update;
> -		else
> -			last_update = bo_va->last_pt_update;
> -		break;
> -	case AMDGPU_VA_OP_UNMAP:
> -	case AMDGPU_VA_OP_CLEAR:
> -		last_update = fence;
> -		break;
> -	default:
> -		return;
> -	}
> +	/* Call to update the BO VA and retrieve the resulting fence */
> +	(void)amdgpu_gem_va_update_vm(adev, vm, bo_va, operation, &last_update);
>  
>  	/* Add fence to timeline */
>  	if (!point)
> @@ -744,52 +788,6 @@ int amdgpu_gem_metadata_ioctl(struct drm_device *dev, void *data,
>  	return r;
>  }
>  
> -/**
> - * amdgpu_gem_va_update_vm -update the bo_va in its VM
> - *
> - * @adev: amdgpu_device pointer
> - * @vm: vm to update
> - * @bo_va: bo_va to update
> - * @operation: map, unmap or clear
> - *
> - * Update the bo_va directly after setting its address. Errors are not
> - * vital here, so they are not reported back to userspace.
> - *
> - * Returns resulting fence if freed BO(s) got cleared from the PT.
> - * otherwise stub fence in case of error.
> - */
> -static struct dma_fence *
> -amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
> -			struct amdgpu_vm *vm,
> -			struct amdgpu_bo_va *bo_va,
> -			uint32_t operation)
> -{
> -	struct dma_fence *fence = dma_fence_get_stub();
> -	int r;
> -
> -	if (!amdgpu_vm_ready(vm))
> -		return fence;
> -
> -	r = amdgpu_vm_clear_freed(adev, vm, &fence);
> -	if (r)
> -		goto error;
> -
> -	if (operation == AMDGPU_VA_OP_MAP ||
> -	    operation == AMDGPU_VA_OP_REPLACE) {
> -		r = amdgpu_vm_bo_update(adev, bo_va, false);
> -		if (r)
> -			goto error;
> -	}
> -
> -	r = amdgpu_vm_update_pdes(adev, vm, false);
> -
> -error:
> -	if (r && r != -ERESTARTSYS)
> -		DRM_ERROR("Couldn't update BO_VA (%d)\n", r);
> -
> -	return fence;
> -}
> -

And that function should absolutely stay where it is.

Regards,
Christian.

>  /**
>   * amdgpu_gem_va_map_flags - map GEM UAPI flags into hardware flags
>   *
> @@ -839,6 +837,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  	struct drm_syncobj *timeline_syncobj = NULL;
>  	struct dma_fence_chain *timeline_chain = NULL;
>  	struct dma_fence *fence;
> +	struct dma_fence *last_update_fence = NULL;
>  	struct drm_exec exec;
>  	uint64_t va_flags;
>  	uint64_t vm_size;
> @@ -968,8 +967,9 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
>  		break;
>  	}
>  	if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !adev->debug_vm) {
> +		/* Call to update VM and retrieve the resulting fence */
>  		fence = amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
> -						args->operation);
> +						args->operation, &last_update_fence);
>  
>  		if (timeline_syncobj)
>  			amdgpu_gem_update_bo_mapping(filp, bo_va,