[Intel-xe] [PATCH 3/4] drm/xe: Add vm snapshot mutex for easily taking a vm snapshot during devcoredump

Matthew Brost matthew.brost at intel.com
Fri Oct 13 19:21:46 UTC 2023


On Fri, Oct 13, 2023 at 05:21:41PM +0200, maarten.lankhorst at linux.intel.com wrote:
> From: Maarten Lankhorst <dev at lankhorst.se>
> 
> Signed-off-by: Maarten Lankhorst <dev at lankhorst.se>
> ---
>  drivers/gpu/drm/xe/xe_vm.c       | 7 +++++++
>  drivers/gpu/drm/xe/xe_vm_types.h | 5 +++++
>  2 files changed, 12 insertions(+)
> 
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index da006249147e..544d998293d3 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -1163,7 +1163,9 @@ static int xe_vm_insert_vma(struct xe_vm *vm, struct xe_vma *vma)
>  	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
>  	lockdep_assert_held(&vm->lock);
>  
> +	mutex_lock(&vm->snap_mutex);

I can't say I'm a fan of adding a new lock. Can you explain why this is
needed? E.g., why does the existing &vm->lock not work? We have lockdep
annotations in both xe_vm_insert_vma & xe_vm_remove_vma that vm->lock is
held. As is, the capture should be able to take vm->lock in write mode,
and that would be safe.

Side note: the annotations should probably be in write mode for
xe_vm_insert_vma & xe_vm_remove_vma, and the capture should then be able
to take this lock in read mode. We should double check on this.

Matt 

>  	err = drm_gpuva_insert(&vm->gpuvm, &vma->gpuva);
> +	mutex_unlock(&vm->snap_mutex);
>  	XE_WARN_ON(err);	/* Shouldn't be possible */
>  
>  	return err;
> @@ -1174,7 +1176,9 @@ static void xe_vm_remove_vma(struct xe_vm *vm, struct xe_vma *vma)
>  	xe_assert(vm->xe, xe_vma_vm(vma) == vm);
>  	lockdep_assert_held(&vm->lock);
>  
> +	mutex_lock(&vm->snap_mutex);
>  	drm_gpuva_remove(&vma->gpuva);
> +	mutex_unlock(&vm->snap_mutex);
>  	if (vm->usm.last_fault_vma == vma)
>  		vm->usm.last_fault_vma = NULL;
>  }
> @@ -1343,6 +1347,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>  	vm->flags = flags;
>  
>  	init_rwsem(&vm->lock);
> +	mutex_init(&vm->snap_mutex);
>  
>  	INIT_LIST_HEAD(&vm->rebind_list);
>  
> @@ -1486,6 +1491,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>  	dma_resv_unlock(&vm->resv);
>  	drm_gpuvm_destroy(&vm->gpuvm);
>  err_put:
> +	mutex_destroy(&vm->snap_mutex);
>  	dma_resv_fini(&vm->resv);
>  	for_each_tile(tile, xe, id)
>  		xe_range_fence_tree_fini(&vm->rftree[id]);
> @@ -1625,6 +1631,7 @@ void xe_vm_close_and_put(struct xe_vm *vm)
>  	up_write(&vm->lock);
>  
>  	drm_gpuvm_destroy(&vm->gpuvm);
> +	mutex_destroy(&vm->snap_mutex);
>  
>  	mutex_lock(&xe->usm.lock);
>  	if (vm->flags & XE_VM_FLAG_FAULT_MODE)
> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> index 825f8127741e..bf5017b631a5 100644
> --- a/drivers/gpu/drm/xe/xe_vm_types.h
> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> @@ -183,6 +183,11 @@ struct xe_vm {
>  	 * VM
>  	 */
>  	struct rw_semaphore lock;
> +	/**
> +	 * @snap_mutex: Mutex used to guard insertions and removals from gpuva,
> +	 * so we can take a snapshot safely from devcoredump.
> +	 */
> +	struct mutex snap_mutex;
>  
>  	/**
>  	 * @rebind_list: list of VMAs that need rebinding, and if they are
> -- 
> 2.40.1
> 


More information about the Intel-xe mailing list