[RFC v1 1/1] drm/xe: Allow fault injection in vm create and vm bind IOCTLs

Francois Dugast francois.dugast at intel.com
Fri Nov 8 17:20:56 UTC 2024


On Fri, Nov 08, 2024 at 08:16:50AM -0800, Matthew Brost wrote:
> On Fri, Nov 08, 2024 at 05:11:56PM +0100, Francois Dugast wrote:
> > Use fault injection infrastructure to allow specific functions to
> > be configured over debugfs for failing during the execution of
> > xe_vm_create_ioctl() and xe_vm_bind_ioctl(). This allows more
> > thorough testing from user space by going through code paths for
> > error handling and unwinding which cannot be reached by simply
> > injecting errors in IOCTL arguments. This can help increase code
> > robustness.
> > 
> 
> Let's also add xe_pt_update_ops_prepare and xe_pt_update_ops_run if possible.

This was just to show a couple of examples, but it is straightforward to
add more. For instance, this is enough to add xe_pt_update_ops_prepare and
xe_pt_update_ops_run:

	diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
	index e111698abbd9..684dc075deac 100644
	--- a/drivers/gpu/drm/xe/xe_pt.c
	+++ b/drivers/gpu/drm/xe/xe_pt.c
	@@ -1852,6 +1852,7 @@ int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops *vops)
	 
	 	return 0;
	 }
	+ALLOW_ERROR_INJECTION(xe_pt_update_ops_prepare, ERRNO);
	 
	 static void bind_op_commit(struct xe_vm *vm, struct xe_tile *tile,
	 			   struct xe_vm_pgtable_update_ops *pt_update_ops,
	@@ -2132,6 +2133,7 @@ xe_pt_update_ops_run(struct xe_tile *tile, struct xe_vma_ops *vops)
	 
	 	return ERR_PTR(err);
	 }
	+ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
	 
	 /**
	  * xe_pt_update_ops_fini() - Finish PT update operations

and in IGT:

	diff --git a/tests/intel/xe_fault_injection.c b/tests/intel/xe_fault_injection.c
	index 43d3a2da0..ced6edaea 100644
	--- a/tests/intel/xe_fault_injection.c
	+++ b/tests/intel/xe_fault_injection.c
	@@ -233,6 +233,8 @@ simple_vm_bind(int fd, uint32_t vm)
	  * @xe_vma_ops_alloc:			xe_vma_ops_alloc
	  * @vm_bind_ioctl_ops_execute:		vm_bind_ioctl_ops_execute
	  * @vm_bind_ioctl_ops_create:		vm_bind_ioctl_ops_create
	+ * @xe_pt_update_ops_prepare:	xe_pt_update_ops_prepare
	+ * @xe_pt_update_ops_run:	xe_pt_update_ops_run
	  */
	 static void
	 vm_bind_fail(int fd, const char function_name[])
	@@ -282,6 +284,8 @@ igt_main
	 		{ "xe_vma_ops_alloc" },
	 		{ "vm_bind_ioctl_ops_create" },
	 		{ "vm_bind_ioctl_ops_execute" },
	+		{ "xe_pt_update_ops_prepare" },
	+		{ "xe_pt_update_ops_run" },
	 		{ }
	 	};
	 

Then in the next IGT run:

	...
	Starting subtest: vm-bind-fail-xe_pt_update_ops_prepare
	Subtest vm-bind-fail-xe_pt_update_ops_prepare: SUCCESS (0.042s)
	Starting subtest: vm-bind-fail-xe_pt_update_ops_run
	Subtest vm-bind-fail-xe_pt_update_ops_run: SUCCESS (0.048s)
	...

... which triggers vm_bind_ioctl_ops_unwind().

I will wait for more feedback and bring in those changes in the next version.

Thanks,
Francois

> 
> Matt
> 
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > ---
> >  drivers/gpu/drm/xe/xe_exec_queue.c | 1 +
> >  drivers/gpu/drm/xe/xe_pt.c         | 1 +
> >  drivers/gpu/drm/xe/xe_vm.c         | 4 ++++
> >  3 files changed, 6 insertions(+)
> > 
> > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > index fd0f3b3c9101..b999db5f5c19 100644
> > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > @@ -240,6 +240,7 @@ struct xe_exec_queue *xe_exec_queue_create_bind(struct xe_device *xe,
> >  
> >  	return q;
> >  }
> > +ALLOW_ERROR_INJECTION(xe_exec_queue_create_bind, ERRNO);
> >  
> >  void xe_exec_queue_destroy(struct kref *ref)
> >  {
> > diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> > index f27f579f4d85..e111698abbd9 100644
> > --- a/drivers/gpu/drm/xe/xe_pt.c
> > +++ b/drivers/gpu/drm/xe/xe_pt.c
> > @@ -136,6 +136,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
> >  	xe_pt_free(pt);
> >  	return ERR_PTR(err);
> >  }
> > +ALLOW_ERROR_INJECTION(xe_pt_create, ERRNO);
> >  
> >  /**
> >   * xe_pt_populate_empty() - Populate a page-table bo with scratch- or zero
> > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > index 624133fae5f5..2e67648ed512 100644
> > --- a/drivers/gpu/drm/xe/xe_vm.c
> > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > @@ -740,6 +740,7 @@ static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool array_of_binds)
> >  
> >  	return 0;
> >  }
> > +ALLOW_ERROR_INJECTION(xe_vma_ops_alloc, ERRNO);
> >  
> >  static void xe_vma_ops_fini(struct xe_vma_ops *vops)
> >  {
> > @@ -1352,6 +1353,7 @@ static int xe_vm_create_scratch(struct xe_device *xe, struct xe_tile *tile,
> >  
> >  	return 0;
> >  }
> > +ALLOW_ERROR_INJECTION(xe_vm_create_scratch, ERRNO);
> >  
> >  static void xe_vm_free_scratch(struct xe_vm *vm)
> >  {
> > @@ -1978,6 +1980,7 @@ vm_bind_ioctl_ops_create(struct xe_vm *vm, struct xe_bo *bo,
> >  
> >  	return ops;
> >  }
> > +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_create, ERRNO);
> >  
> >  static struct xe_vma *new_vma(struct xe_vm *vm, struct drm_gpuva_op_map *op,
> >  			      u16 pat_index, unsigned int flags)
> > @@ -2697,6 +2700,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> >  	drm_exec_fini(&exec);
> >  	return err;
> >  }
> > +ALLOW_ERROR_INJECTION(vm_bind_ioctl_ops_execute, ERRNO);
> >  
> >  #define SUPPORTED_FLAGS_STUB  \
> >  	(DRM_XE_VM_BIND_FLAG_READONLY | \
> > -- 
> > 2.43.0
> > 


More information about the Intel-xe mailing list