[igt-dev] [PATCH] text/xe/xe_exec_basic: Wait for the correct vm_bind before exec

Matthew Brost matthew.brost at intel.com
Wed Mar 15 15:41:36 UTC 2023


On Wed, Mar 15, 2023 at 03:18:10PM +0100, Thomas Hellström wrote:
> The test was submitting the same syncobj for signalling to all
> vm_bind operations, and then awaited it in all execs. However, the
> binds don't necessarily complete in order, leading to GPU hangs if
> an exec is launched before its bind is complete:
> 
> sudo ./xe_exec_basic --r many-engines-many-vm-basic-defer-mmap
> IGT-Version: 1.26-g9b86de12 (x86_64) (Linux: 6.1.0+ x86_64)
> Starting subtest: many-engines-many-vm-basic-defer-mmap
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Test assertion failure function __xe_exec_assert, file ../lib/xe/xe_ioctl.c:373:
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Failed assertion: igt_ioctl(fd, (((1U) << (((0+8)+8)+14)) | ((('d')) << (0+8)) | (((0x40 + 0x08)) << 0) | ((((sizeof(struct drm_xe_exec)))) << ((0+8)+8))), exec) == 0
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Last errno: 125, Operation canceled
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: error: -1 != 0
> Stack trace:
> Subtest many-engines-many-vm-basic-defer-mmap failed.
> **** DEBUG ****
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Test assertion failure function __xe_exec_assert, file ../lib/xe/xe_ioctl.c:373:
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Failed assertion: igt_ioctl(fd, (((1U) << (((0+8)+8)+14)) | ((('d')) << (0+8)) | (((0x40 + 0x08)) << 0) | ((((sizeof(struct drm_xe_exec)))) << ((0+8)+8))), exec) == 0
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: Last errno: 125, Operation canceled
> (xe_exec_basic:4204) xe/xe_ioctl-CRITICAL: error: -1 != 0
> (xe_exec_basic:4204) igt_core-INFO: Stack trace:
> ****  END  ****
> Subtest many-engines-many-vm-basic-defer-mmap: FAIL (0.231s)
> 
> Fix this by using a unique syncobj per vm which is awaited in
> execs touching that VM.
> 
> Signed-off-by: Thomas Hellström <thomas.hellstrom at linux.intel.com>

Been meaning to fix this for awhile, LGTM.

Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> ---
>  tests/xe/xe_exec_basic.c | 20 ++++++++++++++------
>  1 file changed, 14 insertions(+), 6 deletions(-)
> 
> diff --git a/tests/xe/xe_exec_basic.c b/tests/xe/xe_exec_basic.c
> index cb0b8d2c..c1069829 100644
> --- a/tests/xe/xe_exec_basic.c
> +++ b/tests/xe/xe_exec_basic.c
> @@ -93,6 +93,7 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  	uint32_t engines[MAX_N_ENGINES];
>  	uint32_t bind_engines[MAX_N_ENGINES];
>  	uint32_t syncobjs[MAX_N_ENGINES];
> +	uint32_t bind_syncobjs[MAX_N_ENGINES];
>  	size_t bo_size;
>  	uint32_t bo = 0;
>  	struct {
> @@ -150,10 +151,11 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  		else
>  			bind_engines[i] = 0;
>  		syncobjs[i] = syncobj_create(fd, 0);
> +		bind_syncobjs[i] = syncobj_create(fd, 0);
>  	};
>  
> -	sync[0].handle = syncobj_create(fd, 0);
>  	for (i = 0; i < n_vm; ++i) {
> +		sync[0].handle = bind_syncobjs[i];
>  		if (bo)
>  			xe_vm_bind_async(fd, vm[i], bind_engines[i], bo, 0,
>  					 addr[i], bo_size, sync, 1);
> @@ -167,7 +169,8 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  		data = xe_bo_map(fd, bo, bo_size);
>  
>  	for (i = 0; i < n_execs; i++) {
> -		uint64_t __addr = addr[i % n_vm];
> +		int cur_vm = i % n_vm;
> +		uint64_t __addr = addr[cur_vm];
>  		uint64_t batch_offset = (char *)&data[i].batch - (char *)data;
>  		uint64_t batch_addr = __addr + batch_offset;
>  		uint64_t sdi_offset = (char *)&data[i].data - (char *)data;
> @@ -183,6 +186,7 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  		igt_assert(b <= ARRAY_SIZE(data[i].batch));
>  
>  		sync[0].flags &= ~DRM_XE_SYNC_SIGNAL;
> +		sync[0].handle = bind_syncobjs[cur_vm];
>  		sync[1].flags |= DRM_XE_SYNC_SIGNAL;
>  		sync[1].handle = syncobjs[e];
>  
> @@ -193,7 +197,7 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  		__xe_exec_assert(fd, &exec);
>  
>  		if (flags & REBIND && i + 1 != n_execs) {
> -			uint32_t __vm = vm[i % n_vm];
> +			uint32_t __vm = vm[cur_vm];
>  
>  			sync[1].flags &= ~DRM_XE_SYNC_SIGNAL;
>  			xe_vm_unbind_async(fd, __vm, bind_engines[e], 0,
> @@ -243,7 +247,10 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  	for (i = 0; i < n_engines && n_execs; i++)
>  		igt_assert(syncobj_wait(fd, &syncobjs[i], 1, INT64_MAX, 0,
>  					NULL));
> -	igt_assert(syncobj_wait(fd, &sync[0].handle, 1, INT64_MAX, 0, NULL));
> +
> +	for (i = 0; i < n_vm; i++)
> +		igt_assert(syncobj_wait(fd, &bind_syncobjs[i], 1, INT64_MAX, 0,
> +					NULL));
>  
>  	sync[0].flags |= DRM_XE_SYNC_SIGNAL;
>  	for (i = 0; i < n_vm; ++i) {
> @@ -258,7 +265,6 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  	     i < n_execs; i++)
>  		igt_assert_eq(data[i].data, 0xc0ffee);
>  
> -	syncobj_destroy(fd, sync[0].handle);
>  	for (i = 0; i < n_engines; i++) {
>  		syncobj_destroy(fd, syncobjs[i]);
>  		xe_engine_destroy(fd, engines[i]);
> @@ -272,8 +278,10 @@ test_exec(int fd, struct drm_xe_engine_class_instance *eci,
>  	} else if (!(flags & INVALIDATE)) {
>  		free(data);
>  	}
> -	for (i = 0; i < n_vm; ++i)
> +	for (i = 0; i < n_vm; ++i) {
> +		syncobj_destroy(fd, bind_syncobjs[i]);
>  		xe_vm_destroy(fd, vm[i]);
> +	}
>  }
>  
>  igt_main
> -- 
> 2.39.2
> 


More information about the igt-dev mailing list