[PATCH i-g-t v2 1/4] lib/intel_compute: move user-fence to allow async exec call
Francois Dugast
francois.dugast at intel.com
Wed May 28 09:11:59 UTC 2025
On Thu, May 22, 2025 at 09:55:12AM +0200, Zbigniew Kempczyński wrote:
> The user-fence in bo_execenv_exec() limits its usage to synchronous
> calls. Remove this limitation by moving the user-fence from the
> function level into the execenv structure. This allows synchronization
> to happen later, which is useful for parallel submission scenarios.
>
> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
> Cc: Francois Dugast <francois.dugast at intel.com>
Reviewed-by: Francois Dugast <francois.dugast at intel.com>
> ---
> lib/intel_compute.c | 77 ++++++++++++++++++++++++++++++---------------
> 1 file changed, 52 insertions(+), 25 deletions(-)
>
> diff --git a/lib/intel_compute.c b/lib/intel_compute.c
> index 070bc0d2d8..5579bec85b 100644
> --- a/lib/intel_compute.c
> +++ b/lib/intel_compute.c
> @@ -89,6 +89,12 @@ struct bo_execenv {
> uint32_t exec_queue;
> uint32_t array_size;
>
> + /* Xe user-fence */
> + uint32_t bo;
> + size_t bo_size;
> + struct bo_sync *bo_sync;
> + struct drm_xe_sync sync;
> +
> /* i915 part */
> struct drm_i915_gem_execbuffer2 execbuf;
> struct drm_i915_gem_exec_object2 *obj;
> @@ -266,48 +272,69 @@ static void bo_execenv_unbind(struct bo_execenv *execenv,
> }
> }
>
> -static void bo_execenv_exec(struct bo_execenv *execenv, uint64_t start_addr)
> +static void __bo_execenv_exec(struct bo_execenv *execenv, uint64_t start_addr)
> {
> int fd = execenv->fd;
>
> if (execenv->driver == INTEL_DRIVER_XE) {
> uint32_t exec_queue = execenv->exec_queue;
> - struct bo_sync *bo_sync;
> - size_t bo_size = sizeof(*bo_sync);
> - uint32_t bo = 0;
> - struct drm_xe_sync sync = {
> - .type = DRM_XE_SYNC_TYPE_USER_FENCE,
> - .flags = DRM_XE_SYNC_FLAG_SIGNAL,
> - .timeline_value = USER_FENCE_VALUE,
> - };
> + size_t bo_size = ALIGN(sizeof(struct bo_sync),
> + xe_get_default_alignment(fd));
>
> - bo_size = xe_bb_size(fd, bo_size);
> - bo = xe_bo_create(fd, execenv->vm, bo_size, vram_if_possible(fd, 0),
> - DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> - bo_sync = xe_bo_map(fd, bo, bo_size);
> - sync.addr = to_user_pointer(&bo_sync->sync);
> - xe_vm_bind_async(fd, execenv->vm, 0, bo, 0, ADDR_SYNC, bo_size, &sync, 1);
> - xe_wait_ufence(fd, &bo_sync->sync, USER_FENCE_VALUE, exec_queue, INT64_MAX);
> + execenv->bo_size = bo_size;
> + execenv->bo = xe_bo_create(fd, execenv->vm, bo_size, vram_if_possible(fd, 0),
> + DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
> + execenv->bo_sync = xe_bo_map(fd, execenv->bo, bo_size);
> + execenv->sync.type = DRM_XE_SYNC_TYPE_USER_FENCE;
> + execenv->sync.flags = DRM_XE_SYNC_FLAG_SIGNAL;
> + execenv->sync.timeline_value = USER_FENCE_VALUE;
> + execenv->sync.addr = to_user_pointer(&execenv->bo_sync->sync);
> + xe_vm_bind_async(fd, execenv->vm, 0, execenv->bo, 0, ADDR_SYNC,
> + bo_size, &execenv->sync, 1);
> + xe_wait_ufence(fd, &execenv->bo_sync->sync, USER_FENCE_VALUE,
> + exec_queue, INT64_MAX);
>
> - sync.addr = ADDR_SYNC;
> - bo_sync->sync = 0;
> + execenv->sync.addr = ADDR_SYNC;
> + execenv->bo_sync->sync = 0;
>
> - xe_exec_sync(fd, exec_queue, start_addr, &sync, 1);
> - xe_wait_ufence(fd, &bo_sync->sync, USER_FENCE_VALUE, exec_queue, INT64_MAX);
> -
> - munmap(bo_sync, bo_size);
> - gem_close(fd, bo);
> + xe_exec_sync(fd, exec_queue, start_addr, &execenv->sync, 1);
> } else {
> struct drm_i915_gem_execbuffer2 *execbuf = &execenv->execbuf;
> - struct drm_i915_gem_exec_object2 *obj = execenv->obj;
> - int num_objects = execbuf->buffer_count;
>
> execbuf->flags = I915_EXEC_RENDER;
> gem_execbuf(fd, execbuf);
> + }
> +}
> +
> +static void bo_execenv_sync(struct bo_execenv *execenv)
> +{
> + int fd = execenv->fd;
> +
> + if (execenv->driver == INTEL_DRIVER_XE) {
> + xe_wait_ufence(fd, &execenv->bo_sync->sync,
> + USER_FENCE_VALUE, execenv->exec_queue, INT64_MAX);
> + munmap(execenv->bo_sync, execenv->bo_size);
> + gem_close(fd, execenv->bo);
> + } else {
> + struct drm_i915_gem_execbuffer2 *execbuf = &execenv->execbuf;
> + struct drm_i915_gem_exec_object2 *obj = execenv->obj;
> + int num_objects = execbuf->buffer_count;
> +
> gem_sync(fd, obj[num_objects - 1].handle); /* batch handle */
> }
> }
>
> +static void bo_execenv_exec_async(struct bo_execenv *execenv, uint64_t start_addr)
> +{
> + __bo_execenv_exec(execenv, start_addr);
> +}
> +
> +static void bo_execenv_exec(struct bo_execenv *execenv, uint64_t start_addr)
> +{
> + bo_execenv_exec_async(execenv, start_addr);
> + bo_execenv_sync(execenv);
> +}
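
FWIW, a minimal sketch of how this async/sync split could be used for
parallel submission inside lib/intel_compute.c (illustration only, the
helper name and the two-execenv setup are made up, not part of this
patch):

static void example_parallel_exec(struct bo_execenv *e0, uint64_t addr0,
				  struct bo_execenv *e1, uint64_t addr1)
{
	/* queue both executions without waiting */
	bo_execenv_exec_async(e0, addr0);
	bo_execenv_exec_async(e1, addr1);

	/* wait for both user-fences (or gem_sync() on i915) */
	bo_execenv_sync(e0);
	bo_execenv_sync(e1);
}
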
> +
> static uint32_t size_thread_group_x(uint32_t work_size)
> {
> return MAX(1, work_size / (ENQUEUED_LOCAL_SIZE_X *
> --
> 2.43.0
>