[PATCH i-g-t v2 1/4] lib/intel_compute: move user-fence to allow async exec call
Zbigniew Kempczyński
zbigniew.kempczynski at intel.com
Thu May 22 07:55:12 UTC 2025
The user-fence in bo_execenv_exec() limits its usage to synchronous calls.
Remove this limitation by moving the user-fence from function scope
into the execenv structure. This allows synchronizing later, which is useful
for parallel submission scenarios.
Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
Cc: Francois Dugast <francois.dugast at intel.com>
---
lib/intel_compute.c | 77 ++++++++++++++++++++++++++++++---------------
1 file changed, 52 insertions(+), 25 deletions(-)
diff --git a/lib/intel_compute.c b/lib/intel_compute.c
index 070bc0d2d8..5579bec85b 100644
--- a/lib/intel_compute.c
+++ b/lib/intel_compute.c
@@ -89,6 +89,12 @@ struct bo_execenv {
uint32_t exec_queue;
uint32_t array_size;
+ /* Xe user-fence */
+ uint32_t bo;
+ size_t bo_size;
+ struct bo_sync *bo_sync;
+ struct drm_xe_sync sync;
+
/* i915 part */
struct drm_i915_gem_execbuffer2 execbuf;
struct drm_i915_gem_exec_object2 *obj;
@@ -266,48 +272,69 @@ static void bo_execenv_unbind(struct bo_execenv *execenv,
}
}
-static void bo_execenv_exec(struct bo_execenv *execenv, uint64_t start_addr)
+static void __bo_execenv_exec(struct bo_execenv *execenv, uint64_t start_addr)
{
int fd = execenv->fd;
if (execenv->driver == INTEL_DRIVER_XE) {
uint32_t exec_queue = execenv->exec_queue;
- struct bo_sync *bo_sync;
- size_t bo_size = sizeof(*bo_sync);
- uint32_t bo = 0;
- struct drm_xe_sync sync = {
- .type = DRM_XE_SYNC_TYPE_USER_FENCE,
- .flags = DRM_XE_SYNC_FLAG_SIGNAL,
- .timeline_value = USER_FENCE_VALUE,
- };
+ size_t bo_size = ALIGN(sizeof(struct bo_sync),
+ xe_get_default_alignment(fd));
- bo_size = xe_bb_size(fd, bo_size);
- bo = xe_bo_create(fd, execenv->vm, bo_size, vram_if_possible(fd, 0),
- DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
- bo_sync = xe_bo_map(fd, bo, bo_size);
- sync.addr = to_user_pointer(&bo_sync->sync);
- xe_vm_bind_async(fd, execenv->vm, 0, bo, 0, ADDR_SYNC, bo_size, &sync, 1);
- xe_wait_ufence(fd, &bo_sync->sync, USER_FENCE_VALUE, exec_queue, INT64_MAX);
+ execenv->bo_size = bo_size;
+ execenv->bo = xe_bo_create(fd, execenv->vm, bo_size, vram_if_possible(fd, 0),
+ DRM_XE_GEM_CREATE_FLAG_NEEDS_VISIBLE_VRAM);
+ execenv->bo_sync = xe_bo_map(fd, execenv->bo, bo_size);
+ execenv->sync.type = DRM_XE_SYNC_TYPE_USER_FENCE;
+ execenv->sync.flags = DRM_XE_SYNC_FLAG_SIGNAL;
+ execenv->sync.timeline_value = USER_FENCE_VALUE;
+ execenv->sync.addr = to_user_pointer(&execenv->bo_sync->sync);
+ xe_vm_bind_async(fd, execenv->vm, 0, execenv->bo, 0, ADDR_SYNC,
+ bo_size, &execenv->sync, 1);
+ xe_wait_ufence(fd, &execenv->bo_sync->sync, USER_FENCE_VALUE,
+ exec_queue, INT64_MAX);
- sync.addr = ADDR_SYNC;
- bo_sync->sync = 0;
+ execenv->sync.addr = ADDR_SYNC;
+ execenv->bo_sync->sync = 0;
- xe_exec_sync(fd, exec_queue, start_addr, &sync, 1);
- xe_wait_ufence(fd, &bo_sync->sync, USER_FENCE_VALUE, exec_queue, INT64_MAX);
-
- munmap(bo_sync, bo_size);
- gem_close(fd, bo);
+ xe_exec_sync(fd, exec_queue, start_addr, &execenv->sync, 1);
} else {
struct drm_i915_gem_execbuffer2 *execbuf = &execenv->execbuf;
- struct drm_i915_gem_exec_object2 *obj = execenv->obj;
- int num_objects = execbuf->buffer_count;
execbuf->flags = I915_EXEC_RENDER;
gem_execbuf(fd, execbuf);
+ }
+}
+
+static void bo_execenv_sync(struct bo_execenv *execenv)
+{
+ int fd = execenv->fd;
+
+ if (execenv->driver == INTEL_DRIVER_XE) {
+ xe_wait_ufence(fd, &execenv->bo_sync->sync,
+ USER_FENCE_VALUE, execenv->exec_queue, INT64_MAX);
+ munmap(execenv->bo_sync, execenv->bo_size);
+ gem_close(fd, execenv->bo);
+ } else {
+ struct drm_i915_gem_execbuffer2 *execbuf = &execenv->execbuf;
+ struct drm_i915_gem_exec_object2 *obj = execenv->obj;
+ int num_objects = execbuf->buffer_count;
+
gem_sync(fd, obj[num_objects - 1].handle); /* batch handle */
}
}
+static void bo_execenv_exec_async(struct bo_execenv *execenv, uint64_t start_addr)
+{
+ __bo_execenv_exec(execenv, start_addr);
+}
+
+static void bo_execenv_exec(struct bo_execenv *execenv, uint64_t start_addr)
+{
+ bo_execenv_exec_async(execenv, start_addr);
+ bo_execenv_sync(execenv);
+}
+
static uint32_t size_thread_group_x(uint32_t work_size)
{
return MAX(1, work_size / (ENQUEUED_LOCAL_SIZE_X *
--
2.43.0
More information about the igt-dev
mailing list