[igt-dev] [PATCH i-g-t 05/12] lib/xe_util: Add vm bind/unbind helper for Xe

Karolina Stolarek karolina.stolarek at intel.com
Thu Jul 6 10:16:43 UTC 2023


On 6.07.2023 07:34, Zbigniew Kempczyński wrote:
> On Wed, Jul 05, 2023 at 05:12:23PM +0200, Karolina Stolarek wrote:
>> On 4.07.2023 11:00, Zbigniew Kempczyński wrote:
>>> Before calling exec we need to prepare vm to contain valid entries.
>>> Bind/unbind in xe expects a single bind_op or a vector of bind_ops, which
>>> makes preparing it a little bit inconvenient. Add a function
>>> which iterates over list of xe_object (auxiliary structure which
>>> describes bind information for object) and performs the bind/unbind
>>> in one step. It also supports passing syncobj in/out to work in
>>> pipelined executions.
>>>
>>> Signed-off-by: Zbigniew Kempczyński <zbigniew.kempczynski at intel.com>
>>> ---
>>>    lib/xe/xe_util.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++
>>>    lib/xe/xe_util.h |  21 ++++++--
>>>    2 files changed, 151 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/xe/xe_util.c b/lib/xe/xe_util.c
>>> index 448b3a3d27..8552db47d5 100644
>>> --- a/lib/xe/xe_util.c
>>> +++ b/lib/xe/xe_util.c
>>> @@ -102,3 +102,136 @@ char *xe_memregion_dynamic_subtest_name(int xe, struct igt_collection *set)
>>>    	return name;
>>>    }
>>> +#ifdef XEBINDDBG
>>> +#define bind_info igt_info
>>> +#define bind_debug igt_debug
>>> +#else
>>> +#define bind_info(...) {}
>>> +#define bind_debug(...) {}
>>> +#endif
>>> +
>>> +static struct drm_xe_vm_bind_op *xe_alloc_bind_ops(struct igt_list_head *obj_list,
>>> +						   uint32_t *num_ops)
>>> +{
>>> +	struct drm_xe_vm_bind_op *bind_ops, *ops;
>>> +	struct xe_object *obj;
>>> +	uint32_t num_objects = 0, i = 0, op;
>>> +
>>> +	num_objects = 0;
>>
>> It is already set to 0, you can remove that line
>>
>>> +	igt_list_for_each_entry(obj, obj_list, link)
>>> +		num_objects++;
>>> +
>>> +	*num_ops = num_objects;
>>> +	if (!num_objects) {
>>> +		bind_info(" [nothing to bind]\n");
>>> +		return NULL;
>>> +	}
>>> +
>>> +	bind_ops = calloc(num_objects, sizeof(*bind_ops));
>>> +	igt_assert(bind_ops);
>>> +
>>> +	igt_list_for_each_entry(obj, obj_list, link) {
>>> +		ops = &bind_ops[i];
>>> +
>>> +		if (obj->bind_op == XE_OBJECT_BIND) {
>>> +			op = XE_VM_BIND_OP_MAP | XE_VM_BIND_FLAG_ASYNC;
>>> +			ops->obj = obj->handle;
>>> +		} else {
>>> +			op = XE_VM_BIND_OP_UNMAP | XE_VM_BIND_FLAG_ASYNC;
>>> +		}
>>> +
>>> +		ops->op = op;
>>> +		ops->obj_offset = 0;
>>> +		ops->addr = obj->offset;
>>> +		ops->range = obj->size;
>>> +		ops->region = 0;
>>
>> Shouldn't we pass an actual region instance here?
>>
> 
> I think this is valid for prefetch mode (see xe_vm_prefetch_async() helper)
> so for normal usage 0 is fine.

If I understand correctly, this will bring BO to the system memory, so 
it should be ok. Also, why prefetch? It looks like we're working with 
map and unmap here.

All the best,
Karolina

> 
>>> +
>>> +		bind_info("  [%d]: [%6s] handle: %u, offset: %llx, size: %llx\n",
>>> +			  i, obj->bind_op == XE_OBJECT_BIND ? "BIND" : "UNBIND",
>>> +			  ops->obj, (long long)ops->addr, (long long)ops->range);
>>> +		i++;
>>> +	}
>>> +
>>> +	return bind_ops;
>>> +}
>>> +
>>> +static void __xe_op_bind_async(int xe, uint32_t vm, uint32_t bind_engine,
>>> +			       struct igt_list_head *obj_list,
>>> +			       uint32_t sync_in, uint32_t sync_out)
>>> +{
>>> +	struct drm_xe_vm_bind_op *bind_ops;
>>> +	struct drm_xe_sync tabsyncs[2] = {
>>> +		{ .flags = DRM_XE_SYNC_SYNCOBJ, .handle = sync_in },
>>> +		{ .flags = DRM_XE_SYNC_SYNCOBJ | DRM_XE_SYNC_SIGNAL, .handle = sync_out },
>>> +	};
>>> +	struct drm_xe_sync *syncs;
>>> +	uint32_t num_binds = 0;
>>> +	int num_syncs;
>>> +
>>> +	bind_info("[Binding to vm: %u]\n", vm);
>>> +	bind_ops = xe_alloc_bind_ops(obj_list, &num_binds);
>>> +
>>> +	if (!num_binds) {
>>> +		if (sync_out)
>>> +			syncobj_signal(xe, &sync_out, 1);
>>> +		return;
>>> +	}
>>> +
>>> +	if (sync_in) {
>>> +		syncs = tabsyncs;
>>> +		num_syncs = 2;
>>> +	} else {
>>> +		syncs = &tabsyncs[1];
>>> +		num_syncs = 1;
>>> +	}
>>> +
>>> +	/* User didn't pass sync out, create it and wait for completion */
>>> +	if (!sync_out)
>>> +		tabsyncs[1].handle = syncobj_create(xe, 0);
>>> +
>>> +	bind_info("[Binding syncobjs: (in: %u, out: %u)]\n",
>>> +		  tabsyncs[0].handle, tabsyncs[1].handle);
>>> +
>>> +	if (num_binds == 1) {
>>> +		if ((bind_ops[0].op & 0xffff) == XE_VM_BIND_OP_MAP)
>>> +			xe_vm_bind_async(xe, vm, bind_engine, bind_ops[0].obj, 0,
>>> +					bind_ops[0].addr, bind_ops[0].range,
>>> +					syncs, num_syncs);
>>> +		else
>>> +			xe_vm_unbind_async(xe, vm, bind_engine, 0,
>>> +					   bind_ops[0].addr, bind_ops[0].range,
>>> +					   syncs, num_syncs);
>>> +	} else {
>>> +		xe_vm_bind_array(xe, vm, bind_engine, bind_ops,
>>> +				 num_binds, syncs, num_syncs);
>>> +	}
>>> +
>>> +	if (!sync_out) {
>>> +		igt_assert_eq(syncobj_wait_err(xe, &tabsyncs[1].handle, 1, INT64_MAX, 0), 0);
>>
>> That timeout is quite long, but I see that in the syncobj_wait.c test we have an
>> even higher value... Should we factor the CI timeout in when picking the fence
>> timeout?
> 
> I think infinity is the best selection - I mean we cannot move forward
> if binding is not complete - we don't handle errors here apart
> from asserting if something wrong has happened. If we get stuck on this fence
> that's really bad, but CI will kill it and report a failure in this case.
> 
> Thank you for the review.
> --
> Zbigniew
> 
>>
>> Apart from that one line thingy, the patch looks good, I think.
>>
>> All the best,
>> Karolina
>>
>>> +		syncobj_destroy(xe, tabsyncs[1].handle);
>>> +	}
>>> +
>>> +	free(bind_ops);
>>> +}
>>> +
>>> +/**
>>> +  * xe_bind_unbind_async:
>>> +  * @xe: drm fd of Xe device
>>> +  * @vm: vm to bind/unbind objects to/from
>>> +  * @bind_engine: bind engine, 0 if default
>>> +  * @obj_list: list of xe_object
>>> +  * @sync_in: sync object (fence-in), 0 if there's no input dependency
>>> +  * @sync_out: sync object (fence-out) to signal on bind/unbind completion,
>>> +  *            if 0 wait for bind/unbind completion.
>>> +  *
>>> +  * Function iterates over xe_object @obj_list, prepares binding operation
>>> +  * and does bind/unbind in one step. Providing sync_in / sync_out allows
>>> +  * working in pipelined mode. With sync_in and sync_out set to 0 function
>>> +  * waits until binding operation is complete.
>>> +  */
>>> +void xe_bind_unbind_async(int fd, uint32_t vm, uint32_t bind_engine,
>>> +			  struct igt_list_head *obj_list,
>>> +			  uint32_t sync_in, uint32_t sync_out)
>>> +{
>>> +	return __xe_op_bind_async(fd, vm, bind_engine, obj_list, sync_in, sync_out);
>>> +}
>>> diff --git a/lib/xe/xe_util.h b/lib/xe/xe_util.h
>>> index 46b7e0d46b..32f309923e 100644
>>> --- a/lib/xe/xe_util.h
>>> +++ b/lib/xe/xe_util.h
>>> @@ -12,9 +12,6 @@
>>>    #include <stdint.h>
>>>    #include <xe_drm.h>
>>> -#define XE_REGION_SMEM XE_MEM_REGION_CLASS_SYSMEM
>>> -#define XE_REGION_LMEM XE_MEM_REGION_CLASS_VRAM
>>> -
>>>    #define XE_IS_SYSMEM_MEMORY_REGION(fd, region) \
>>>    	(xe_region_class(fd, region) == XE_MEM_REGION_CLASS_SYSMEM)
>>>    #define XE_IS_VRAM_MEMORY_REGION(fd, region) \
>>> @@ -30,4 +27,22 @@ __xe_get_memory_region_set(int xe, uint32_t *mem_regions_type, int num_regions);
>>>    char *xe_memregion_dynamic_subtest_name(int xe, struct igt_collection *set);
>>> +enum xe_bind_op {
>>> +	XE_OBJECT_BIND,
>>> +	XE_OBJECT_UNBIND,
>>> +};
>>> +
>>> +struct xe_object {
>>> +	uint32_t handle;
>>> +	uint64_t offset;
>>> +	uint64_t size;
>>> +	enum xe_bind_op bind_op;
>>> +	void *priv;
>>> +	struct igt_list_head link;
>>> +};
>>> +
>>> +void xe_bind_unbind_async(int fd, uint32_t vm, uint32_t bind_engine,
>>> +			  struct igt_list_head *obj_list,
>>> +			  uint32_t sync_in, uint32_t sync_out);
>>> +
>>>    #endif /* XE_UTIL_H */


More information about the igt-dev mailing list