[PATCH 4/4] drm/xe: Finish refactoring of exec_queue_create
Welty, Brian
brian.welty at intel.com
Wed Jan 3 18:16:33 UTC 2024
On 1/3/2024 12:07 AM, Matthew Brost wrote:
> On Tue, Jan 02, 2024 at 01:17:31PM -0800, Brian Welty wrote:
>> Setting of exec_queue user extensions is moved from the end of the ioctl
>> function earlier, into __xe_exec_queue_alloc().
>> This fixes bug in that the USM attributes for access counters were being
>> applied too late, and effectively were ignored.
>>
>> However, in order to apply user extensions this early, we can no longer
>> call q->ops functions. Instead, make it more efficient. The user extension
>> functions can simply update the q->sched_props values and they will be
>> applied by the backend during q->ops->init().
>>
>> Signed-off-by: Brian Welty <brian.welty at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_exec_queue.c | 74 ++++++++++++++++++++----------
>> drivers/gpu/drm/xe/xe_exec_queue.h | 3 +-
>> drivers/gpu/drm/xe/xe_gsc.c | 2 +-
>> drivers/gpu/drm/xe/xe_gt.c | 4 +-
>> drivers/gpu/drm/xe/xe_migrate.c | 2 +-
>> 5 files changed, 57 insertions(+), 28 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>> index 9891cddba71c..8a2d51b994cf 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>> @@ -30,14 +30,18 @@ enum xe_exec_queue_sched_prop {
>> XE_EXEC_QUEUE_SCHED_PROP_MAX = 3,
>> };
>>
>> +static int exec_queue_user_extensions(struct xe_device *xe, struct xe_exec_queue *q,
>> + u64 extensions, int ext_number, bool create);
>> +
>> static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
>> struct xe_vm *vm,
>> u32 logical_mask,
>> u16 width, struct xe_hw_engine *hwe,
>> - u32 flags)
>> + u32 flags, u64 extensions)
>> {
>> struct xe_exec_queue *q;
>> struct xe_gt *gt = hwe->gt;
>> + int err;
>>
>> /* only kernel queues can be permanent */
>> XE_WARN_ON((flags & EXEC_QUEUE_FLAG_PERMANENT) && !(flags & EXEC_QUEUE_FLAG_KERNEL));
>> @@ -50,8 +54,6 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
>> q->flags = flags;
>> q->hwe = hwe;
>> q->gt = gt;
>> - if (vm)
>> - q->vm = xe_vm_get(vm);
>> q->class = hwe->class;
>> q->width = width;
>> q->logical_mask = logical_mask;
>> @@ -69,6 +71,21 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
>> hwe->eclass->sched_props.job_timeout_ms;
>> q->sched_props.priority = XE_EXEC_QUEUE_PRIORITY_NORMAL;
>>
>> + if (extensions) {
>> + /*
>> + * may set q->usm, must come before xe_lrc_init(),
>> + * may overwrite q->sched_props, must come before q->ops->init()
>> + */
>> + err = exec_queue_user_extensions(xe, q, extensions, 0, true);
>> + if (err) {
>> + kfree(q);
>> + return ERR_PTR(err);
>> + }
>> + }
>> +
>> + if (vm)
>> + q->vm = xe_vm_get(vm);
>> +
>> if (xe_exec_queue_is_parallel(q)) {
>> q->parallel.composite_fence_ctx = dma_fence_context_alloc(1);
>> q->parallel.composite_fence_seqno = XE_FENCE_INITIAL_SEQNO;
>> @@ -124,12 +141,14 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
>>
>> struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
>> u32 logical_mask, u16 width,
>> - struct xe_hw_engine *hwe, u32 flags)
>> + struct xe_hw_engine *hwe, u32 flags,
>> + u64 extensions)
>> {
>> struct xe_exec_queue *q;
>> int err;
>>
>> - q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags);
>> + q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
>> + extensions);
>> if (IS_ERR(q))
>> return q;
>>
>> @@ -174,7 +193,7 @@ struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe
>> if (!logical_mask)
>> return ERR_PTR(-ENODEV);
>>
>> - return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags);
>> + return xe_exec_queue_create(xe, vm, logical_mask, 1, hwe0, flags, 0);
>> }
>>
>> void xe_exec_queue_destroy(struct kref *ref)
>> @@ -258,7 +277,11 @@ static int exec_queue_set_priority(struct xe_device *xe, struct xe_exec_queue *q
>> if (XE_IOCTL_DBG(xe, value > xe_exec_queue_device_get_max_priority(xe)))
>> return -EPERM;
>>
>> - return q->ops->set_priority(q, value);
>> + if (!create)
>> + return q->ops->set_priority(q, value);
>> + q->sched_props.priority = value;
>> +
>> + return 0;
>> }
>>
>> static bool xe_exec_queue_enforce_schedule_limit(void)
>> @@ -325,7 +348,11 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
>> !xe_hw_engine_timeout_in_range(value, min, max))
>> return -EINVAL;
>>
>> - return q->ops->set_timeslice(q, value);
>> + if (!create)
>> + return q->ops->set_timeslice(q, value);
>> + q->sched_props.timeslice_us = value;
>> +
>> + return 0;
>> }
>>
>> static int exec_queue_set_preemption_timeout(struct xe_device *xe,
>> @@ -341,7 +368,11 @@ static int exec_queue_set_preemption_timeout(struct xe_device *xe,
>> !xe_hw_engine_timeout_in_range(value, min, max))
>> return -EINVAL;
>>
>> - return q->ops->set_preempt_timeout(q, value);
>> + if (!create)
>> + return q->ops->set_preempt_timeout(q, value);
>> + q->sched_props.preempt_timeout_us = value;
>> +
>> + return 0;
>
>
> Nit: I found this tricky to read with indentaions as is, how about:
>
> if (!create)
> return q->ops->set_preempt_timeout(q, value);
>
> q->sched_props.preempt_timeout_us = value;
> return 0;
>
> Just my preference. Same for all other cases.
I like it better too. Will change.
>
>> }
>>
>> static int exec_queue_set_persistence(struct xe_device *xe, struct xe_exec_queue *q,
>> @@ -376,7 +407,9 @@ static int exec_queue_set_job_timeout(struct xe_device *xe, struct xe_exec_queue
>> !xe_hw_engine_timeout_in_range(value, min, max))
>> return -EINVAL;
>>
>> - return q->ops->set_job_timeout(q, value);
>> + q->sched_props.job_timeout_ms = value;
>
> set_job_timeout vfunc can now be deleted, right?
Agreed. I'll delete as a next patch in series.
>
> Everything else LGTM.
>
> Matt
>
>> +
>> + return 0;
>> }
>>
>> static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
>> @@ -651,6 +684,7 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>> if (eci[0].engine_class == DRM_XE_ENGINE_CLASS_VM_BIND) {
>> for_each_gt(gt, xe, id) {
>> struct xe_exec_queue *new;
>> + u32 flags;
>>
>> if (xe_gt_is_media_type(gt))
>> continue;
>> @@ -669,14 +703,13 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>> /* The migration vm doesn't hold rpm ref */
>> xe_device_mem_access_get(xe);
>>
>> + flags = EXEC_QUEUE_FLAG_PERSISTENT | EXEC_QUEUE_FLAG_VM |
>> + (id ? EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD : 0);
>> +
>> migrate_vm = xe_migrate_get_vm(gt_to_tile(gt)->migrate);
>> new = xe_exec_queue_create(xe, migrate_vm, logical_mask,
>> - args->width, hwe,
>> - EXEC_QUEUE_FLAG_PERSISTENT |
>> - EXEC_QUEUE_FLAG_VM |
>> - (id ?
>> - EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD :
>> - 0));
>> + args->width, hwe, flags,
>> + args->extensions);
>>
>> xe_device_mem_access_put(xe); /* now held by engine */
>>
>> @@ -724,7 +757,8 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>> q = xe_exec_queue_create(xe, vm, logical_mask,
>> args->width, hwe,
>> xe_vm_in_lr_mode(vm) ? 0 :
>> - EXEC_QUEUE_FLAG_PERSISTENT);
>> + EXEC_QUEUE_FLAG_PERSISTENT,
>> + args->extensions);
>> up_read(&vm->lock);
>> xe_vm_put(vm);
>> if (IS_ERR(q))
>> @@ -740,12 +774,6 @@ int xe_exec_queue_create_ioctl(struct drm_device *dev, void *data,
>> }
>> }
>>
>> - if (args->extensions) {
>> - err = exec_queue_user_extensions(xe, q, args->extensions, 0, true);
>> - if (XE_IOCTL_DBG(xe, err))
>> - goto kill_exec_queue;
>> - }
>> -
>> q->persistent.xef = xef;
>>
>> mutex_lock(&xef->exec_queue.lock);
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.h b/drivers/gpu/drm/xe/xe_exec_queue.h
>> index d959cc4a1a82..02ce8d204622 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.h
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.h
>> @@ -16,7 +16,8 @@ struct xe_file;
>>
>> struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *vm,
>> u32 logical_mask, u16 width,
>> - struct xe_hw_engine *hw_engine, u32 flags);
>> + struct xe_hw_engine *hw_engine, u32 flags,
>> + u64 extensions);
>> struct xe_exec_queue *xe_exec_queue_create_class(struct xe_device *xe, struct xe_gt *gt,
>> struct xe_vm *vm,
>> enum xe_engine_class class, u32 flags);
>> diff --git a/drivers/gpu/drm/xe/xe_gsc.c b/drivers/gpu/drm/xe/xe_gsc.c
>> index a8a895cf4b44..5b84fc9ab8ad 100644
>> --- a/drivers/gpu/drm/xe/xe_gsc.c
>> +++ b/drivers/gpu/drm/xe/xe_gsc.c
>> @@ -356,7 +356,7 @@ int xe_gsc_init_post_hwconfig(struct xe_gsc *gsc)
>> q = xe_exec_queue_create(xe, NULL,
>> BIT(hwe->logical_instance), 1, hwe,
>> EXEC_QUEUE_FLAG_KERNEL |
>> - EXEC_QUEUE_FLAG_PERMANENT);
>> + EXEC_QUEUE_FLAG_PERMANENT, 0);
>> if (IS_ERR(q)) {
>> xe_gt_err(gt, "Failed to create queue for GSC submission\n");
>> err = PTR_ERR(q);
>> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>> index 3af2adec1295..0f2258dc4a00 100644
>> --- a/drivers/gpu/drm/xe/xe_gt.c
>> +++ b/drivers/gpu/drm/xe/xe_gt.c
>> @@ -235,7 +235,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
>> return -ENOMEM;
>>
>> q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance), 1,
>> - hwe, EXEC_QUEUE_FLAG_KERNEL);
>> + hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
>> if (IS_ERR(q)) {
>> err = PTR_ERR(q);
>> xe_gt_err(gt, "hwe %s: xe_exec_queue_create failed (%pe)\n",
>> @@ -252,7 +252,7 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
>> }
>>
>> nop_q = xe_exec_queue_create(xe, NULL, BIT(hwe->logical_instance),
>> - 1, hwe, EXEC_QUEUE_FLAG_KERNEL);
>> + 1, hwe, EXEC_QUEUE_FLAG_KERNEL, 0);
>> if (IS_ERR(nop_q)) {
>> err = PTR_ERR(nop_q);
>> xe_gt_err(gt, "hwe %s: nop xe_exec_queue_create failed (%pe)\n",
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>> index 6567abcb3c6d..e1615717486a 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -344,7 +344,7 @@ struct xe_migrate *xe_migrate_init(struct xe_tile *tile)
>>
>> m->q = xe_exec_queue_create(xe, vm, logical_mask, 1, hwe,
>> EXEC_QUEUE_FLAG_KERNEL |
>> - EXEC_QUEUE_FLAG_PERMANENT);
>> + EXEC_QUEUE_FLAG_PERMANENT, 0);
>> } else {
>> m->q = xe_exec_queue_create_class(xe, primary_gt, vm,
>> XE_ENGINE_CLASS_COPY,
>> --
>> 2.43.0
>>
More information about the Intel-xe
mailing list