[PATCH v2 02/12] drm/xe/pxp: Allocate PXP execution resources
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Wed Nov 6 22:25:34 UTC 2024
<snip>
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 6dd76f77b504..56f105797ae6 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1381,6 +1381,15 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>> struct xe_tile *tile;
>> u8 id;
>> + /*
>> + * All GSC VMs are owned by the kernel and can also only be used on
>> + * the GSCCS. We don't want a kernel-owned VM to put the device in
>> + * either fault or not fault mode, so we need to exclude the GSC VMs
>> + * from that count; this is only safe if we ensure that all GSC VMs are
>> + * non-faulting.
>> + */
>> + xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
>> +
>> vm = kzalloc(sizeof(*vm), GFP_KERNEL);
>> if (!vm)
>> return ERR_PTR(-ENOMEM);
>> @@ -1391,7 +1400,21 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>> vm->flags = flags;
>> - init_rwsem(&vm->lock);
>> + /**
>> + * GSC VMs are kernel-owned, only used for PXP ops and can be
>> + * manipulated under the PXP mutex. However, the PXP mutex can be taken
> Is that 'can be (but don't have to be) manipulated' or 'can only be
> manipulated'?
The first one; I'll reword it to make that clear.
>
>> + * under a user-VM lock when the PXP session is started at exec_queue
>> + * creation time. Those are different VMs and therefore there is no risk
>> + * of deadlock, but we need to tell lockdep that this is the case or it
>> + * will print a warning.
>> + */
>> + if (flags & XE_VM_FLAG_GSC) {
>> + static struct lock_class_key gsc_vm_key;
>> +
>> + __init_rwsem(&vm->lock, "gsc_vm", &gsc_vm_key);
>> + } else {
>> + init_rwsem(&vm->lock);
>> + }
>> mutex_init(&vm->snap_mutex);
>> INIT_LIST_HEAD(&vm->rebind_list);
>> @@ -1510,7 +1533,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>> mutex_lock(&xe->usm.lock);
>> if (flags & XE_VM_FLAG_FAULT_MODE)
>> xe->usm.num_vm_in_fault_mode++;
>> - else if (!(flags & XE_VM_FLAG_MIGRATION))
>> + else if (!(flags & (XE_VM_FLAG_MIGRATION | XE_VM_FLAG_GSC)))
>> xe->usm.num_vm_in_non_fault_mode++;
>> mutex_unlock(&xe->usm.lock);
>> @@ -2694,11 +2717,10 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
>> for (i = 0; i < vops->num_syncs; i++)
>> xe_sync_entry_signal(vops->syncs + i, fence);
>> xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
>> - dma_fence_put(fence);
>> }
>> -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> - struct xe_vma_ops *vops)
>> +static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> + struct xe_vma_ops *vops)
> Rather than changing the internals, is it not possible to just call
> xe_exec_queue_last_fence_get() after vm_bind_ioctl_ops_execute has
> returned?
If you have multiple bind ops, the last fence can be overwritten, so we
might end up waiting on the fence of a different (newer) operation instead
of the one for our allocation.
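To illustrate with a rough sketch (not from the patch; it assumes the
xe_exec_queue_last_fence_get(q, vm) helper you mention, and two back-to-back
binds on the same exec queue):

    /* bind A (old signature, returning only an error code) */
    err = vm_bind_ioctl_ops_execute(vm, &vops_a);  /* queue's last fence is now A's */
    /* bind B on the same queue, before A's caller queries the fence */
    err = vm_bind_ioctl_ops_execute(vm, &vops_b);  /* queue's last fence is now B's */

    /* A's caller would now get B's fence instead of its own */
    fence = xe_exec_queue_last_fence_get(q, vm);

Returning the fence directly from vm_bind_ioctl_ops_execute() guarantees the
caller waits on exactly the bind it submitted.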
>
>> {
>> struct drm_exec exec;
>> struct dma_fence *fence;
>> @@ -2711,21 +2733,21 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> drm_exec_until_all_locked(&exec) {
>> err = vm_bind_ioctl_ops_lock_and_prep(&exec, vm, vops);
>> drm_exec_retry_on_contention(&exec);
>> - if (err)
>> + if (err) {
>> + fence = ERR_PTR(err);
>> goto unlock;
>> + }
>> fence = ops_execute(vm, vops);
>> - if (IS_ERR(fence)) {
>> - err = PTR_ERR(fence);
>> + if (IS_ERR(fence))
>> goto unlock;
>> - }
>> vm_bind_ioctl_ops_fini(vm, vops, fence);
>> }
>> unlock:
>> drm_exec_fini(&exec);
>> - return err;
>> + return fence;
>> }
>> #define SUPPORTED_FLAGS_STUB \
>> @@ -2946,6 +2968,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> struct xe_sync_entry *syncs = NULL;
>> struct drm_xe_vm_bind_op *bind_ops;
>> struct xe_vma_ops vops;
>> + struct dma_fence *fence;
>> int err;
>> int i;
>> @@ -3108,7 +3131,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> if (err)
>> goto unwind_ops;
>> - err = vm_bind_ioctl_ops_execute(vm, &vops);
>> + fence = vm_bind_ioctl_ops_execute(vm, &vops);
>> + if (IS_ERR(fence))
>> + err = PTR_ERR(fence);
>> + else
>> + dma_fence_put(fence);
> There isn't a new fence get in vm_bind_ioctl_ops_execute(). The change
> in return value is the only difference in behaviour. So why is an
> extra put required?
I've removed the dma_fence_put from vm_bind_ioctl_ops_fini (which is called
from vm_bind_ioctl_ops_execute), so this put is not extra; the reference that
used to be dropped there is now dropped by the caller.
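As a simplified sketch of the ownership flow (not the actual code):

    /* before: vm_bind_ioctl_ops_fini() consumed the reference */
    vm_bind_ioctl_ops_fini(vm, vops, fence);  /* ... dma_fence_put(fence); */

    /* after: the fence is handed back and the caller drops it when done */
    fence = vm_bind_ioctl_ops_execute(vm, &vops);
    if (!IS_ERR(fence))
            dma_fence_put(fence);  /* or keep it to wait on, as xe_vm_bind_bo() does */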
>
>> unwind_ops:
>> if (err && err != -ENODATA)
>> @@ -3142,6 +3169,81 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> return err;
>> }
>> +/**
>> + * xe_vm_bind_bo - bind a kernel BO to a VM
>> + * @vm: VM to bind the BO to
>> + * @bo: BO to bind
>> + * @q: exec queue to use for the bind (optional)
>> + * @addr: address at which to bind the BO
>> + * @cache_lvl: PAT cache level to use
>> + *
>> + * Execute a VM bind map operation on a kernel-owned BO to bind it into a
>> + * kernel-owned VM.
>> + *
>> + * Returns a dma_fence to track the binding completion if the job to do so was
>> + * successfully submitted, an error pointer otherwise.
>> + */
>> +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
>> + struct xe_exec_queue *q, u64 addr,
>> + enum xe_cache_level cache_lvl)
> Should this have '_kernel_' in the name given the description of
> kernel-owned BO to kernel-owned VM?
Will rename.
Daniele
>
> John.
>
>> +{
>> + struct xe_vma_ops vops;
>> + struct drm_gpuva_ops *ops = NULL;
>> + struct dma_fence *fence;
>> + int err;
>> +
>> + xe_bo_get(bo);
>> + xe_vm_get(vm);
>> + if (q)
>> + xe_exec_queue_get(q);
>> +
>> + down_write(&vm->lock);
>> +
>> + xe_vma_ops_init(&vops, vm, q, NULL, 0);
>> +
>> + ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
>> + DRM_XE_VM_BIND_OP_MAP, 0, 0,
>> + vm->xe->pat.idx[cache_lvl]);
>> + if (IS_ERR(ops)) {
>> + err = PTR_ERR(ops);
>> + goto release_vm_lock;
>> + }
>> +
>> + err = vm_bind_ioctl_ops_parse(vm, ops, &vops);
>> + if (err)
>> + goto release_vm_lock;
>> +
>> + xe_assert(vm->xe, !list_empty(&vops.list));
>> +
>> + err = xe_vma_ops_alloc(&vops, false);
>> + if (err)
>> + goto unwind_ops;
>> +
>> + fence = vm_bind_ioctl_ops_execute(vm, &vops);
>> + if (IS_ERR(fence))
>> + err = PTR_ERR(fence);
>> +
>> +unwind_ops:
>> + if (err && err != -ENODATA)
>> + vm_bind_ioctl_ops_unwind(vm, &ops, 1);
>> +
>> + xe_vma_ops_fini(&vops);
>> + drm_gpuva_ops_free(&vm->gpuvm, ops);
>> +
>> +release_vm_lock:
>> + up_write(&vm->lock);
>> +
>> + if (q)
>> + xe_exec_queue_put(q);
>> + xe_vm_put(vm);
>> + xe_bo_put(bo);
>> +
>> + if (err)
>> + fence = ERR_PTR(err);
>> +
>> + return fence;
>> +}
>> +
>> /**
>> * xe_vm_lock() - Lock the vm's dma_resv object
>> * @vm: The struct xe_vm whose lock is to be locked
>> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
>> index c864dba35e1d..bfc19e8113c3 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.h
>> +++ b/drivers/gpu/drm/xe/xe_vm.h
>> @@ -19,6 +19,8 @@ struct drm_file;
>> struct ttm_buffer_object;
>> struct ttm_validate_buffer;
>> +struct dma_fence;
>> +
>> struct xe_exec_queue;
>> struct xe_file;
>> struct xe_sync_entry;
>> @@ -248,6 +250,10 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
>> int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
>> unsigned int num_fences);
>> +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
>> + struct xe_exec_queue *q, u64 addr,
>> + enum xe_cache_level cache_lvl);
>> +
>> /**
>> * xe_vm_resv() - Return's the vm's reservation object
>> * @vm: The vm
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index 7f9a303e51d8..52467b9b5348 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -164,6 +164,7 @@ struct xe_vm {
>> #define XE_VM_FLAG_BANNED BIT(5)
>> #define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags)
>> +#define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id)
>> +#define XE_VM_FLAG_GSC BIT(8)
>> unsigned long flags;
>> /** @composite_fence_ctx: context composite fence */