[RFC 03/14] drm/xe/pxp: Allocate PXP execution resources

Fri Jul 12 23:00:09 UTC 2024

On 7/12/2024 3:43 PM, Matthew Brost wrote:
> On Fri, Jul 12, 2024 at 02:28:47PM -0700, Daniele Ceraolo Spurio wrote:
>> PXP requires submissions to the HW for the following operations
>>
>> 1) Key invalidation, done via the VCS engine
>> 2) Communication with the GSC FW for session management, done via the
>>     GSCCS
>>
>> For #1 we can allocate a simple kernel context, but #2 requires the
>> submissions to be done with PPGTT, which is not currently supported in Xe.
>> To add this support, the following changes have been included:
>>
>> - a new type of kernel-owned VM (marked as GSC)
>> - a new function to map a BO into a VM from within the kernel
>>
>> RFC note: I've tweaked some of the VM functions to return the fence
>> further up the stack, so I can wait on it from the PXP code. Not sure if
>> this is the best approach.
>>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
> Not a complete review but adding some thoughts. Looks sane enough to me.
>
> Random musing and nits below.
>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> ---
>>   drivers/gpu/drm/xe/Makefile                   |   1 +
>>   drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h |   7 +
>>   drivers/gpu/drm/xe/xe_exec_queue.c            |   3 +
>>   drivers/gpu/drm/xe/xe_pxp.c                   |  25 ++-
>>   drivers/gpu/drm/xe/xe_pxp_submit.c            | 188 ++++++++++++++++++
>>   drivers/gpu/drm/xe/xe_pxp_submit.h            |  16 ++
>>   drivers/gpu/drm/xe/xe_pxp_types.h             |  33 +++
>>   drivers/gpu/drm/xe/xe_vm.c                    | 100 +++++++++-
>>   drivers/gpu/drm/xe/xe_vm.h                    |   6 +
>>   drivers/gpu/drm/xe/xe_vm_types.h              |   1 +
>>   10 files changed, 372 insertions(+), 8 deletions(-)
>>   create mode 100644 drivers/gpu/drm/xe/xe_pxp_submit.c
>>   create mode 100644 drivers/gpu/drm/xe/xe_pxp_submit.h
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 5f15e6dd5057..a4514265085b 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -105,6 +105,7 @@ xe-y += xe_bb.o \
>>   	xe_pt.o \
>>   	xe_pt_walk.o \
>>   	xe_pxp.o \
>> +	xe_pxp_submit.o \
>>   	xe_query.o \
>>   	xe_range_fence.o \
>>   	xe_reg_sr.o \
>> diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> index 57520809e48d..f3c4cf10ba20 100644
>> --- a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> @@ -6,6 +6,7 @@
>>   #ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
>>   #define _ABI_GSC_PXP_COMMANDS_ABI_H
>>   
>> +#include <linux/sizes.h>
>>   #include <linux/types.h>
>>   
>>   /* Heci client ID for PXP commands */
>> @@ -13,6 +14,12 @@
>>   
>>   #define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
>>   
>> +/*
>> + * A PXP sub-section in an HECI packet can be up to 64K big in each direction.
>> + * This does not include the top-level GSC header.
>> + */
>> +#define PXP_MAX_PACKET_SIZE SZ_64K
>> +
>>   /*
>>    * there are a lot of status codes for PXP, but we only define the cross-API
>>    * common ones that we actually can handle in the kernel driver. Other failure
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>> index 0ba37835849b..bc6e867aba17 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>> @@ -131,6 +131,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
>>   	struct xe_exec_queue *q;
>>   	int err;
>>   
>> +	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
>> +	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
>> +
> We should be able to remove this soon. More on that below.
>
>>   	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
>>   				  extensions);
>>   	if (IS_ERR(q))
>> diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
>> index cdb29b104006..01386b9f0c50 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp.c
>> +++ b/drivers/gpu/drm/xe/xe_pxp.c
>> @@ -12,6 +12,7 @@
>>   #include "xe_gt.h"
>>   #include "xe_gt_types.h"
>>   #include "xe_mmio.h"
>> +#include "xe_pxp_submit.h"
>>   #include "xe_pxp_types.h"
>>   #include "xe_uc_fw.h"
>>   #include "regs/xe_pxp_regs.h"
>> @@ -50,6 +51,20 @@ static int kcr_pxp_enable(const struct xe_pxp *pxp)
>>   	return kcr_pxp_set_status(pxp, true);
>>   }
>>   
>> +static int kcr_pxp_disable(const struct xe_pxp *pxp)
>> +{
>> +	return kcr_pxp_set_status(pxp, false);
>> +}
>> +
>> +static void pxp_fini(void *arg)
>> +{
>> +	struct xe_pxp *pxp = arg;
>> +
>> +	xe_pxp_destroy_execution_resources(pxp);
>> +
>> +	/* no need to explicitly disable KCR since we're going to do an FLR */
>> +}
>> +
>>   /**
>>    * xe_pxp_init - initialize PXP support
>>    * @xe: the xe_device structure
>> @@ -97,7 +112,15 @@ int xe_pxp_init(struct xe_device *xe)
>>   	if (err)
>>   		return err;
>>   
>> +	err = xe_pxp_allocate_execution_resources(pxp);
>> +	if (err)
>> +		goto kcr_disable;
>> +
>>   	xe->pxp = pxp;
>>   
>> -	return 0;
>> +	return devm_add_action_or_reset(xe->drm.dev, pxp_fini, pxp);
>> +
>> +kcr_disable:
>> +	kcr_pxp_disable(pxp);
>> +	return err;
>>   }
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
>> new file mode 100644
>> index 000000000000..4fc3c7c58101
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
>> @@ -0,0 +1,188 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright(c) 2024 Intel Corporation.
>> + */
>> +
>> +#include "xe_pxp_submit.h"
>> +
>> +#include <drm/xe_drm.h>
>> +
>> +#include "xe_device_types.h"
>> +#include "xe_bo.h"
>> +#include "xe_exec_queue.h"
>> +#include "xe_gsc_submit.h"
>> +#include "xe_gt.h"
>> +#include "xe_pxp_types.h"
>> +#include "xe_vm.h"
>> +#include "regs/xe_gt_regs.h"
>> +
>> +static int create_vcs_context(struct xe_pxp *pxp)
>> +{
>> +	struct xe_gt *gt = pxp->gt;
>> +	struct xe_hw_engine *hwe;
>> +	struct xe_exec_queue *q;
>> +
>> +	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
>> +	if (!hwe)
>> +		return -ENODEV;
>> +
> Ugh, I really want to completely decouple an exec queue from hwe (e.g.
> don't pass in hwe to xe_exec_queue_create). I guess this is already in the
> code, so it's fine here; just a reminder of this ugliness.
>
>> +	q = xe_exec_queue_create(pxp->xe, NULL, BIT(hwe->logical_instance), 1, hwe,
>> +				 EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
>> +	if (IS_ERR(q))
>> +		return PTR_ERR(q);
>> +
>> +	pxp->vcs_queue = q;
>> +
> So how is this used? Not attached to a VM? GGTT or ring instructions
> only? Any downside of attaching this to GSC VM?

Ring instruction only, yes; we only use it to submit a key termination 
(next patch in the series).
I've made the GSC_VM only usable with the GSCCS so I didn't have to care 
about potentially having a kernel-owned non-faulting VM on 
user-accessible engines, where userspace might instead want to use a 
faulting VM. If we're removing the limitation and allowing the 2 types
to mix, that limitation for the GSC_VM should go away.

>
>> +	return 0;
>> +}
>> +
>> +static void destroy_vcs_context(struct xe_pxp *pxp)
>> +{
>> +	if (pxp->vcs_queue)
>> +		xe_exec_queue_put(pxp->vcs_queue);
>> +}
>> +
>> +/*
>> + * We allocate a single object for the batch and the input and output BOs. PXP
>> + * commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE), but we
>> + * currently only support a subset of commands that are small (< 20 dwords),
>> + * so a single page is enough for now.
>> + */
>> +#define PXP_BB_SIZE		XE_PAGE_SIZE
>> +#define PXP_INOUT_SIZE		XE_PAGE_SIZE
>> +#define PXP_BO_SIZE		(PXP_BB_SIZE + (2 * PXP_INOUT_SIZE))
>> +#define PXP_BB_OFFSET		0
>> +#define PXP_MSG_IN_OFFSET 	PXP_BB_SIZE
>> +#define PXP_MSG_OUT_OFFSET 	(PXP_MSG_IN_OFFSET + PXP_INOUT_SIZE)
>> +static int allocate_gsc_execution_resources(struct xe_pxp *pxp)
>> +{
>> +	struct xe_gt *gt = pxp->gt;
>> +	struct xe_tile *tile = gt_to_tile(gt);
>> +	struct xe_device *xe = pxp->xe;
>> +	struct xe_hw_engine *hwe;
>> +	struct xe_vm *vm;
>> +	struct xe_bo *bo;
>> +	struct xe_exec_queue *q;
>> +	struct dma_fence *fence;
>> +	long timeout;
>> +	int err = 0;
>> +
>> +	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, OTHER_GSC_INSTANCE, false);
>> +
>> +	/* we shouldn't reach here if the GSC engine is not available */
>> +	xe_assert(xe, hwe);
>> +
>> +	/* PXP instructions must be issued from PPGTT */
>> +	vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
>> +	if (IS_ERR(vm))
>> +		return PTR_ERR(vm);
>> +
>> +	/* We allocate a single object for the batch and the in/out memory */
>> +	xe_vm_lock(vm, false);
>> +	bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BO_SIZE, ttm_bo_type_kernel,
>> +				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
>> +	xe_vm_unlock(vm);
>> +	if (IS_ERR(bo)) {
>> +		err = PTR_ERR(bo);
>> +		goto vm_out;
>> +	}
>> +
>> +	fence = xe_vm_bind_bo(vm, bo, NULL, 0, XE_CACHE_WB);
>> +	if (IS_ERR(fence)) {
>> +		err = PTR_ERR(fence);
>> +		goto bo_out;
>> +	}
>> +
>> +	timeout = dma_fence_wait_timeout(fence, false, HZ);
>> +	dma_fence_put(fence);
>> +	if (timeout <= 0) {
>> +		err = timeout ?: -ETIME;
>> +		goto bo_out;
>> +	}
>> +
>> +	q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
>> +				 EXEC_QUEUE_FLAG_KERNEL |
>> +				 EXEC_QUEUE_FLAG_PERMANENT, 0);
>> +	if (IS_ERR(q)) {
>> +		err = PTR_ERR(q);
>> +		goto bo_out;
>> +	}
>> +
>> +	pxp->gsc_exec.vm = vm;
>> +	pxp->gsc_exec.bo = bo;
>> +	pxp->gsc_exec.batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_OFFSET);
>> +	pxp->gsc_exec.msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_MSG_IN_OFFSET);
>> +	pxp->gsc_exec.msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_MSG_OUT_OFFSET);
> So with this mapping, all GSC submissions are serially executed and waited
> on. There won't ever be a need to pipeline things? If pipelining is ever
> needed, you could use xe_bb_* plus suballocation of the BO you map. That is
> more complex, so if serial execution is all you will ever need, then yeah,
> probably don't use that.

We only send 2 types of commands, session initialization and session 
invalidation, which have to be serialized.

Even if we had other commands, the GSC is weird and submissions to it 
can complete with a "wait a bit then try again" message, so we have to 
wait until the fence is signaled, then check the memory and only if the 
memory has a "success" return we can move on to the next submission.

>
>> +	pxp->gsc_exec.q = q;
>> +
>> +	/* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
>> +	pxp->gsc_exec.host_session_handle = xe_gsc_create_host_session_id();
>> +
>> +	return 0;
>> +
>> +bo_out:
>> +	xe_vm_lock(vm, false);
>> +	xe_bo_unpin(bo);
>> +	xe_vm_unlock(vm);
>> +
>> +	xe_bo_put(bo);
> Can use helper I mention below.
>
>> +vm_out:
>> +	xe_vm_close_and_put(vm);
>> +
>> +	return err;
>> +}
>> +
>> +static void destroy_gsc_execution_resources(struct xe_pxp *pxp)
>> +{
>> +	if (!pxp->gsc_exec.q)
>> +		return;
>> +
>> +	iosys_map_clear(&pxp->gsc_exec.msg_out);
>> +	iosys_map_clear(&pxp->gsc_exec.msg_in);
>> +	iosys_map_clear(&pxp->gsc_exec.batch);
> I don't think this is strictly needed, as it just sets a pointer to NULL.
>
>> +
>> +	xe_exec_queue_put(pxp->gsc_exec.q);
>> +
>> +	xe_vm_lock(pxp->gsc_exec.vm, false);
>> +	xe_bo_unpin(pxp->gsc_exec.bo);
>> +	xe_vm_unlock(pxp->gsc_exec.vm);
>> +	xe_bo_put(pxp->gsc_exec.bo);
>> +
> This looks awfully like xe_bo_unpin_map_no_vm. Maybe rename that
> function and just use it?
>
> If a BO is private to a VM (this one is), xe_bo_lock and xe_vm_lock mean
> the same thing.

I didn't know the 2 locks were equivalent. I'll switch to the helper.

>
>> +	xe_vm_close_and_put(pxp->gsc_exec.vm);
>> +}
>> +
>> +/**
>> + * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
>> + * @pxp: the xe_pxp structure
>> + *
>> + * Allocates exec_queues objects for VCS and GSCCS submission. The GSCCS
>> + * submissions are done via PPGTT, so this function allocates a VM for it and
>> + * maps the object into it.
>> + *
>> + * Returns 0 if the allocation and mapping is successful, an errno value
>> + * otherwise.
>> + */
>> +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
>> +{
>> +	int err;
>> +
>> +	err = create_vcs_context(pxp);
>> +	if (err)
>> +		return err;
>> +
>> +	err = allocate_gsc_execution_resources(pxp);
>> +	if (err)
>> +		goto destroy_vcs_context;
>> +
>> +	return 0;
>> +
>> +destroy_vcs_context:
>> +	destroy_vcs_context(pxp);
>> +	return err;
>> +}
>> +
>> +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
>> +{
>> +	destroy_gsc_execution_resources(pxp);
>> +	destroy_vcs_context(pxp);
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h
>> new file mode 100644
>> index 000000000000..1a971fadc081
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
>> @@ -0,0 +1,16 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright(c) 2024, Intel Corporation. All rights reserved.
>> + */
>> +
>> +#ifndef __XE_PXP_SUBMIT_H__
>> +#define __XE_PXP_SUBMIT_H__
>> +
>> +#include <linux/types.h>
>> +
>> +struct xe_pxp;
>> +
>> +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
>> +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
>> +
>> +#endif /* __XE_PXP_SUBMIT_H__ */
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_types.h b/drivers/gpu/drm/xe/xe_pxp_types.h
>> index 1561e3bd2676..c16813253b47 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp_types.h
>> +++ b/drivers/gpu/drm/xe/xe_pxp_types.h
>> @@ -6,10 +6,14 @@
>>   #ifndef __XE_PXP_TYPES_H__
>>   #define __XE_PXP_TYPES_H__
>>   
>> +#include <linux/iosys-map.h>
>>   #include <linux/types.h>
>>   
>> +struct xe_bo;
>> +struct xe_exec_queue;
>>   struct xe_device;
>>   struct xe_gt;
>> +struct xe_vm;
>>   
>>   /**
>>    * struct xe_pxp - pxp state
>> @@ -23,6 +27,35 @@ struct xe_pxp {
>>   	 * (VDBOX, KCR and GSC)
>>   	 */
>>   	struct xe_gt *gt;
>> +
>> +	/** @vcs_queue: kernel-owned VCS exec queue used for PXP operations */
>> +	struct xe_exec_queue *vcs_queue;
>> +
>> +	/** @gsc_exec: kernel-owned objects for PXP submissions to the GSCCS */
>> +	struct {
>> +		/**
>> +		 * @gsc_exec.host_session_handle: handle used in communications
>> +		 * with the GSC firmware.
>> +		 */
>> +		u64 host_session_handle;
>> +		/** @gsc_exec.vm: VM used for PXP submissions to the GSCCS */
>> +		struct xe_vm *vm;
>> +		/** @gsc_exec.q: GSCCS exec queue for PXP submissions */
>> +		struct xe_exec_queue *q;
>> +
>> +		/**
>> +		 * @gsc_exec.bo: BO used for submissions to the GSCCS and GSC
>> +		 * FW. It includes space for the GSCCS batch and the
>> +		 * input/output buffers read/written by the FW
>> +		 */
>> +		struct xe_bo *bo;
>> +		/** @gsc_exec.batch: iosys_map to the batch memory within the BO */
>> +		struct iosys_map batch;
>> +		/** @gsc_exec.msg_in: iosys_map to the input memory within the BO */
>> +		struct iosys_map msg_in;
>> +		/** @gsc_exec.msg_out: iosys_map to the output memory within the BO */
>> +		struct iosys_map msg_out;
>> +	} gsc_exec;
>>   };
>>   
>>   #endif /* __XE_PXP_TYPES_H__ */
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 02f684c0330d..412ec9cb9650 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1315,6 +1315,15 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>>   	struct xe_tile *tile;
>>   	u8 id;
>>   
>> +	/*
>> +	 * All GSC VMs are owned by the kernel and can also only be used on
>> +	 * the GSCCS. We don't want a kernel-owned VM to put the device in
>> +	 * either fault or not fault mode, so we need to exclude the GSC VMs
>> +	 * from that count; this is only safe if we ensure that all GSC VMs are
>> +	 * non-faulting.
>> +	 */
>> +	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
>> +
>>   	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
>>   	if (!vm)
>>   		return ERR_PTR(-ENOMEM);
>> @@ -1442,7 +1451,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>>   	mutex_lock(&xe->usm.lock);
>>   	if (flags & XE_VM_FLAG_FAULT_MODE)
>>   		xe->usm.num_vm_in_fault_mode++;
>> -	else if (!(flags & XE_VM_FLAG_MIGRATION))
>> +	else if (!(flags & (XE_VM_FLAG_MIGRATION | XE_VM_FLAG_GSC)))
> This change is good now but should become unnecessary once Francois
> lands some code to remove the restriction of mixing faulting and
> non-faulting VM within a device.
>
>>   		xe->usm.num_vm_in_non_fault_mode++;
>>   	mutex_unlock(&xe->usm.lock);
>>   
>> @@ -2867,11 +2876,10 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
>>   	for (i = 0; i < vops->num_syncs; i++)
>>   		xe_sync_entry_signal(vops->syncs + i, fence);
>>   	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
>> -	dma_fence_put(fence);
> Nit: I'd send this change and associated change in xe_vm_bind_ioctl +
> vm_bind_ioctl_ops_execute in its own patch, perhaps even as an
> independent series which I'd RB immediately.
>
> Change looks good though and could be useful elsewhere too.
>
>>   }
>>   
>> -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> -				     struct xe_vma_ops *vops)
>> +static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> +						   struct xe_vma_ops *vops)
>>   {
>>   	struct drm_exec exec;
>>   	struct dma_fence *fence;
>> @@ -2889,7 +2897,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>>   
>>   		fence = ops_execute(vm, vops);
>>   		if (IS_ERR(fence)) {
>> -			err = PTR_ERR(fence);
>>   			/* FIXME: Killing VM rather than proper error handling */
>>   			xe_vm_kill(vm, false);
> Looks like you are on an old baseline, from before this series landed [1]. I
> suggest rebasing, as those changes creep up into the upper layers a bit.
>
> [1] https://patchwork.freedesktop.org/series/133034/

Yes, my local tree is from last week. I'll rebase and split out the 
changes to their own patch as suggested.

>>   			goto unlock;
>> @@ -2900,7 +2907,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>>   
>>   unlock:
>>   	drm_exec_fini(&exec);
>> -	return err;
>> +	return fence;
>>   }
>>   
>>   #define SUPPORTED_FLAGS	\
>> @@ -3114,6 +3121,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>>   	struct xe_sync_entry *syncs = NULL;
>>   	struct drm_xe_vm_bind_op *bind_ops;
>>   	struct xe_vma_ops vops;
>> +	struct dma_fence *fence;
>>   	int err;
>>   	int i;
>>   
>> @@ -3264,7 +3272,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>>   		goto unwind_ops;
>>   	}
>>   
>> -	err = vm_bind_ioctl_ops_execute(vm, &vops);
>> +	fence = vm_bind_ioctl_ops_execute(vm, &vops);
>> +	if (IS_ERR(fence))
>> +		err = PTR_ERR(fence);
>> +	else
>> +		dma_fence_put(fence);
>>   
>>   unwind_ops:
>>   	if (err && err != -ENODATA)
>> @@ -3297,6 +3309,80 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>>   	return err;
>>   }
>>   
>> +/**
>> + * xe_vm_bind_bo - bind a kernel BO to a VM
>> + * @vm: VM to bind the BO to
>> + * @bo: BO to bind
>> + * @q: exec queue to use for the bind (optional)
>> + * @addr: address at which to bind the BO
>> + * @cache_lvl: PAT cache level to use
>> + *
>> + * Execute a VM bind map operation on a kernel-owned BO to bind it into a
>> + * kernel-owned VM.
>> + *
>> + * Returns 0 if the ops execution is successful, an errno value otherwise.
>> + * TODO: return a fence instead.
>> + */
>> +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
>> +				struct xe_exec_queue *q, u64 addr,
>> +				enum xe_cache_level cache_lvl)
>> +{
>> +	struct xe_vma_ops vops;
>> +	struct drm_gpuva_ops *ops = NULL;
>> +	struct dma_fence *fence;
>> +	int err;
>> +
>> +	xe_bo_get(bo);
>> +	xe_vm_get(vm);
>> +	if (q)
>> +		xe_exec_queue_get(q);
>> +
>> +	down_write(&vm->lock);
>> +
>> +	xe_vma_ops_init(&vops, vm, q, NULL, 0);
>> +
>> +	ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
>> +				       DRM_XE_VM_BIND_OP_MAP, 0,
>> +				       vm->xe->pat.idx[cache_lvl], 0);
>> +	if (IS_ERR(ops)) {
>> +		err = PTR_ERR(ops);
>> +		goto release_vm_lock;
>> +	}
>> +
>> +	err = vm_bind_ioctl_ops_parse(vm, q, ops, NULL, 0, &vops, true);
>> +	if (err)
>> +		goto release_vm_lock;
>> +
>> +	/* Nothing to do */
>> +	if (list_empty(&vops.list)) {
> Can this ever be true? In the current usage it doesn't appear so. Maybe
> convert to an assert of !list_empty to simplify this function slightly?

will do.

Daniele

>
> Matt
>
>> +		err = -ENODATA;
>> +		goto unwind_ops;
>> +	}
>> +
>> +	fence = vm_bind_ioctl_ops_execute(vm, &vops);
>> +	if (IS_ERR(fence))
>> +		err = PTR_ERR(fence);
>> +
>> +unwind_ops:
>> +	if (err && err != -ENODATA)
>> +		vm_bind_ioctl_ops_unwind(vm, &ops, 1);
>> +
>> +	drm_gpuva_ops_free(&vm->gpuvm, ops);
>> +
>> +release_vm_lock:
>> +	up_write(&vm->lock);
>> +
>> +	if (q)
>> +		xe_exec_queue_put(q);
>> +	xe_vm_put(vm);
>> +	xe_bo_put(bo);
>> +
>> +	if (err)
>> +		fence = ERR_PTR(err);
>> +
>> +	return fence;
>> +}
>> +
>>   /**
>>    * xe_vm_lock() - Lock the vm's dma_resv object
>>    * @vm: The struct xe_vm whose lock is to be locked
>> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
>> index b481608b12f1..5e298ac90dfc 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.h
>> +++ b/drivers/gpu/drm/xe/xe_vm.h
>> @@ -19,6 +19,8 @@ struct drm_file;
>>   struct ttm_buffer_object;
>>   struct ttm_validate_buffer;
>>   
>> +struct dma_fence;
>> +
>>   struct xe_exec_queue;
>>   struct xe_file;
>>   struct xe_sync_entry;
>> @@ -248,6 +250,10 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
>>   int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
>>   			  unsigned int num_fences);
>>   
>> +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
>> +				struct xe_exec_queue *q, u64 addr,
>> +				enum xe_cache_level cache_lvl);
>> +
>>   /**
>>    * xe_vm_resv() - Return's the vm's reservation object
>>    * @vm: The vm
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index ce1a63a5e3e7..60ce327d303c 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -152,6 +152,7 @@ struct xe_vm {
>>   #define XE_VM_FLAG_BANNED		BIT(5)
>>   #define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
>>   #define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
>> +#define XE_VM_FLAG_GSC			BIT(8)
>>   	unsigned long flags;
>>   
>>   	/** @composite_fence_ctx: context composite fence */
>> -- 
>> 2.43.0
>>