[RFC 03/14] drm/xe/pxp: Allocate PXP execution resources

Matthew Brost matthew.brost at intel.com
Fri Jul 12 23:14:46 UTC 2024


On Fri, Jul 12, 2024 at 04:00:09PM -0700, Daniele Ceraolo Spurio wrote:
> 
> 
> On 7/12/2024 3:43 PM, Matthew Brost wrote:
> > On Fri, Jul 12, 2024 at 02:28:47PM -0700, Daniele Ceraolo Spurio wrote:
> > > PXP requires submissions to the HW for the following operations
> > > 
> > > 1) Key invalidation, done via the VCS engine
> > > 2) Communication with the GSC FW for session management, done via the
> > >     GSCCS
> > > 
> > > For #1 we can allocate a simple kernel context, but #2 requires the
> > > submissions to be done with PPGTT, which is not currently supported in Xe.
> > > To add this support, the following changes have been included:
> > > 
> > > - a new type of kernel-owned VM (marked as GSC)
> > > - a new function to map a BO into a VM from within the kernel
> > > 
> > > RFC note: I've tweaked some of the VM functions to return the fence
> > > further up the stack, so I can wait on it from the PXP code. Not sure if
> > > this is the best approach.
> > > 
> > > Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> > > Cc: Matthew Brost <matthew.brost at intel.com>
> > Not a complete review but adding some thoughts. Looks sane enough to me.
> > 
> > Random musings and nits below.
> > 
> > > Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> > > ---
> > >   drivers/gpu/drm/xe/Makefile                   |   1 +
> > >   drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h |   7 +
> > >   drivers/gpu/drm/xe/xe_exec_queue.c            |   3 +
> > >   drivers/gpu/drm/xe/xe_pxp.c                   |  25 ++-
> > >   drivers/gpu/drm/xe/xe_pxp_submit.c            | 188 ++++++++++++++++++
> > >   drivers/gpu/drm/xe/xe_pxp_submit.h            |  16 ++
> > >   drivers/gpu/drm/xe/xe_pxp_types.h             |  33 +++
> > >   drivers/gpu/drm/xe/xe_vm.c                    | 100 +++++++++-
> > >   drivers/gpu/drm/xe/xe_vm.h                    |   6 +
> > >   drivers/gpu/drm/xe/xe_vm_types.h              |   1 +
> > >   10 files changed, 372 insertions(+), 8 deletions(-)
> > >   create mode 100644 drivers/gpu/drm/xe/xe_pxp_submit.c
> > >   create mode 100644 drivers/gpu/drm/xe/xe_pxp_submit.h
> > > 
> > > diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
> > > index 5f15e6dd5057..a4514265085b 100644
> > > --- a/drivers/gpu/drm/xe/Makefile
> > > +++ b/drivers/gpu/drm/xe/Makefile
> > > @@ -105,6 +105,7 @@ xe-y += xe_bb.o \
> > >   	xe_pt.o \
> > >   	xe_pt_walk.o \
> > >   	xe_pxp.o \
> > > +	xe_pxp_submit.o \
> > >   	xe_query.o \
> > >   	xe_range_fence.o \
> > >   	xe_reg_sr.o \
> > > diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
> > > index 57520809e48d..f3c4cf10ba20 100644
> > > --- a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
> > > +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
> > > @@ -6,6 +6,7 @@
> > >   #ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
> > >   #define _ABI_GSC_PXP_COMMANDS_ABI_H
> > > +#include <linux/sizes.h>
> > >   #include <linux/types.h>
> > >   /* Heci client ID for PXP commands */
> > > @@ -13,6 +14,12 @@
> > >   #define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
> > > +/*
> > > + * A PXP sub-section in an HECI packet can be up to 64K big in each direction.
> > > + * This does not include the top-level GSC header.
> > > + */
> > > +#define PXP_MAX_PACKET_SIZE SZ_64K
> > > +
> > >   /*
> > >    * there are a lot of status codes for PXP, but we only define the cross-API
> > >    * common ones that we actually can handle in the kernel driver. Other failure
> > > diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > index 0ba37835849b..bc6e867aba17 100644
> > > --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> > > +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> > > @@ -131,6 +131,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
> > >   	struct xe_exec_queue *q;
> > >   	int err;
> > > +	/* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
> > > +	xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
> > > +
> > We should be able to remove this soon. More on that below.
> > 
> > >   	q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
> > >   				  extensions);
> > >   	if (IS_ERR(q))
> > > diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
> > > index cdb29b104006..01386b9f0c50 100644
> > > --- a/drivers/gpu/drm/xe/xe_pxp.c
> > > +++ b/drivers/gpu/drm/xe/xe_pxp.c
> > > @@ -12,6 +12,7 @@
> > >   #include "xe_gt.h"
> > >   #include "xe_gt_types.h"
> > >   #include "xe_mmio.h"
> > > +#include "xe_pxp_submit.h"
> > >   #include "xe_pxp_types.h"
> > >   #include "xe_uc_fw.h"
> > >   #include "regs/xe_pxp_regs.h"
> > > @@ -50,6 +51,20 @@ static int kcr_pxp_enable(const struct xe_pxp *pxp)
> > >   	return kcr_pxp_set_status(pxp, true);
> > >   }
> > > +static int kcr_pxp_disable(const struct xe_pxp *pxp)
> > > +{
> > > +	return kcr_pxp_set_status(pxp, false);
> > > +}
> > > +
> > > +static void pxp_fini(void *arg)
> > > +{
> > > +	struct xe_pxp *pxp = arg;
> > > +
> > > +	xe_pxp_destroy_execution_resources(pxp);
> > > +
> > > +	/* no need to explicitly disable KCR since we're going to do an FLR */
> > > +}
> > > +
> > >   /**
> > >    * xe_pxp_init - initialize PXP support
> > >    * @xe: the xe_device structure
> > > @@ -97,7 +112,15 @@ int xe_pxp_init(struct xe_device *xe)
> > >   	if (err)
> > >   		return err;
> > > +	err = xe_pxp_allocate_execution_resources(pxp);
> > > +	if (err)
> > > +		goto kcr_disable;
> > > +
> > >   	xe->pxp = pxp;
> > > -	return 0;
> > > +	return devm_add_action_or_reset(xe->drm.dev, pxp_fini, pxp);
> > > +
> > > +kcr_disable:
> > > +	kcr_pxp_disable(pxp);
> > > +	return err;
> > >   }
> > > diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
> > > new file mode 100644
> > > index 000000000000..4fc3c7c58101
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
> > > @@ -0,0 +1,188 @@
> > > +// SPDX-License-Identifier: MIT
> > > +/*
> > > + * Copyright(c) 2024 Intel Corporation.
> > > + */
> > > +
> > > +#include "xe_pxp_submit.h"
> > > +
> > > +#include <drm/xe_drm.h>
> > > +
> > > +#include "xe_device_types.h"
> > > +#include "xe_bo.h"
> > > +#include "xe_exec_queue.h"
> > > +#include "xe_gsc_submit.h"
> > > +#include "xe_gt.h"
> > > +#include "xe_pxp_types.h"
> > > +#include "xe_vm.h"
> > > +#include "regs/xe_gt_regs.h"
> > > +
> > > +static int create_vcs_context(struct xe_pxp *pxp)
> > > +{
> > > +	struct xe_gt *gt = pxp->gt;
> > > +	struct xe_hw_engine *hwe;
> > > +	struct xe_exec_queue *q;
> > > +
> > > +	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
> > > +	if (!hwe)
> > > +		return -ENODEV;
> > > +
> > Ugh, I really want to completely decouple an exec queue from hwe (e.g.
> > don't pass in hwe to xe_exec_queue_create). I guess this is already how
> > the code works, so it's fine here; just a reminder of this ugliness.
> > 
> > > +	q = xe_exec_queue_create(pxp->xe, NULL, BIT(hwe->logical_instance), 1, hwe,
> > > +				 EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
> > > +	if (IS_ERR(q))
> > > +		return PTR_ERR(q);
> > > +
> > > +	pxp->vcs_queue = q;
> > > +
> > So how is this used? Not attached to a VM? GGTT or ring instructions
> > only? Any downside to attaching this to the GSC VM?
> 
> Ring instructions only, yes; we only use it to submit a key termination
> (next patch in the series).
> I've made the GSC_VM only usable with the GSCCS so I didn't have to care
> about potentially having a kernel-owned non-faulting VM on user-accessible
> engines, where userspace might instead want to use a faulting VM. If we're
> removing that restriction and allowing the two types to mix, the GSC_VM
> limitation should go away.

If this is going to be ring instructions only, then yeah, I guess leave
it without a VM, as there will be a penalty to switch between fault mode
and non-fault mode. I think if we never export a dma-fence from the
vcs_queue we can bypass a mode switch. If you export a dma-fence from
the vcs_queue anywhere (install it in a sync obj, dma-resv, etc.), we
cannot do that without lockdep getting upset or potentially deadlocking
ourselves. We will need to double check with Thomas / Francois though.
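
To make that concrete, here is roughly the shape I'd expect the
termination submission to take (a sketch only: the function name is made
up and the details belong to the next patch; the xe_bb / xe_sched_job
helpers are the existing ones). The key point is that the job fence is
waited on internally and dropped, never installed anywhere:

static int pxp_submit_vcs_termination(struct xe_pxp *pxp, u32 *cmds, u32 n_dw)
{
	struct xe_sched_job *job;
	struct dma_fence *fence;
	struct xe_bb *bb;
	long timeout;

	bb = xe_bb_new(pxp->gt, n_dw, false);
	if (IS_ERR(bb))
		return PTR_ERR(bb);

	/* copy the termination commands into the batch */
	memcpy(bb->cs + bb->len, cmds, n_dw * sizeof(u32));
	bb->len += n_dw;

	job = xe_bb_create_job(pxp->vcs_queue, bb);
	if (IS_ERR(job)) {
		xe_bb_free(bb, NULL);
		return PTR_ERR(job);
	}

	xe_sched_job_arm(job);
	fence = dma_fence_get(&job->drm.s_fence->finished);
	xe_sched_job_push(job);

	/* wait internally; the fence never reaches a sync obj or dma-resv */
	timeout = dma_fence_wait_timeout(fence, false, HZ);

	xe_bb_free(bb, fence);
	dma_fence_put(fence);

	return timeout > 0 ? 0 : timeout ?: -ETIME;
}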

Matt

> 
> > 
> > > +	return 0;
> > > +}
> > > +
> > > +static void destroy_vcs_context(struct xe_pxp *pxp)
> > > +{
> > > +	if (pxp->vcs_queue)
> > > +		xe_exec_queue_put(pxp->vcs_queue);
> > > +}
> > > +
> > > +/*
> > > + * We allocate a single object for the batch and the input and output BOs. PXP
> > > + * commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE), but we
> > > + * currently only support a subset of commands that are small (< 20 dwords),
> > > + * so a single page is enough for now.
> > > + */
> > > +#define PXP_BB_SIZE		XE_PAGE_SIZE
> > > +#define PXP_INOUT_SIZE		XE_PAGE_SIZE
> > > +#define PXP_BO_SIZE		(PXP_BB_SIZE + (2 * PXP_INOUT_SIZE))
> > > +#define PXP_BB_OFFSET		0
> > > +#define PXP_MSG_IN_OFFSET 	PXP_BB_SIZE
> > > +#define PXP_MSG_OUT_OFFSET 	(PXP_MSG_IN_OFFSET + PXP_INOUT_SIZE)
> > > +static int allocate_gsc_execution_resources(struct xe_pxp *pxp)
> > > +{
> > > +	struct xe_gt *gt = pxp->gt;
> > > +	struct xe_tile *tile = gt_to_tile(gt);
> > > +	struct xe_device *xe = pxp->xe;
> > > +	struct xe_hw_engine *hwe;
> > > +	struct xe_vm *vm;
> > > +	struct xe_bo *bo;
> > > +	struct xe_exec_queue *q;
> > > +	struct dma_fence *fence;
> > > +	long timeout;
> > > +	int err = 0;
> > > +
> > > +	hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, OTHER_GSC_INSTANCE, false);
> > > +
> > > +	/* we shouldn't reach here if the GSC engine is not available */
> > > +	xe_assert(xe, hwe);
> > > +
> > > +	/* PXP instructions must be issued from PPGTT */
> > > +	vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
> > > +	if (IS_ERR(vm))
> > > +		return PTR_ERR(vm);
> > > +
> > > +	/* We allocate a single object for the batch and the in/out memory */
> > > +	xe_vm_lock(vm, false);
> > > +	bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BO_SIZE, ttm_bo_type_kernel,
> > > +				  XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
> > > +	xe_vm_unlock(vm);
> > > +	if (IS_ERR(bo)) {
> > > +		err = PTR_ERR(bo);
> > > +		goto vm_out;
> > > +	}
> > > +
> > > +	fence = xe_vm_bind_bo(vm, bo, NULL, 0, XE_CACHE_WB);
> > > +	if (IS_ERR(fence)) {
> > > +		err = PTR_ERR(fence);
> > > +		goto bo_out;
> > > +	}
> > > +
> > > +	timeout = dma_fence_wait_timeout(fence, false, HZ);
> > > +	dma_fence_put(fence);
> > > +	if (timeout <= 0) {
> > > +		err = timeout ?: -ETIME;
> > > +		goto bo_out;
> > > +	}
> > > +
> > > +	q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
> > > +				 EXEC_QUEUE_FLAG_KERNEL |
> > > +				 EXEC_QUEUE_FLAG_PERMANENT, 0);
> > > +	if (IS_ERR(q)) {
> > > +		err = PTR_ERR(q);
> > > +		goto bo_out;
> > > +	}
> > > +
> > > +	pxp->gsc_exec.vm = vm;
> > > +	pxp->gsc_exec.bo = bo;
> > > +	pxp->gsc_exec.batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_OFFSET);
> > > +	pxp->gsc_exec.msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_MSG_IN_OFFSET);
> > > +	pxp->gsc_exec.msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_MSG_OUT_OFFSET);
> > So with this mapping, all GSC submissions are serially executed and
> > waited on. There won't ever be a need to pipeline things? If the latter
> > is true you could use xe_bb_* plus suballocation of the BO you map.
> > That's more complex, so if serial execution is all you will ever need,
> > then yeah, probably don't use it.
> 
> We only send 2 types of commands, session initialization and session
> invalidation, which have to be serialized.
> 
> Even if we had other commands, the GSC is weird and submissions to it can
> complete with a "wait a bit then try again" message, so we have to wait
> until the fence is signaled, then check the memory, and only if the memory
> reports success can we move on to the next submission.
> 
> > 
> > > +	pxp->gsc_exec.q = q;
> > > +
> > > +	/* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
> > > +	pxp->gsc_exec.host_session_handle = xe_gsc_create_host_session_id();
> > > +
> > > +	return 0;
> > > +
> > > +bo_out:
> > > +	xe_vm_lock(vm, false);
> > > +	xe_bo_unpin(bo);
> > > +	xe_vm_unlock(vm);
> > > +
> > > +	xe_bo_put(bo);
> > Can use the helper I mention below.
> > 
> > > +vm_out:
> > > +	xe_vm_close_and_put(vm);
> > > +
> > > +	return err;
> > > +}
> > > +
> > > +static void destroy_gsc_execution_resources(struct xe_pxp *pxp)
> > > +{
> > > +	if (!pxp->gsc_exec.q)
> > > +		return;
> > > +
> > > +	iosys_map_clear(&pxp->gsc_exec.msg_out);
> > > +	iosys_map_clear(&pxp->gsc_exec.msg_in);
> > > +	iosys_map_clear(&pxp->gsc_exec.batch);
> > I don't think this is strictly needed as it just sets a pointer to NULL.
> > 
> > > +
> > > +	xe_exec_queue_put(pxp->gsc_exec.q);
> > > +
> > > +	xe_vm_lock(pxp->gsc_exec.vm, false);
> > > +	xe_bo_unpin(pxp->gsc_exec.bo);
> > > +	xe_vm_unlock(pxp->gsc_exec.vm);
> > > +	xe_bo_put(pxp->gsc_exec.bo);
> > > +
> > This looks awfully like xe_bo_unpin_map_no_vm. Maybe rename that
> > function and just use it?
> > 
> > If a BO is private to a VM (this one is), xe_bo_lock and xe_vm_lock mean
> > the same thing.
> 
> I didn't know the 2 locks were equivalent. I'll switch to the helper.
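> 
> Something like this is what I have in mind for the destroy side (a
> sketch, assuming the helper keeps its current lock/unpin/unlock/put
> behavior and just gets renamed):
> 
> 	static void destroy_gsc_execution_resources(struct xe_pxp *pxp)
> 	{
> 		if (!pxp->gsc_exec.q)
> 			return;
> 
> 		xe_exec_queue_put(pxp->gsc_exec.q);
> 
> 		/* locks the BO, unpins it, unlocks and drops the ref */
> 		xe_bo_unpin_map_no_vm(pxp->gsc_exec.bo);
> 
> 		xe_vm_close_and_put(pxp->gsc_exec.vm);
> 	}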
> 
> > 
> > > +	xe_vm_close_and_put(pxp->gsc_exec.vm);
> > > +}
> > > +
> > > +/**
> > > + * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
> > > + * @pxp: the xe_pxp structure
> > > + *
> > > + * Allocates exec_queue objects for VCS and GSCCS submission. The GSCCS
> > > + * submissions are done via PPGTT, so this function allocates a VM for it and
> > > + * maps the object into it.
> > > + *
> > > + * Returns 0 if the allocation and mapping are successful, an errno value
> > > + * otherwise.
> > > + */
> > > +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
> > > +{
> > > +	int err;
> > > +
> > > +	err = create_vcs_context(pxp);
> > > +	if (err)
> > > +		return err;
> > > +
> > > +	err = allocate_gsc_execution_resources(pxp);
> > > +	if (err)
> > > +		goto destroy_vcs_context;
> > > +
> > > +	return 0;
> > > +
> > > +destroy_vcs_context:
> > > +	destroy_vcs_context(pxp);
> > > +	return err;
> > > +}
> > > +
> > > +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
> > > +{
> > > +	destroy_gsc_execution_resources(pxp);
> > > +	destroy_vcs_context(pxp);
> > > +}
> > > diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h
> > > new file mode 100644
> > > index 000000000000..1a971fadc081
> > > --- /dev/null
> > > +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
> > > @@ -0,0 +1,16 @@
> > > +/* SPDX-License-Identifier: MIT */
> > > +/*
> > > + * Copyright(c) 2024, Intel Corporation. All rights reserved.
> > > + */
> > > +
> > > +#ifndef __XE_PXP_SUBMIT_H__
> > > +#define __XE_PXP_SUBMIT_H__
> > > +
> > > +#include <linux/types.h>
> > > +
> > > +struct xe_pxp;
> > > +
> > > +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
> > > +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
> > > +
> > > +#endif /* __XE_PXP_SUBMIT_H__ */
> > > diff --git a/drivers/gpu/drm/xe/xe_pxp_types.h b/drivers/gpu/drm/xe/xe_pxp_types.h
> > > index 1561e3bd2676..c16813253b47 100644
> > > --- a/drivers/gpu/drm/xe/xe_pxp_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_pxp_types.h
> > > @@ -6,10 +6,14 @@
> > >   #ifndef __XE_PXP_TYPES_H__
> > >   #define __XE_PXP_TYPES_H__
> > > +#include <linux/iosys-map.h>
> > >   #include <linux/types.h>
> > > +struct xe_bo;
> > > +struct xe_exec_queue;
> > >   struct xe_device;
> > >   struct xe_gt;
> > > +struct xe_vm;
> > >   /**
> > >    * struct xe_pxp - pxp state
> > > @@ -23,6 +27,35 @@ struct xe_pxp {
> > >   	 * (VDBOX, KCR and GSC)
> > >   	 */
> > >   	struct xe_gt *gt;
> > > +
> > > +	/** @vcs_queue: kernel-owned VCS exec queue used for PXP operations */
> > > +	struct xe_exec_queue *vcs_queue;
> > > +
> > > +	/** @gsc_exec: kernel-owned objects for PXP submissions to the GSCCS */
> > > +	struct {
> > > +		/**
> > > +		 * @gsc_exec.host_session_handle: handle used in communications
> > > +		 * with the GSC firmware.
> > > +		 */
> > > +		u64 host_session_handle;
> > > +		/** @gsc_exec.vm: VM used for PXP submissions to the GSCCS */
> > > +		struct xe_vm *vm;
> > > +		/** @gsc_exec.q: GSCCS exec queue for PXP submissions */
> > > +		struct xe_exec_queue *q;
> > > +
> > > +		/**
> > > +		 * @gsc_exec.bo: BO used for submissions to the GSCCS and GSC
> > > +		 * FW. It includes space for the GSCCS batch and the
> > > +		 * input/output buffers read/written by the FW
> > > +		 */
> > > +		struct xe_bo *bo;
> > > +		/** @gsc_exec.batch: iosys_map to the batch memory within the BO */
> > > +		struct iosys_map batch;
> > > +		/** @gsc_exec.msg_in: iosys_map to the input memory within the BO */
> > > +		struct iosys_map msg_in;
> > > +		/** @gsc_exec.msg_out: iosys_map to the output memory within the BO */
> > > +		struct iosys_map msg_out;
> > > +	} gsc_exec;
> > >   };
> > >   #endif /* __XE_PXP_TYPES_H__ */
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> > > index 02f684c0330d..412ec9cb9650 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.c
> > > +++ b/drivers/gpu/drm/xe/xe_vm.c
> > > @@ -1315,6 +1315,15 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> > >   	struct xe_tile *tile;
> > >   	u8 id;
> > > +	/*
> > > +	 * All GSC VMs are owned by the kernel and can also only be used on
> > > +	 * the GSCCS. We don't want a kernel-owned VM to put the device in
> > > +	 * either fault or non-fault mode, so we need to exclude the GSC VMs
> > > +	 * from that count; this is only safe if we ensure that all GSC VMs are
> > > +	 * non-faulting.
> > > +	 */
> > > +	xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
> > > +
> > >   	vm = kzalloc(sizeof(*vm), GFP_KERNEL);
> > >   	if (!vm)
> > >   		return ERR_PTR(-ENOMEM);
> > > @@ -1442,7 +1451,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
> > >   	mutex_lock(&xe->usm.lock);
> > >   	if (flags & XE_VM_FLAG_FAULT_MODE)
> > >   		xe->usm.num_vm_in_fault_mode++;
> > > -	else if (!(flags & XE_VM_FLAG_MIGRATION))
> > > +	else if (!(flags & (XE_VM_FLAG_MIGRATION | XE_VM_FLAG_GSC)))
> > This change is good for now but should become unnecessary once Francois
> > lands some code to remove the restriction on mixing faulting and
> > non-faulting VMs within a device.
> > 
> > >   		xe->usm.num_vm_in_non_fault_mode++;
> > >   	mutex_unlock(&xe->usm.lock);
> > > @@ -2867,11 +2876,10 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
> > >   	for (i = 0; i < vops->num_syncs; i++)
> > >   		xe_sync_entry_signal(vops->syncs + i, fence);
> > >   	xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
> > > -	dma_fence_put(fence);
> > Nit: I'd send this change and the associated changes in xe_vm_bind_ioctl +
> > vm_bind_ioctl_ops_execute as their own patch, perhaps even as an
> > independent series, which I'd RB immediately.
> > 
> > Change looks good though and could be useful elsewhere too.
> > 
> > >   }
> > > -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> > > -				     struct xe_vma_ops *vops)
> > > +static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> > > +						   struct xe_vma_ops *vops)
> > >   {
> > >   	struct drm_exec exec;
> > >   	struct dma_fence *fence;
> > > @@ -2889,7 +2897,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> > >   		fence = ops_execute(vm, vops);
> > >   		if (IS_ERR(fence)) {
> > > -			err = PTR_ERR(fence);
> > >   			/* FIXME: Killing VM rather than proper error handling */
> > >   			xe_vm_kill(vm, false);
> > Looks like you are on an old baseline from before this series landed [1].
> > I suggest rebasing, as those changes creep up into the upper layers a bit.
> > 
> > [1] https://patchwork.freedesktop.org/series/133034/
> 
> Yes, my local tree is from last week. I'll rebase and split out the changes
> to their own patch as suggested.
> 
> > >   			goto unlock;
> > > @@ -2900,7 +2907,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
> > >   unlock:
> > >   	drm_exec_fini(&exec);
> > > -	return err;
> > > +	return fence;
> > >   }
> > >   #define SUPPORTED_FLAGS	\
> > > @@ -3114,6 +3121,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > >   	struct xe_sync_entry *syncs = NULL;
> > >   	struct drm_xe_vm_bind_op *bind_ops;
> > >   	struct xe_vma_ops vops;
> > > +	struct dma_fence *fence;
> > >   	int err;
> > >   	int i;
> > > @@ -3264,7 +3272,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > >   		goto unwind_ops;
> > >   	}
> > > -	err = vm_bind_ioctl_ops_execute(vm, &vops);
> > > +	fence = vm_bind_ioctl_ops_execute(vm, &vops);
> > > +	if (IS_ERR(fence))
> > > +		err = PTR_ERR(fence);
> > > +	else
> > > +		dma_fence_put(fence);
> > >   unwind_ops:
> > >   	if (err && err != -ENODATA)
> > > @@ -3297,6 +3309,80 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
> > >   	return err;
> > >   }
> > > +/**
> > > + * xe_vm_bind_bo - bind a kernel BO to a VM
> > > + * @vm: VM to bind the BO to
> > > + * @bo: BO to bind
> > > + * @q: exec queue to use for the bind (optional)
> > > + * @addr: address at which to bind the BO
> > > + * @cache_lvl: PAT cache level to use
> > > + *
> > > + * Execute a VM bind map operation on a kernel-owned BO to bind it into a
> > > + * kernel-owned VM.
> > > + *
> > > + * Returns the dma_fence of the bind operation on success, an ERR_PTR
> > > + * value otherwise.
> > > + */
> > > +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
> > > +				struct xe_exec_queue *q, u64 addr,
> > > +				enum xe_cache_level cache_lvl)
> > > +{
> > > +	struct xe_vma_ops vops;
> > > +	struct drm_gpuva_ops *ops = NULL;
> > > +	struct dma_fence *fence;
> > > +	int err;
> > > +
> > > +	xe_bo_get(bo);
> > > +	xe_vm_get(vm);
> > > +	if (q)
> > > +		xe_exec_queue_get(q);
> > > +
> > > +	down_write(&vm->lock);
> > > +
> > > +	xe_vma_ops_init(&vops, vm, q, NULL, 0);
> > > +
> > > +	ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
> > > +				       DRM_XE_VM_BIND_OP_MAP, 0,
> > > +				       vm->xe->pat.idx[cache_lvl], 0);
> > > +	if (IS_ERR(ops)) {
> > > +		err = PTR_ERR(ops);
> > > +		goto release_vm_lock;
> > > +	}
> > > +
> > > +	err = vm_bind_ioctl_ops_parse(vm, q, ops, NULL, 0, &vops, true);
> > > +	if (err)
> > > +		goto release_vm_lock;
> > > +
> > > +	/* Nothing to do */
> > > +	if (list_empty(&vops.list)) {
> > Can this ever be true? In the current usage it doesn't appear so. Maybe
> > convert it to an assert on !list_empty to simplify this function slightly?
> 
> will do.
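> 
> i.e. the tail of xe_vm_bind_bo() would then look something like this
> (sketch):
> 
> 	err = vm_bind_ioctl_ops_parse(vm, q, ops, NULL, 0, &vops, true);
> 	if (err)
> 		goto release_vm_lock;
> 
> 	/* a single MAP op always generates at least one vma op */
> 	xe_assert(vm->xe, !list_empty(&vops.list));
> 
> 	fence = vm_bind_ioctl_ops_execute(vm, &vops);
> 	if (IS_ERR(fence))
> 		err = PTR_ERR(fence);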
> 
> Daniele
> 
> > 
> > Matt
> > 
> > > +		err = -ENODATA;
> > > +		goto unwind_ops;
> > > +	}
> > > +
> > > +	fence = vm_bind_ioctl_ops_execute(vm, &vops);
> > > +	if (IS_ERR(fence))
> > > +		err = PTR_ERR(fence);
> > > +
> > > +unwind_ops:
> > > +	if (err && err != -ENODATA)
> > > +		vm_bind_ioctl_ops_unwind(vm, &ops, 1);
> > > +
> > > +	drm_gpuva_ops_free(&vm->gpuvm, ops);
> > > +
> > > +release_vm_lock:
> > > +	up_write(&vm->lock);
> > > +
> > > +	if (q)
> > > +		xe_exec_queue_put(q);
> > > +	xe_vm_put(vm);
> > > +	xe_bo_put(bo);
> > > +
> > > +	if (err)
> > > +		fence = ERR_PTR(err);
> > > +
> > > +	return fence;
> > > +}
> > > +
> > >   /**
> > >    * xe_vm_lock() - Lock the vm's dma_resv object
> > >    * @vm: The struct xe_vm whose lock is to be locked
> > > diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
> > > index b481608b12f1..5e298ac90dfc 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm.h
> > > @@ -19,6 +19,8 @@ struct drm_file;
> > >   struct ttm_buffer_object;
> > >   struct ttm_validate_buffer;
> > > +struct dma_fence;
> > > +
> > >   struct xe_exec_queue;
> > >   struct xe_file;
> > >   struct xe_sync_entry;
> > > @@ -248,6 +250,10 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
> > >   int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
> > >   			  unsigned int num_fences);
> > > +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
> > > +				struct xe_exec_queue *q, u64 addr,
> > > +				enum xe_cache_level cache_lvl);
> > > +
> > >   /**
> > >    * xe_vm_resv() - Return's the vm's reservation object
> > >    * @vm: The vm
> > > diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
> > > index ce1a63a5e3e7..60ce327d303c 100644
> > > --- a/drivers/gpu/drm/xe/xe_vm_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_vm_types.h
> > > @@ -152,6 +152,7 @@ struct xe_vm {
> > >   #define XE_VM_FLAG_BANNED		BIT(5)
> > >   #define XE_VM_FLAG_TILE_ID(flags)	FIELD_GET(GENMASK(7, 6), flags)
> > >   #define XE_VM_FLAG_SET_TILE_ID(tile)	FIELD_PREP(GENMASK(7, 6), (tile)->id)
> > > +#define XE_VM_FLAG_GSC			BIT(8)
> > >   	unsigned long flags;
> > >   	/** @composite_fence_ctx: context composite fence */
> > > -- 
> > > 2.43.0
> > > 
> 

