[RFC 03/14] drm/xe/pxp: Allocate PXP execution resources
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Fri Jul 12 23:00:09 UTC 2024
On 7/12/2024 3:43 PM, Matthew Brost wrote:
> On Fri, Jul 12, 2024 at 02:28:47PM -0700, Daniele Ceraolo Spurio wrote:
>> PXP requires submissions to the HW for the following operations
>>
>> 1) Key invalidation, done via the VCS engine
>> 2) Communication with the GSC FW for session management, done via the
>> GSCCS
>>
>> For #1 we can allocate a simple kernel context, but #2 requires the
>> submissions to be done with PPGTT, which is not currently supported in Xe.
>> To add this support, the following changes have been included:
>>
>> - a new type of kernel-owned VM (marked as GSC)
>> - a new function to map a BO into a VM from within the kernel
>>
>> RFC note: I've tweaked some of the VM functions to return the fence
>> further up the stack, so I can wait on it from the PXP code. Not sure if
>> this is the best approach.
>>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: Matthew Brost <matthew.brost at intel.com>
> Not a complete review but adding some thoughts. Looks sane enough to me.
>
> Random musing and nits below.
>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> ---
>> drivers/gpu/drm/xe/Makefile | 1 +
>> drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h | 7 +
>> drivers/gpu/drm/xe/xe_exec_queue.c | 3 +
>> drivers/gpu/drm/xe/xe_pxp.c | 25 ++-
>> drivers/gpu/drm/xe/xe_pxp_submit.c | 188 ++++++++++++++++++
>> drivers/gpu/drm/xe/xe_pxp_submit.h | 16 ++
>> drivers/gpu/drm/xe/xe_pxp_types.h | 33 +++
>> drivers/gpu/drm/xe/xe_vm.c | 100 +++++++++-
>> drivers/gpu/drm/xe/xe_vm.h | 6 +
>> drivers/gpu/drm/xe/xe_vm_types.h | 1 +
>> 10 files changed, 372 insertions(+), 8 deletions(-)
>> create mode 100644 drivers/gpu/drm/xe/xe_pxp_submit.c
>> create mode 100644 drivers/gpu/drm/xe/xe_pxp_submit.h
>>
>> diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>> index 5f15e6dd5057..a4514265085b 100644
>> --- a/drivers/gpu/drm/xe/Makefile
>> +++ b/drivers/gpu/drm/xe/Makefile
>> @@ -105,6 +105,7 @@ xe-y += xe_bb.o \
>> xe_pt.o \
>> xe_pt_walk.o \
>> xe_pxp.o \
>> + xe_pxp_submit.o \
>> xe_query.o \
>> xe_range_fence.o \
>> xe_reg_sr.o \
>> diff --git a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> index 57520809e48d..f3c4cf10ba20 100644
>> --- a/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> +++ b/drivers/gpu/drm/xe/abi/gsc_pxp_commands_abi.h
>> @@ -6,6 +6,7 @@
>> #ifndef _ABI_GSC_PXP_COMMANDS_ABI_H
>> #define _ABI_GSC_PXP_COMMANDS_ABI_H
>>
>> +#include <linux/sizes.h>
>> #include <linux/types.h>
>>
>> /* Heci client ID for PXP commands */
>> @@ -13,6 +14,12 @@
>>
>> #define PXP_APIVER(x, y) (((x) & 0xFFFF) << 16 | ((y) & 0xFFFF))
>>
>> +/*
>> + * A PXP sub-section in an HECI packet can be up to 64K big in each direction.
>> + * This does not include the top-level GSC header.
>> + */
>> +#define PXP_MAX_PACKET_SIZE SZ_64K
>> +
>> /*
>> * there are a lot of status codes for PXP, but we only define the cross-API
>> * common ones that we actually can handle in the kernel driver. Other failure
>> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
>> index 0ba37835849b..bc6e867aba17 100644
>> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
>> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
>> @@ -131,6 +131,9 @@ struct xe_exec_queue *xe_exec_queue_create(struct xe_device *xe, struct xe_vm *v
>> struct xe_exec_queue *q;
>> int err;
>>
>> + /* VMs for GSCCS queues (and only those) must have the XE_VM_FLAG_GSC flag */
>> + xe_assert(xe, !vm || (!!(vm->flags & XE_VM_FLAG_GSC) == !!(hwe->engine_id == XE_HW_ENGINE_GSCCS0)));
>> +
> We should be able to remove this soon. More on that below.
>
>> q = __xe_exec_queue_alloc(xe, vm, logical_mask, width, hwe, flags,
>> extensions);
>> if (IS_ERR(q))
>> diff --git a/drivers/gpu/drm/xe/xe_pxp.c b/drivers/gpu/drm/xe/xe_pxp.c
>> index cdb29b104006..01386b9f0c50 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp.c
>> +++ b/drivers/gpu/drm/xe/xe_pxp.c
>> @@ -12,6 +12,7 @@
>> #include "xe_gt.h"
>> #include "xe_gt_types.h"
>> #include "xe_mmio.h"
>> +#include "xe_pxp_submit.h"
>> #include "xe_pxp_types.h"
>> #include "xe_uc_fw.h"
>> #include "regs/xe_pxp_regs.h"
>> @@ -50,6 +51,20 @@ static int kcr_pxp_enable(const struct xe_pxp *pxp)
>> return kcr_pxp_set_status(pxp, true);
>> }
>>
>> +static int kcr_pxp_disable(const struct xe_pxp *pxp)
>> +{
>> + return kcr_pxp_set_status(pxp, false);
>> +}
>> +
>> +static void pxp_fini(void *arg)
>> +{
>> + struct xe_pxp *pxp = arg;
>> +
>> + xe_pxp_destroy_execution_resources(pxp);
>> +
>> + /* no need to explicitly disable KCR since we're going to do an FLR */
>> +}
>> +
>> /**
>> * xe_pxp_init - initialize PXP support
>> * @xe: the xe_device structure
>> @@ -97,7 +112,15 @@ int xe_pxp_init(struct xe_device *xe)
>> if (err)
>> return err;
>>
>> + err = xe_pxp_allocate_execution_resources(pxp);
>> + if (err)
>> + goto kcr_disable;
>> +
>> xe->pxp = pxp;
>>
>> - return 0;
>> + return devm_add_action_or_reset(xe->drm.dev, pxp_fini, pxp);
>> +
>> +kcr_disable:
>> + kcr_pxp_disable(pxp);
>> + return err;
>> }
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.c b/drivers/gpu/drm/xe/xe_pxp_submit.c
>> new file mode 100644
>> index 000000000000..4fc3c7c58101
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.c
>> @@ -0,0 +1,188 @@
>> +// SPDX-License-Identifier: MIT
>> +/*
>> + * Copyright(c) 2024 Intel Corporation.
>> + */
>> +
>> +#include "xe_pxp_submit.h"
>> +
>> +#include <drm/xe_drm.h>
>> +
>> +#include "xe_device_types.h"
>> +#include "xe_bo.h"
>> +#include "xe_exec_queue.h"
>> +#include "xe_gsc_submit.h"
>> +#include "xe_gt.h"
>> +#include "xe_pxp_types.h"
>> +#include "xe_vm.h"
>> +#include "regs/xe_gt_regs.h"
>> +
>> +static int create_vcs_context(struct xe_pxp *pxp)
>> +{
>> + struct xe_gt *gt = pxp->gt;
>> + struct xe_hw_engine *hwe;
>> + struct xe_exec_queue *q;
>> +
>> + hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_VIDEO_DECODE, 0, true);
>> + if (!hwe)
>> + return -ENODEV;
>> +
> Ugh, really want to completely decouple an exec queue from hwe (e.g.
> don't pass in hwe to xe_exec_queue_create). I guess this is already in the
> code, so it's fine here; just a reminder of this ugliness.
>
>> + q = xe_exec_queue_create(pxp->xe, NULL, BIT(hwe->logical_instance), 1, hwe,
>> + EXEC_QUEUE_FLAG_KERNEL | EXEC_QUEUE_FLAG_PERMANENT, 0);
>> + if (IS_ERR(q))
>> + return PTR_ERR(q);
>> +
>> + pxp->vcs_queue = q;
>> +
> So how is this used? Not attached to a VM? GGTT or ring instructions
> only? Any downside of attaching this to GSC VM?
Ring instruction only, yes; we only use it to submit a key termination
(next patch in the series).
I've made the GSC_VM only usable with the GSCCS so I didn't have to care
about potentially having a kernel-owned non-faulting VM on
user-accessible engines, where userspace might instead want to use a
faulting VM. If we're removing the limitation and allowing the 2 types
to mix, that limitation for the GSC_VM should go away.
>
>> + return 0;
>> +}
>> +
>> +static void destroy_vcs_context(struct xe_pxp *pxp)
>> +{
>> + if (pxp->vcs_queue)
>> + xe_exec_queue_put(pxp->vcs_queue);
>> +}
>> +
>> +/*
>> + * We allocate a single object for the batch and the input and output BOs. PXP
>> + * commands can require a lot of BO space (see PXP_MAX_PACKET_SIZE), but we
>> + * currently only support a subset of commands that are small (< 20 dwords),
>> + * so a single page is enough for now.
>> + */
>> +#define PXP_BB_SIZE XE_PAGE_SIZE
>> +#define PXP_INOUT_SIZE XE_PAGE_SIZE
>> +#define PXP_BO_SIZE (PXP_BB_SIZE + (2 * PXP_INOUT_SIZE))
>> +#define PXP_BB_OFFSET 0
>> +#define PXP_MSG_IN_OFFSET PXP_BB_SIZE
>> +#define PXP_MSG_OUT_OFFSET (PXP_MSG_IN_OFFSET + PXP_INOUT_SIZE)
>> +static int allocate_gsc_execution_resources(struct xe_pxp *pxp)
>> +{
>> + struct xe_gt *gt = pxp->gt;
>> + struct xe_tile *tile = gt_to_tile(gt);
>> + struct xe_device *xe = pxp->xe;
>> + struct xe_hw_engine *hwe;
>> + struct xe_vm *vm;
>> + struct xe_bo *bo;
>> + struct xe_exec_queue *q;
>> + struct dma_fence *fence;
>> + long timeout;
>> + int err = 0;
>> +
>> + hwe = xe_gt_hw_engine(gt, XE_ENGINE_CLASS_OTHER, OTHER_GSC_INSTANCE, false);
>> +
>> + /* we shouldn't reach here if the GSC engine is not available */
>> + xe_assert(xe, hwe);
>> +
>> + /* PXP instructions must be issued from PPGTT */
>> + vm = xe_vm_create(xe, XE_VM_FLAG_GSC);
>> + if (IS_ERR(vm))
>> + return PTR_ERR(vm);
>> +
>> + /* We allocate a single object for the batch and the in/out memory */
>> + xe_vm_lock(vm, false);
>> + bo = xe_bo_create_pin_map(xe, tile, vm, PXP_BO_SIZE, ttm_bo_type_kernel,
>> + XE_BO_FLAG_SYSTEM | XE_BO_FLAG_PINNED | XE_BO_FLAG_NEEDS_UC);
>> + xe_vm_unlock(vm);
>> + if (IS_ERR(bo)) {
>> + err = PTR_ERR(bo);
>> + goto vm_out;
>> + }
>> +
>> + fence = xe_vm_bind_bo(vm, bo, NULL, 0, XE_CACHE_WB);
>> + if (IS_ERR(fence)) {
>> + err = PTR_ERR(fence);
>> + goto bo_out;
>> + }
>> +
>> + timeout = dma_fence_wait_timeout(fence, false, HZ);
>> + dma_fence_put(fence);
>> + if (timeout <= 0) {
>> + err = timeout ?: -ETIME;
>> + goto bo_out;
>> + }
>> +
>> + q = xe_exec_queue_create(xe, vm, BIT(hwe->logical_instance), 1, hwe,
>> + EXEC_QUEUE_FLAG_KERNEL |
>> + EXEC_QUEUE_FLAG_PERMANENT, 0);
>> + if (IS_ERR(q)) {
>> + err = PTR_ERR(q);
>> + goto bo_out;
>> + }
>> +
>> + pxp->gsc_exec.vm = vm;
>> + pxp->gsc_exec.bo = bo;
>> + pxp->gsc_exec.batch = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_BB_OFFSET);
>> + pxp->gsc_exec.msg_in = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_MSG_IN_OFFSET);
>> + pxp->gsc_exec.msg_out = IOSYS_MAP_INIT_OFFSET(&bo->vmap, PXP_MSG_OUT_OFFSET);
> So with this mapping, all GSC are serially executed and waited on. There
> won't ever be a need to pipeline things? If the latter is true you could
> use xe_bb_* plus suballocation of the BO you map. More complex so if serial
> execute is all you will ever need, then yea probably don't use that.
We only send 2 types of commands, session initialization and session
invalidation, which have to be serialized.
Even if we had other commands, the GSC is weird and submissions to it
can complete with a "wait a bit then try again" message, so we have to
wait until the fence is signaled, then check the memory and only if the
memory has a "success" return we can move on to the next submission.
>
>> + pxp->gsc_exec.q = q;
>> +
>> + /* initialize host-session-handle (for all Xe-to-gsc-firmware PXP cmds) */
>> + pxp->gsc_exec.host_session_handle = xe_gsc_create_host_session_id();
>> +
>> + return 0;
>> +
>> +bo_out:
>> + xe_vm_lock(vm, false);
>> + xe_bo_unpin(bo);
>> + xe_vm_unlock(vm);
>> +
>> + xe_bo_put(bo);
> Can use helper I mention below.
>
>> +vm_out:
>> + xe_vm_close_and_put(vm);
>> +
>> + return err;
>> +}
>> +
>> +static void destroy_gsc_execution_resources(struct xe_pxp *pxp)
>> +{
>> + if (!pxp->gsc_exec.q)
>> + return;
>> +
>> + iosys_map_clear(&pxp->gsc_exec.msg_out);
>> + iosys_map_clear(&pxp->gsc_exec.msg_in);
>> + iosys_map_clear(&pxp->gsc_exec.batch);
> I don't think this is strictly needed as it just sets a pointer to NULL.
>
>> +
>> + xe_exec_queue_put(pxp->gsc_exec.q);
>> +
>> + xe_vm_lock(pxp->gsc_exec.vm, false);
>> + xe_bo_unpin(pxp->gsc_exec.bo);
>> + xe_vm_unlock(pxp->gsc_exec.vm);
>> + xe_bo_put(pxp->gsc_exec.bo);
>> +
> This looks awfully like xe_bo_unpin_map_no_vm. Maybe rename that
> function and just use it?
>
> If a BO is private to a VM (this one is), xe_bo_lock and xe_vm_lock mean
> the same thing.
I didn't know the 2 locks were equivalent. I'll switch to the helper.
>
>> + xe_vm_close_and_put(pxp->gsc_exec.vm);
>> +}
>> +
>> +/**
>> + * xe_pxp_allocate_execution_resources - Allocate PXP submission objects
>> + * @pxp: the xe_pxp structure
>> + *
>> + * Allocates exec_queues objects for VCS and GSCCS submission. The GSCCS
>> + * submissions are done via PPGTT, so this function allocates a VM for it and
>> + * maps the object into it.
>> + *
>> + * Returns 0 if the allocation and mapping is successful, an errno value
>> + * otherwise.
>> + */
>> +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp)
>> +{
>> + int err;
>> +
>> + err = create_vcs_context(pxp);
>> + if (err)
>> + return err;
>> +
>> + err = allocate_gsc_execution_resources(pxp);
>> + if (err)
>> + goto destroy_vcs_context;
>> +
>> + return 0;
>> +
>> +destroy_vcs_context:
>> + destroy_vcs_context(pxp);
>> + return err;
>> +}
>> +
>> +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp)
>> +{
>> + destroy_gsc_execution_resources(pxp);
>> + destroy_vcs_context(pxp);
>> +}
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_submit.h b/drivers/gpu/drm/xe/xe_pxp_submit.h
>> new file mode 100644
>> index 000000000000..1a971fadc081
>> --- /dev/null
>> +++ b/drivers/gpu/drm/xe/xe_pxp_submit.h
>> @@ -0,0 +1,16 @@
>> +/* SPDX-License-Identifier: MIT */
>> +/*
>> + * Copyright(c) 2024, Intel Corporation. All rights reserved.
>> + */
>> +
>> +#ifndef __XE_PXP_SUBMIT_H__
>> +#define __XE_PXP_SUBMIT_H__
>> +
>> +#include <linux/types.h>
>> +
>> +struct xe_pxp;
>> +
>> +int xe_pxp_allocate_execution_resources(struct xe_pxp *pxp);
>> +void xe_pxp_destroy_execution_resources(struct xe_pxp *pxp);
>> +
>> +#endif /* __XE_PXP_SUBMIT_H__ */
>> diff --git a/drivers/gpu/drm/xe/xe_pxp_types.h b/drivers/gpu/drm/xe/xe_pxp_types.h
>> index 1561e3bd2676..c16813253b47 100644
>> --- a/drivers/gpu/drm/xe/xe_pxp_types.h
>> +++ b/drivers/gpu/drm/xe/xe_pxp_types.h
>> @@ -6,10 +6,14 @@
>> #ifndef __XE_PXP_TYPES_H__
>> #define __XE_PXP_TYPES_H__
>>
>> +#include <linux/iosys-map.h>
>> #include <linux/types.h>
>>
>> +struct xe_bo;
>> +struct xe_exec_queue;
>> struct xe_device;
>> struct xe_gt;
>> +struct xe_vm;
>>
>> /**
>> * struct xe_pxp - pxp state
>> @@ -23,6 +27,35 @@ struct xe_pxp {
>> * (VDBOX, KCR and GSC)
>> */
>> struct xe_gt *gt;
>> +
>> + /** @vcs_queue: kernel-owned VCS exec queue used for PXP operations */
>> + struct xe_exec_queue *vcs_queue;
>> +
>> + /** @gsc_exec: kernel-owned objects for PXP submissions to the GSCCS */
>> + struct {
>> + /**
>> + * @gsc_exec.host_session_handle: handle used in communications
>> + * with the GSC firmware.
>> + */
>> + u64 host_session_handle;
>> + /** @gsc_exec.vm: VM used for PXP submissions to the GSCCS */
>> + struct xe_vm *vm;
>> + /** @gsc_exec.q: GSCCS exec queue for PXP submissions */
>> + struct xe_exec_queue *q;
>> +
>> + /**
>> + * @gsc_exec.bo: BO used for submissions to the GSCCS and GSC
>> + * FW. It includes space for the GSCCS batch and the
>> + * input/output buffers read/written by the FW
>> + */
>> + struct xe_bo *bo;
>> + /** @gsc_exec.batch: iosys_map to the batch memory within the BO */
>> + struct iosys_map batch;
>> + /** @gsc_exec.msg_in: iosys_map to the input memory within the BO */
>> + struct iosys_map msg_in;
>> + /** @gsc_exec.msg_out: iosys_map to the output memory within the BO */
>> + struct iosys_map msg_out;
>> + } gsc_exec;
>> };
>>
>> #endif /* __XE_PXP_TYPES_H__ */
>> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
>> index 02f684c0330d..412ec9cb9650 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.c
>> +++ b/drivers/gpu/drm/xe/xe_vm.c
>> @@ -1315,6 +1315,15 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>> struct xe_tile *tile;
>> u8 id;
>>
>> + /*
>> + * All GSC VMs are owned by the kernel and can also only be used on
>> + * the GSCCS. We don't want a kernel-owned VM to put the device in
>> + * either fault or not fault mode, so we need to exclude the GSC VMs
>> + * from that count; this is only safe if we ensure that all GSC VMs are
>> + * non-faulting.
>> + */
>> + xe_assert(xe, !((flags & XE_VM_FLAG_GSC) && (flags & XE_VM_FLAG_FAULT_MODE)));
>> +
>> vm = kzalloc(sizeof(*vm), GFP_KERNEL);
>> if (!vm)
>> return ERR_PTR(-ENOMEM);
>> @@ -1442,7 +1451,7 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags)
>> mutex_lock(&xe->usm.lock);
>> if (flags & XE_VM_FLAG_FAULT_MODE)
>> xe->usm.num_vm_in_fault_mode++;
>> - else if (!(flags & XE_VM_FLAG_MIGRATION))
>> + else if (!(flags & (XE_VM_FLAG_MIGRATION | XE_VM_FLAG_GSC)))
> This change is good now but should become unnecessary once Francois
> lands some code to remove the restriction of mixing faulting and
> non-faulting VM within a device.
>
>> xe->usm.num_vm_in_non_fault_mode++;
>> mutex_unlock(&xe->usm.lock);
>>
>> @@ -2867,11 +2876,10 @@ static void vm_bind_ioctl_ops_fini(struct xe_vm *vm, struct xe_vma_ops *vops,
>> for (i = 0; i < vops->num_syncs; i++)
>> xe_sync_entry_signal(vops->syncs + i, fence);
>> xe_exec_queue_last_fence_set(wait_exec_queue, vm, fence);
>> - dma_fence_put(fence);
> Nit: I'd send this change and associated change in xe_vm_bind_ioctl +
> vm_bind_ioctl_ops_execute in its own patch, perhaps even as an
> independent series which I'd RB immediately.
>
> Change looks good though and could be useful else where too.
>
>> }
>>
>> -static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> - struct xe_vma_ops *vops)
>> +static struct dma_fence *vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>> + struct xe_vma_ops *vops)
>> {
>> struct drm_exec exec;
>> struct dma_fence *fence;
>> @@ -2889,7 +2897,6 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>>
>> fence = ops_execute(vm, vops);
>> if (IS_ERR(fence)) {
>> - err = PTR_ERR(fence);
>> /* FIXME: Killing VM rather than proper error handling */
>> xe_vm_kill(vm, false);
> Looks like you are on old baseline before this series landed [1]. I
> suggest rebasing as those changes creep up in the upper layers a bit.
>
> [1] https://patchwork.freedesktop.org/series/133034/
Yes, my local tree is from last week. I'll rebase and split out the
changes to their own patch as suggested.
>> goto unlock;
>> @@ -2900,7 +2907,7 @@ static int vm_bind_ioctl_ops_execute(struct xe_vm *vm,
>>
>> unlock:
>> drm_exec_fini(&exec);
>> - return err;
>> + return fence;
>> }
>>
>> #define SUPPORTED_FLAGS \
>> @@ -3114,6 +3121,7 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> struct xe_sync_entry *syncs = NULL;
>> struct drm_xe_vm_bind_op *bind_ops;
>> struct xe_vma_ops vops;
>> + struct dma_fence *fence;
>> int err;
>> int i;
>>
>> @@ -3264,7 +3272,11 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> goto unwind_ops;
>> }
>>
>> - err = vm_bind_ioctl_ops_execute(vm, &vops);
>> + fence = vm_bind_ioctl_ops_execute(vm, &vops);
>> + if (IS_ERR(fence))
>> + err = PTR_ERR(fence);
>> + else
>> + dma_fence_put(fence);
>>
>> unwind_ops:
>> if (err && err != -ENODATA)
>> @@ -3297,6 +3309,80 @@ int xe_vm_bind_ioctl(struct drm_device *dev, void *data, struct drm_file *file)
>> return err;
>> }
>>
>> +/**
>> + * xe_vm_bind_bo - bind a kernel BO to a VM
>> + * @vm: VM to bind the BO to
>> + * @bo: BO to bind
>> + * @q: exec queue to use for the bind (optional)
>> + * @addr: address at which to bind the BO
>> + * @cache_lvl: PAT cache level to use
>> + *
>> + * Execute a VM bind map operation on a kernel-owned BO to bind it into a
>> + * kernel-owned VM.
>> + *
>> + * Returns 0 if the ops execution is successful, an errno value otherwise.
>> + * TODO: return a fence instead.
>> + */
>> +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
>> + struct xe_exec_queue *q, u64 addr,
>> + enum xe_cache_level cache_lvl)
>> +{
>> + struct xe_vma_ops vops;
>> + struct drm_gpuva_ops *ops = NULL;
>> + struct dma_fence *fence;
>> + int err;
>> +
>> + xe_bo_get(bo);
>> + xe_vm_get(vm);
>> + if (q)
>> + xe_exec_queue_get(q);
>> +
>> + down_write(&vm->lock);
>> +
>> + xe_vma_ops_init(&vops, vm, q, NULL, 0);
>> +
>> + ops = vm_bind_ioctl_ops_create(vm, bo, 0, addr, bo->size,
>> + DRM_XE_VM_BIND_OP_MAP, 0,
>> + vm->xe->pat.idx[cache_lvl], 0);
>> + if (IS_ERR(ops)) {
>> + err = PTR_ERR(ops);
>> + goto release_vm_lock;
>> + }
>> +
>> + err = vm_bind_ioctl_ops_parse(vm, q, ops, NULL, 0, &vops, true);
>> + if (err)
>> + goto release_vm_lock;
>> +
>> + /* Nothing to do */
>> + if (list_empty(&vops.list)) {
> Can this ever be true? In the current usage it appear so. Maybe convert
> to an assert !list_empty to simplify this function slightly?
will do.
Daniele
>
> Matt
>
>> + err = -ENODATA;
>> + goto unwind_ops;
>> + }
>> +
>> + fence = vm_bind_ioctl_ops_execute(vm, &vops);
>> + if (IS_ERR(fence))
>> + err = PTR_ERR(fence);
>> +
>> +unwind_ops:
>> + if (err && err != -ENODATA)
>> + vm_bind_ioctl_ops_unwind(vm, &ops, 1);
>> +
>> + drm_gpuva_ops_free(&vm->gpuvm, ops);
>> +
>> +release_vm_lock:
>> + up_write(&vm->lock);
>> +
>> + if (q)
>> + xe_exec_queue_put(q);
>> + xe_vm_put(vm);
>> + xe_bo_put(bo);
>> +
>> + if (err)
>> + fence = ERR_PTR(err);
>> +
>> + return fence;
>> +}
>> +
>> /**
>> * xe_vm_lock() - Lock the vm's dma_resv object
>> * @vm: The struct xe_vm whose lock is to be locked
>> diff --git a/drivers/gpu/drm/xe/xe_vm.h b/drivers/gpu/drm/xe/xe_vm.h
>> index b481608b12f1..5e298ac90dfc 100644
>> --- a/drivers/gpu/drm/xe/xe_vm.h
>> +++ b/drivers/gpu/drm/xe/xe_vm.h
>> @@ -19,6 +19,8 @@ struct drm_file;
>> struct ttm_buffer_object;
>> struct ttm_validate_buffer;
>>
>> +struct dma_fence;
>> +
>> struct xe_exec_queue;
>> struct xe_file;
>> struct xe_sync_entry;
>> @@ -248,6 +250,10 @@ int xe_vm_lock_vma(struct drm_exec *exec, struct xe_vma *vma);
>> int xe_vm_validate_rebind(struct xe_vm *vm, struct drm_exec *exec,
>> unsigned int num_fences);
>>
>> +struct dma_fence *xe_vm_bind_bo(struct xe_vm *vm, struct xe_bo *bo,
>> + struct xe_exec_queue *q, u64 addr,
>> + enum xe_cache_level cache_lvl);
>> +
>> /**
>> * xe_vm_resv() - Return's the vm's reservation object
>> * @vm: The vm
>> diff --git a/drivers/gpu/drm/xe/xe_vm_types.h b/drivers/gpu/drm/xe/xe_vm_types.h
>> index ce1a63a5e3e7..60ce327d303c 100644
>> --- a/drivers/gpu/drm/xe/xe_vm_types.h
>> +++ b/drivers/gpu/drm/xe/xe_vm_types.h
>> @@ -152,6 +152,7 @@ struct xe_vm {
>> #define XE_VM_FLAG_BANNED BIT(5)
>> #define XE_VM_FLAG_TILE_ID(flags) FIELD_GET(GENMASK(7, 6), flags)
>> #define XE_VM_FLAG_SET_TILE_ID(tile) FIELD_PREP(GENMASK(7, 6), (tile)->id)
>> +#define XE_VM_FLAG_GSC BIT(8)
>> unsigned long flags;
>>
>> /** @composite_fence_ctx: context composite fence */
>> --
>> 2.43.0
>>
More information about the Intel-xe
mailing list