[RFC PATCH 17/29] drm/xe: Add usermap exec queue extension
Matthew Brost
matthew.brost at intel.com
Mon Nov 18 23:37:45 UTC 2024
Implement uAPI which maps submit rings, indirect LRC state, and
doorbells to user space. This is required for UMD direct submission.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
drivers/gpu/drm/xe/xe_exec_queue.c | 125 ++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_exec_queue_types.h | 13 +++
drivers/gpu/drm/xe/xe_execlist.c | 2 +-
drivers/gpu/drm/xe/xe_lrc.c | 59 +++++++----
drivers/gpu/drm/xe/xe_lrc.h | 2 +-
5 files changed, 176 insertions(+), 25 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
index aef5b130e7f8..c8d45133eb59 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue.c
+++ b/drivers/gpu/drm/xe/xe_exec_queue.c
@@ -11,6 +11,7 @@
#include <drm/drm_file.h>
#include <uapi/drm/xe_drm.h>
+#include "xe_bo.h"
#include "xe_device.h"
#include "xe_gt.h"
#include "xe_hw_engine_class_sysfs.h"
@@ -38,12 +39,18 @@ static int exec_queue_user_extensions_post_init(struct xe_device *xe, struct xe_
static void __xe_exec_queue_free(struct xe_exec_queue *q)
{
+ struct xe_device *xe = q->vm ? q->vm->xe : NULL;
+
if (q->vm)
xe_vm_put(q->vm);
if (q->xef)
xe_file_put(q->xef);
+ if (q->usermap)
+ xe_pm_runtime_put(xe);
+
+ kfree(q->usermap);
kfree(q);
}
@@ -110,6 +117,8 @@ static struct xe_exec_queue *__xe_exec_queue_alloc(struct xe_device *xe,
static int __xe_exec_queue_init(struct xe_exec_queue *q)
{
struct xe_vm *vm = q->vm;
+ u64 ring_addr = q->usermap ? q->usermap->ring_addr : 0;
+ u32 ring_size = q->usermap ? q->usermap->ring_size : SZ_16K;
int i, err;
if (vm) {
@@ -119,7 +128,8 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
}
for (i = 0; i < q->width; ++i) {
- q->lrc[i] = xe_lrc_create(q, q->hwe, q->vm, SZ_16K);
+ q->lrc[i] = xe_lrc_create(q, q->hwe, q->vm, ring_size,
+ ring_addr);
if (IS_ERR(q->lrc[i])) {
err = PTR_ERR(q->lrc[i]);
goto err_unlock;
@@ -444,12 +454,125 @@ typedef int (*xe_exec_queue_user_extension_fn)(struct xe_device *xe,
struct xe_exec_queue *q,
u64 extension);
+static int exec_queue_user_ext_usermap(struct xe_device *xe,
+ struct xe_exec_queue *q,
+ u64 extension)
+{
+ u64 __user *address = u64_to_user_ptr(extension);
+ struct drm_xe_exec_queue_ext_usermap ext;
+ int err;
+
+ /* Just parse args and make sure they are sane */
+
+ if (XE_IOCTL_DBG(xe, !xe_gt_has_indirect_ring_state(q->gt)))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_DBG(xe, q->width != 1))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_DBG(xe, q->flags & (EXEC_QUEUE_FLAG_KERNEL |
+ EXEC_QUEUE_FLAG_PERMANENT |
+ EXEC_QUEUE_FLAG_VM |
+ EXEC_QUEUE_FLAG_BIND_ENGINE_CHILD)))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_DBG(xe, q->width != 1))
+ return -EOPNOTSUPP;
+
+ /*
+ * XXX: More or less free to support this but targeting Mesa for now as
+ * LR mode has ULLS.
+ */
+ if (XE_IOCTL_DBG(xe, xe_vm_in_lr_mode(q->vm)))
+ return -EOPNOTSUPP;
+
+ if (XE_IOCTL_DBG(xe, q->flags & EXEC_QUEUE_FLAG_UMD_SUBMISSION))
+ return -EINVAL;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(xe, err))
+ return -EFAULT;
+
+ if (XE_IOCTL_DBG(xe, ext.reserved[0] || ext.reserved[1]))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, ext.pad))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, ext.flags))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, ext.ring_size < SZ_4K ||
+ ext.ring_size > SZ_2M ||
+ ext.ring_size & ~PAGE_MASK))
+ return -EINVAL;
+
+ if (XE_IOCTL_DBG(xe, ext.version !=
+ DRM_XE_EXEC_QUEUE_USERMAP_VERSION_XE2_REV0))
+ return -EINVAL;
+
+ q->usermap = kzalloc(sizeof(struct xe_exec_queue_usermap), GFP_KERNEL);
+ if (!q->usermap)
+ return -ENOMEM;
+
+ q->usermap->ring_size = ext.ring_size;
+ q->usermap->ring_addr = ext.ring_addr;
+
+ xe_pm_runtime_get_noresume(xe);
+ q->flags |= EXEC_QUEUE_FLAG_UMD_SUBMISSION;
+
+ return 0;
+}
+
+static int exec_queue_user_ext_post_init_usermap(struct xe_device *xe,
+ struct xe_exec_queue *q,
+ u64 extension)
+{
+ struct drm_xe_exec_queue_ext_usermap ext;
+ struct xe_lrc *lrc = q->lrc[0];
+ u64 __user *address = u64_to_user_ptr(extension);
+ u32 indirect_ring_state_handle;
+ int err;
+
+ err = __copy_from_user(&ext, address, sizeof(ext));
+ if (XE_IOCTL_DBG(xe, err))
+ return -EFAULT;
+
+ err = drm_gem_handle_create(q->xef->drm,
+ &lrc->indirect_state->ttm.base,
+ &indirect_ring_state_handle);
+ if (err)
+ return err;
+
+ ext.indirect_ring_state_offset =
+ drm_vma_node_offset_addr(&lrc->indirect_state->ttm.base.vma_node);
+ ext.indirect_ring_state_handle = indirect_ring_state_handle;
+ ext.doorbell_offset = XE_MMIO_DOORBELL_MMAP_OFFSET +
+ SZ_4K * q->guc->db.id;
+ ext.doorbell_page_offset = 0;
+
+ err = copy_to_user(address, &ext, sizeof(ext));
+ if (XE_IOCTL_DBG(xe, err)) {
+ err = -EFAULT;
+ goto close_handles;
+ }
+
+ return 0;
+
+close_handles:
+ drm_gem_handle_delete(q->xef->drm, indirect_ring_state_handle);
+
+ return err;
+}
+
static const xe_exec_queue_user_extension_fn exec_queue_user_extension_funcs[] = {
[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = exec_queue_user_ext_set_property,
+ [DRM_XE_EXEC_QUEUE_EXTENSION_USERMAP] = exec_queue_user_ext_usermap,
};
static const xe_exec_queue_user_extension_fn exec_queue_user_extension_post_init_funcs[] = {
[DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY] = NULL,
+ [DRM_XE_EXEC_QUEUE_EXTENSION_USERMAP] = exec_queue_user_ext_post_init_usermap,
};
#define MAX_USER_EXTENSIONS 16
diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
index 7f68587d4021..b30b5ee910fa 100644
--- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
+++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
@@ -31,6 +31,16 @@ enum xe_exec_queue_priority {
XE_EXEC_QUEUE_PRIORITY_COUNT
};
+/**
+ * struct xe_exec_queue_usermap - Execution queue usermap (UMD submission)
+ */
+struct xe_exec_queue_usermap {
+ /** @ring_addr: ring address (PPGTT) */
+ u64 ring_addr;
+ /** @ring_size: ring size */
+ u32 ring_size;
+};
+
/**
* struct xe_exec_queue - Execution queue
*
@@ -130,6 +140,9 @@ struct xe_exec_queue {
struct list_head link;
} lr;
+ /** @usermap: user map interface */
+ struct xe_exec_queue_usermap *usermap;
+
/** @ops: submission backend exec queue operations */
const struct xe_exec_queue_ops *ops;
diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
index 93f76280d453..803c84b2e4ed 100644
--- a/drivers/gpu/drm/xe/xe_execlist.c
+++ b/drivers/gpu/drm/xe/xe_execlist.c
@@ -265,7 +265,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
port->hwe = hwe;
- port->lrc = xe_lrc_create(NULL, hwe, NULL, SZ_16K);
+ port->lrc = xe_lrc_create(NULL, hwe, NULL, SZ_16K, 0);
if (IS_ERR(port->lrc)) {
err = PTR_ERR(port->lrc);
goto err;
diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
index 8a79470b52ae..8d5a65724c04 100644
--- a/drivers/gpu/drm/xe/xe_lrc.c
+++ b/drivers/gpu/drm/xe/xe_lrc.c
@@ -903,7 +903,7 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
struct xe_hw_engine *hwe, struct xe_vm *vm,
- u32 ring_size)
+ u32 ring_size, u64 ring_addr)
{
struct xe_gt *gt = hwe->gt;
struct xe_tile *tile = gt_to_tile(gt);
@@ -919,6 +919,8 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
XE_BO_FLAG_USER : 0;
int err;
+ xe_assert(xe, (!user_queue && !ring_addr) || (user_queue && ring_addr));
+
kref_init(&lrc->refcount);
lrc->flags = 0;
lrc_size = xe_gt_lrc_size(gt, hwe->class);
@@ -935,16 +937,18 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
if (IS_ERR(lrc->bo))
return PTR_ERR(lrc->bo);
- lrc->submission_ring = xe_bo_create_pin_map(xe, tile, vm, SZ_32K,
- submit_type,
- submit_flags |
- XE_BO_FLAG_VRAM_IF_DGFX(tile) |
- XE_BO_FLAG_GGTT |
- XE_BO_FLAG_GGTT_INVALIDATE);
- if (IS_ERR(lrc->submission_ring)) {
- err = PTR_ERR(lrc->submission_ring);
- lrc->submission_ring = NULL;
- goto err_lrc_finish;
+ if (!user_queue) {
+ lrc->submission_ring = xe_bo_create_pin_map(xe, tile, vm, SZ_32K,
+ submit_type,
+ submit_flags |
+ XE_BO_FLAG_VRAM_IF_DGFX(tile) |
+ XE_BO_FLAG_GGTT |
+ XE_BO_FLAG_GGTT_INVALIDATE);
+ if (IS_ERR(lrc->submission_ring)) {
+ err = PTR_ERR(lrc->submission_ring);
+ lrc->submission_ring = NULL;
+ goto err_lrc_finish;
+ }
}
if (xe_gt_has_indirect_ring_state(gt)) {
@@ -1018,12 +1022,19 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
}
if (xe_gt_has_indirect_ring_state(gt)) {
- xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
- __xe_lrc_indirect_ring_ggtt_addr(lrc));
-
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
- __xe_lrc_ring_ggtt_addr(lrc));
- xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW, 0);
+ if (ring_addr) { /* PPGTT */
+ xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
+ __xe_lrc_indirect_ring_ggtt_addr(lrc) | BIT(0));
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
+ ring_addr);
+ } else {
+ xe_lrc_write_ctx_reg(lrc, CTX_INDIRECT_RING_STATE,
+ __xe_lrc_indirect_ring_ggtt_addr(lrc));
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START,
+ __xe_lrc_ring_ggtt_addr(lrc));
+ }
+ xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_START_UDW,
+ ring_addr >> 32);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_HEAD, 0);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_TAIL, lrc->ring.tail);
xe_lrc_write_indirect_ctx_reg(lrc, INDIRECT_CTX_RING_CTL,
@@ -1056,8 +1067,10 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
lrc->desc |= FIELD_PREP(LRC_ENGINE_CLASS, hwe->class);
}
- arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
- xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+ if (lrc->submission_ring) {
+ arb_enable = MI_ARB_ON_OFF | MI_ARB_ENABLE;
+ xe_lrc_write_ring(lrc, &arb_enable, sizeof(arb_enable));
+ }
map = __xe_lrc_seqno_map(lrc);
xe_map_write32(lrc_to_xe(lrc), &map, lrc->fence_ctx.next_seqno - 1);
@@ -1078,6 +1091,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
* @hwe: Hardware Engine
* @vm: The VM (address space)
* @ring_size: LRC ring size
+ * @ring_addr: LRC ring address, only valid for usermap queues
*
* Allocate and initialize the Logical Ring Context (LRC).
*
@@ -1085,7 +1099,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_exec_queue *q,
* upon failure.
*/
struct xe_lrc *xe_lrc_create(struct xe_exec_queue *q, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size)
+ struct xe_vm *vm, u32 ring_size, u64 ring_addr)
{
struct xe_lrc *lrc;
int err;
@@ -1094,7 +1108,7 @@ struct xe_lrc *xe_lrc_create(struct xe_exec_queue *q, struct xe_hw_engine *hwe,
if (!lrc)
return ERR_PTR(-ENOMEM);
- err = xe_lrc_init(lrc, q, hwe, vm, ring_size);
+ err = xe_lrc_init(lrc, q, hwe, vm, ring_size, ring_addr);
if (err) {
kfree(lrc);
return ERR_PTR(err);
@@ -1717,7 +1731,8 @@ struct xe_lrc_snapshot *xe_lrc_snapshot_capture(struct xe_lrc *lrc)
xe_vm_get(lrc->bo->vm);
snapshot->context_desc = xe_lrc_ggtt_addr(lrc);
- snapshot->ring_addr = __xe_lrc_ring_ggtt_addr(lrc);
+ snapshot->ring_addr = lrc->submission_ring ?
+ __xe_lrc_ring_ggtt_addr(lrc) : 0;
snapshot->indirect_context_desc = xe_lrc_indirect_ring_ggtt_addr(lrc);
snapshot->head = xe_lrc_ring_head(lrc);
snapshot->tail.internal = lrc->ring.tail;
diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
index 23d71283c79d..a7facfa8bf51 100644
--- a/drivers/gpu/drm/xe/xe_lrc.h
+++ b/drivers/gpu/drm/xe/xe_lrc.h
@@ -42,7 +42,7 @@ struct xe_lrc_snapshot {
#define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
struct xe_lrc *xe_lrc_create(struct xe_exec_queue *q, struct xe_hw_engine *hwe,
- struct xe_vm *vm, u32 ring_size);
+ struct xe_vm *vm, u32 ring_size, u64 ring_addr);
void xe_lrc_destroy(struct kref *ref);
/**
--
2.34.1
More information about the dri-devel
mailing list