[PATCH 2/2] drm/xe/uapi: Restore uapi for access counting
Cavitt, Jonathan
jonathan.cavitt at intel.com
Tue Oct 1 16:35:02 UTC 2024
-----Original Message-----
From: Intel-xe <intel-xe-bounces at lists.freedesktop.org> On Behalf Of apoorva.singh at intel.com
Sent: Tuesday, October 1, 2024 6:55 AM
To: intel-xe at lists.freedesktop.org
Cc: Zeng, Oak <oak.zeng at intel.com>; Brian Welty <brian.welty at intel.com>; Singh, Apoorva <apoorva.singh at intel.com>
Subject: [PATCH 2/2] drm/xe/uapi: Restore uapi for access counting
>
> From: Brian Welty <brian.welty at intel.com>
>
> In order to enable access counters, this must be enabled in LRC for
> the exec_queue. Add basic uAPI to set configuration of access counter
> trigger threshold and granularity.
>
> When access counters are enabled and threshold is hit, the access
> counter handler in xe_gt_pagefault.c will migrate the buffer to
> that GT's local VRAM.
>
> Signed-off-by: Brian Welty <brian.welty at intel.com>
> Signed-off-by: Apoorva Singh <apoorva.singh at intel.com>
Reviewed-by: Jonathan Cavitt <jonathan.cavitt at intel.com>
Though, of course, any UAPI changes need to be additionally validated by the
UAPI team.
-Jonathan Cavitt
> ---
> drivers/gpu/drm/xe/xe_exec_queue.c | 46 ++++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_exec_queue_types.h | 10 ++++++
> drivers/gpu/drm/xe/xe_execlist.c | 2 +-
> drivers/gpu/drm/xe/xe_lrc.c | 16 ++++++---
> drivers/gpu/drm/xe/xe_lrc.h | 2 +-
> include/uapi/drm/xe_drm.h | 11 ++++++
> 6 files changed, 79 insertions(+), 8 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue.c b/drivers/gpu/drm/xe/xe_exec_queue.c
> index d098d2dd1b2d..4114a5ca04cc 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue.c
> +++ b/drivers/gpu/drm/xe/xe_exec_queue.c
> @@ -117,7 +117,7 @@ static int __xe_exec_queue_init(struct xe_exec_queue *q)
> }
>
> for (i = 0; i < q->width; ++i) {
> - q->lrc[i] = xe_lrc_create(q->hwe, q->vm, SZ_16K);
> + q->lrc[i] = xe_lrc_create(q->hwe, q->vm, q, SZ_16K);
> if (IS_ERR(q->lrc[i])) {
> err = PTR_ERR(q->lrc[i]);
> goto err_unlock;
> @@ -392,6 +392,42 @@ static int exec_queue_set_timeslice(struct xe_device *xe, struct xe_exec_queue *
> return 0;
> }
>
> +static int exec_queue_set_acc_trigger(struct xe_device *xe, struct xe_exec_queue *q,
> + u64 value)
> +{
> + if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
> + return -EINVAL;
> +
> + q->usm.acc_trigger = value;
> +
> + return 0;
> +}
> +
> +static int exec_queue_set_acc_notify(struct xe_device *xe, struct xe_exec_queue *q,
> + u64 value)
> +{
> + if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
> + return -EINVAL;
> +
> + q->usm.acc_notify = value;
> +
> + return 0;
> +}
> +
> +static int exec_queue_set_acc_granularity(struct xe_device *xe, struct xe_exec_queue *q,
> + u64 value)
> +{
> + if (XE_IOCTL_DBG(xe, !xe->info.has_usm))
> + return -EINVAL;
> +
> + if (value > DRM_XE_ACC_GRANULARITY_64M)
> + return -EINVAL;
> +
> + q->usm.acc_granularity = value;
> +
> + return 0;
> +}
> +
> typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
> struct xe_exec_queue *q,
> u64 value);
> @@ -399,6 +435,9 @@ typedef int (*xe_exec_queue_set_property_fn)(struct xe_device *xe,
> static const xe_exec_queue_set_property_fn exec_queue_set_property_funcs[] = {
> [DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY] = exec_queue_set_priority,
> [DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE] = exec_queue_set_timeslice,
> + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER] = exec_queue_set_acc_trigger,
> + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY] = exec_queue_set_acc_notify,
> + [DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY] = exec_queue_set_acc_granularity,
> };
>
> static int exec_queue_user_ext_set_property(struct xe_device *xe,
> @@ -418,7 +457,10 @@ static int exec_queue_user_ext_set_property(struct xe_device *xe,
> ARRAY_SIZE(exec_queue_set_property_funcs)) ||
> XE_IOCTL_DBG(xe, ext.pad) ||
> XE_IOCTL_DBG(xe, ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY &&
> - ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE))
> + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE &&
> + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER &&
> + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY &&
> + ext.property != DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY))
> return -EINVAL;
>
> idx = array_index_nospec(ext.property, ARRAY_SIZE(exec_queue_set_property_funcs));
> diff --git a/drivers/gpu/drm/xe/xe_exec_queue_types.h b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> index 7deb480e26af..cf4ec4bfdbfd 100644
> --- a/drivers/gpu/drm/xe/xe_exec_queue_types.h
> +++ b/drivers/gpu/drm/xe/xe_exec_queue_types.h
> @@ -128,6 +128,16 @@ struct xe_exec_queue {
> struct list_head link;
> } lr;
>
> + /** @usm: unified shared memory state */
> + struct {
> + /** @usm.acc_trigger: access counter trigger */
> + u32 acc_trigger;
> + /** @usm.acc_notify: access counter notify */
> + u32 acc_notify;
> + /** @usm.acc_granularity: access counter granularity */
> + u32 acc_granularity;
> + } usm;
> +
> /** @ops: submission backend exec queue operations */
> const struct xe_exec_queue_ops *ops;
>
> diff --git a/drivers/gpu/drm/xe/xe_execlist.c b/drivers/gpu/drm/xe/xe_execlist.c
> index f3b71fe7a96d..33f5b31ea0cb 100644
> --- a/drivers/gpu/drm/xe/xe_execlist.c
> +++ b/drivers/gpu/drm/xe/xe_execlist.c
> @@ -265,7 +265,7 @@ struct xe_execlist_port *xe_execlist_port_create(struct xe_device *xe,
>
> port->hwe = hwe;
>
> - port->lrc = xe_lrc_create(hwe, NULL, SZ_16K);
> + port->lrc = xe_lrc_create(hwe, NULL, NULL, SZ_16K);
> if (IS_ERR(port->lrc)) {
> err = PTR_ERR(port->lrc);
> goto err;
> diff --git a/drivers/gpu/drm/xe/xe_lrc.c b/drivers/gpu/drm/xe/xe_lrc.c
> index f0976230012a..3be668caf700 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.c
> +++ b/drivers/gpu/drm/xe/xe_lrc.c
> @@ -891,9 +891,11 @@ static void xe_lrc_finish(struct xe_lrc *lrc)
>
> #define PVC_CTX_ASID (0x2e + 1)
> #define PVC_CTX_ACC_CTR_THOLD (0x2a + 1)
> +#define ACC_GRANULARITY_S 20
> +#define ACC_NOTIFY_S 16
>
> static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> - struct xe_vm *vm, u32 ring_size)
> + struct xe_exec_queue *q, struct xe_vm *vm, u32 ring_size)
> {
> struct xe_gt *gt = hwe->gt;
> struct xe_tile *tile = gt_to_tile(gt);
> @@ -984,7 +986,13 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> xe_lrc_write_ctx_reg(lrc, CTX_TIMESTAMP, 0);
>
> if (xe->info.has_asid && vm)
> - xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID, vm->usm.asid);
> + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ASID,
> + (q->usm.acc_granularity <<
> + ACC_GRANULARITY_S) | vm->usm.asid);
> + if (xe->info.has_usm && vm)
> + xe_lrc_write_ctx_reg(lrc, PVC_CTX_ACC_CTR_THOLD,
> + (q->usm.acc_notify <<
> + ACC_NOTIFY_S) | q->usm.acc_trigger);
>
> lrc->desc = LRC_VALID;
> lrc->desc |= FIELD_PREP(LRC_ADDRESSING_MODE, LRC_LEGACY_64B_CONTEXT);
> @@ -1029,7 +1037,7 @@ static int xe_lrc_init(struct xe_lrc *lrc, struct xe_hw_engine *hwe,
> * upon failure.
> */
> struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
> - u32 ring_size)
> + struct xe_exec_queue *q, u32 ring_size)
> {
> struct xe_lrc *lrc;
> int err;
> @@ -1038,7 +1046,7 @@ struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
> if (!lrc)
> return ERR_PTR(-ENOMEM);
>
> - err = xe_lrc_init(lrc, hwe, vm, ring_size);
> + err = xe_lrc_init(lrc, hwe, q, vm, ring_size);
> if (err) {
> kfree(lrc);
> return ERR_PTR(err);
> diff --git a/drivers/gpu/drm/xe/xe_lrc.h b/drivers/gpu/drm/xe/xe_lrc.h
> index c24542e89318..3806f977cad0 100644
> --- a/drivers/gpu/drm/xe/xe_lrc.h
> +++ b/drivers/gpu/drm/xe/xe_lrc.h
> @@ -23,7 +23,7 @@ struct xe_vm;
> #define LRC_PPHWSP_SCRATCH_ADDR (0x34 * 4)
>
> struct xe_lrc *xe_lrc_create(struct xe_hw_engine *hwe, struct xe_vm *vm,
> - u32 ring_size);
> + struct xe_exec_queue *q, u32 ring_size);
> void xe_lrc_destroy(struct kref *ref);
>
> /**
> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> index b6fbe4988f2e..e2326bbedc81 100644
> --- a/include/uapi/drm/xe_drm.h
> +++ b/include/uapi/drm/xe_drm.h
> @@ -1108,6 +1108,17 @@ struct drm_xe_exec_queue_create {
> #define DRM_XE_EXEC_QUEUE_EXTENSION_SET_PROPERTY 0
> #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_PRIORITY 0
> #define DRM_XE_EXEC_QUEUE_SET_PROPERTY_TIMESLICE 1
> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_TRIGGER 2
> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_NOTIFY 3
> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY_ACC_GRANULARITY 4
> +/* Monitor 128KB contiguous region with 4K sub-granularity */
> +#define DRM_XE_ACC_GRANULARITY_128K 0
> +/* Monitor 2MB contiguous region with 64KB sub-granularity */
> +#define DRM_XE_ACC_GRANULARITY_2M 1
> +/* Monitor 16MB contiguous region with 512KB sub-granularity */
> +#define DRM_XE_ACC_GRANULARITY_16M 2
> +/* Monitor 64MB contiguous region with 2M sub-granularity */
> +#define DRM_XE_ACC_GRANULARITY_64M 3
>
> /** @extensions: Pointer to the first extension struct, if any */
> __u64 extensions;
> --
> 2.34.1
>
>
More information about the Intel-xe
mailing list