[PATCH 01/11] drm/xe: Stub out new pagefault layer

Wed Aug 6 23:01:12 UTC 2025

Few basic comments below to start. I personally would rather this be
brought over from the existing fault handler rather than creating
something entirely new and then clobbering the older stuff - just so
the review of message format requests/replies is easier to review and
where we're deviating from the existing external interfaces
(HW/FW/GuC/etc). You already have this here though so not a huge deal.
I think most of this was in the giant blob of patches that got merged
with the initial driver, so I guess the counter argument is we can have
easy to reference historical reviews now.

On Tue, 2025-08-05 at 23:22 -0700, Matthew Brost wrote:
> Stub out the new page fault layer and add kernel documentation. This
> is
> intended as a replacement for the GT page fault layer, enabling
> multiple
> producers to hook into a shared page fault consumer interface.
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/Makefile             |   1 +
>  drivers/gpu/drm/xe/xe_pagefault.c       |  63 ++++++++++++
>  drivers/gpu/drm/xe/xe_pagefault.h       |  19 ++++
>  drivers/gpu/drm/xe/xe_pagefault_types.h | 125
> ++++++++++++++++++++++++
>  4 files changed, 208 insertions(+)
>  create mode 100644 drivers/gpu/drm/xe/xe_pagefault.c
>  create mode 100644 drivers/gpu/drm/xe/xe_pagefault.h
>  create mode 100644 drivers/gpu/drm/xe/xe_pagefault_types.h
> 
> diff --git a/drivers/gpu/drm/xe/Makefile
> b/drivers/gpu/drm/xe/Makefile
> index 8e0c3412a757..6fbebafe79c9 100644
> --- a/drivers/gpu/drm/xe/Makefile
> +++ b/drivers/gpu/drm/xe/Makefile
> @@ -93,6 +93,7 @@ xe-y += xe_bb.o \
>         xe_nvm.o \
>         xe_oa.o \
>         xe_observation.o \
> +       xe_pagefault.o \
>         xe_pat.o \
>         xe_pci.o \
>         xe_pcode.o \
> diff --git a/drivers/gpu/drm/xe/xe_pagefault.c
> b/drivers/gpu/drm/xe/xe_pagefault.c
> new file mode 100644
> index 000000000000..3ce0e8d74b9d
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pagefault.c
> @@ -0,0 +1,63 @@
> +// SPDX-License-Identifier: MIT
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#include "xe_pagefault.h"
> +#include "xe_pagefault_types.h"
> +
> +/**
> + * DOC: Xe page faults
> + *
> + * Xe page faults are handled in two layers. The producer layer
> interacts with
> + * hardware or firmware to receive and parse faults into struct
> xe_pagefault,
> + * then forwards them to the consumer. The consumer layer services
> the faults
> + * (e.g., memory migration, page table updates) and acknowledges the
> result back
> + * to the producer, which then forwards the results to the hardware
> or firmware.
> + * The consumer uses a page fault queue sized to absorb all
> potential faults and
> + * a multi-threaded worker to process them. Multiple producers are
> supported,
> + * with a single shared consumer.
> + */
> +
> +/**
> + * xe_pagefault_init() - Page fault init
> + * @xe: xe device instance
> + *
> + * Initialize Xe page fault state. Must be done after reading fuses.
> + *
> + * Return: 0 on Success, errno on failure
> + */
> +int xe_pagefault_init(struct xe_device *xe)
> +{
> +       /* TODO - implement */
> +       return 0;
> +}
> +
> +/**
> + * xe_pagefault_reset() - Page fault reset for a GT
> + * @xe: xe device instance
> + * @gt: GT being reset
> + *
> + * Reset the Xe page fault state for a GT; that is, squash any
> pending faults on
> + * the GT.
> + */
> +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt)
> +{
> +       /* TODO - implement */
> +}
> +
> +/**
> + * xe_pagefault_handler() - Page fault handler
> + * @xe: xe device instance
> + * @pf: Page fault
> + *
> + * Sink the page fault to a queue (i.e., a memory buffer) and queue
> a worker to
> + * service it. Safe to be called from IRQ or process context.
> Reclaim safe.
> + *
> + * Return: 0 on success, errno on failure
> + */
> +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault
> *pf)
> +{
> +       /* TODO - implement */
> +       return 0;
> +}
> diff --git a/drivers/gpu/drm/xe/xe_pagefault.h
> b/drivers/gpu/drm/xe/xe_pagefault.h
> new file mode 100644
> index 000000000000..bd0cdf9ed37f
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pagefault.h
> @@ -0,0 +1,19 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#ifndef _XE_PAGEFAULT_H_
> +#define _XE_PAGEFAULT_H_
> +
> +struct xe_device;
> +struct xe_gt;
> +struct xe_pagefault;
> +
> +int xe_pagefault_init(struct xe_device *xe);
> +
> +void xe_pagefault_reset(struct xe_device *xe, struct xe_gt *gt);
> +
> +int xe_pagefault_handler(struct xe_device *xe, struct xe_pagefault
> *pf);
> +
> +#endif
> diff --git a/drivers/gpu/drm/xe/xe_pagefault_types.h
> b/drivers/gpu/drm/xe/xe_pagefault_types.h
> new file mode 100644
> index 000000000000..fcff84f93dd8
> --- /dev/null
> +++ b/drivers/gpu/drm/xe/xe_pagefault_types.h
> @@ -0,0 +1,125 @@
> +/* SPDX-License-Identifier: MIT */
> +/*
> + * Copyright © 2025 Intel Corporation
> + */
> +
> +#ifndef _XE_PAGEFAULT_TYPES_H_
> +#define _XE_PAGEFAULT_TYPES_H_
> +
> +#include <linux/workqueue.h>
> +
> +struct xe_pagefault;
> +struct xe_gt;

Nit: Maybe reverse these structs to be in alphabetical order

> +
> +/** enum xe_pagefault_access_type - Xe page fault access type */
> +enum xe_pagefault_access_type {
> +       /** @XE_PAGEFAULT_ACCESS_TYPE_READ: Read access type */
> +       XE_PAGEFAULT_ACCESS_TYPE_READ   = 0,
> +       /** @XE_PAGEFAULT_ACCESS_TYPE_WRITE: Write access type */
> +       XE_PAGEFAULT_ACCESS_TYPE_WRITE  = 1,
> +       /** @XE_PAGEFAULT_ACCESS_TYPE_ATOMIC: Atomic access type */
> +       XE_PAGEFAULT_ACCESS_TYPE_ATOMIC = 2,
> +};
> +
> +/** enum xe_pagefault_type - Xe page fault type */
> +enum xe_pagefault_type {
> +       /** @XE_PAGEFAULT_TYPE_NOT_PRESENT: Not present */
> +       XE_PAGEFAULT_TYPE_NOT_PRESENT           = 0,
> +       /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Write access
> violation */
> +       XE_PAGEFAULT_WRITE_ACCESS_VIOLATION     = 1,
> +       /** @XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION: Atomic access
> violation */

XE_PAGEFAULT_TYPE_WRITE_ACCESS_VIOLATION ->
XE_PAGEFAULT_ACCESS_TYPE_ATOMIC

> +       XE_PAGEFAULT_ATOMIC_ACCESS_VIOLATION    = 2,
> +};
> +
> +/** struct xe_pagefault_ops - Xe pagefault ops (producer) */
> +struct xe_pagefault_ops {
> +       /**
> +        * @ack_fault: Ack fault
> +        * @pf: Page fault
> +        * @err: Error state of fault
> +        *
> +        * Page fault producer receives acknowledgment from the
> consumer and
> +        * sends the result to the HW/FW interface.
> +        */
> +       void (*ack_fault)(struct xe_pagefault *pf, int err);
> +};
> +
> +/**
> + * struct xe_pagefault - Xe page fault
> + *
> + * Generic page fault structure for communication between producer
> and consumer.
> + * Carefully sized to be 64 bytes.
> + */
> +struct xe_pagefault {
> +       /**
> +        * @gt: GT of fault
> +        *
> +        * XXX: We may want to decouple the GT from individual
> faults, as it's
> +        * unclear whether future platforms will always have a GT for
> all page
> +        * fault producers. Internally, the GT is used for stats,
> identifying
> +        * the appropriate VRAM region, and locating the migration
> queue.
> +        * Leaving this as-is for now, but we can revisit later to
> see if we
> +        * can convert it to use the Xe device pointer instead.
> +        */

What if instead of assuming the GT stays static and we eventually
remove if we have some new HW abstraction layer that isn't a GT but
still uses the page fault, we instead push to have said theoretical
abstraction layer overload the GT here like we're doing with primary
and media today. Then we can keep the interface here simple and just
leave this in there, or change in the future if that doesn't make sense
without the suggestive comment?

> +       struct xe_gt *gt;
> +       /**
> +        * @consumer: State for the software handling the fault.
> Populated by
> +        * the producer and may be modified by the consumer to
> communicate
> +        * information back to the producer upon fault
> acknowledgment.
> +        */
> +       struct {
> +               /** @consumer.page_addr: address of page fault */
> +               u64 page_addr;
> +               /** @consumer.asid: address space ID */
> +               u32 asid;

Can we just call this an ID instead of a pasid or asid? I.e. the ID
could be anything, not strictly process-bound.

> +               /** @consumer.access_type: access type */
> +               u8 access_type;
> +               /** @consumer.fault_type: fault type */
> +               u8 fault_type;
> +#define XE_PAGEFAULT_LEVEL_NACK                0xff    /* Producer
> indicates nack fault */
> +               /** @consumer.fault_level: fault level */
> +               u8 fault_level;
> +               /** @consumer.engine_class: engine class */
> +               u8 engine_class;
> +               /** consumer.reserved: reserved bits for future
> expansion */
> +               u64 reserved;

What about engine instance? Or is that going to overload reserved here?

Thanks,
Stuart

> +       } consumer;
> +       /**
> +        * @producer: State for the producer (i.e., HW/FW interface).
> Populated
> +        * by the producer and should not be modified—or even
> inspected—by the
> +        * consumer, except for calling operations.
> +        */
> +       struct {
> +               /** @producer.private: private pointer */
> +               void *private;
> +               /** @producer.ops: operations */
> +               const struct xe_pagefault_ops *ops;
> +#define XE_PAGEFAULT_PRODUCER_MSG_LEN_DW       4
> +               /**
> +                * producer.msg: page fault message, used by producer
> in fault
> +                * acknowledgement to formulate response to HW/FW
> interface.
> +                */
> +               u32 msg[XE_PAGEFAULT_PRODUCER_MSG_LEN_DW];
> +       } producer;
> +};
> +
> +/** struct xe_pagefault_queue: Xe pagefault queue (consumer) */
> +struct xe_pagefault_queue {
> +       /**
> +        * @data: Data in queue containing struct xe_pagefault,
> protected by
> +        * @lock
> +        */
> +       void *data;
> +       /** @size: Size of queue in bytes */
> +       u32 size;
> +       /** @head: Head pointer in bytes, moved by producer,
> protected by @lock */
> +       u32 head;
> +       /** @tail: Tail pointer in bytes, moved by consumer,
> protected by @lock */
> +       u32 tail;
> +       /** @lock: protects page fault queue */
> +       spinlock_t lock;
> +       /** @worker: to process page faults */
> +       struct work_struct worker;
> +};
> +
> +#endif