[Intel-xe] [PATCH] drm/xe/uapi: Remove MMIO ioctl

Matt Roper matthew.d.roper at intel.com
Tue Sep 12 00:25:41 UTC 2023


On Mon, Sep 11, 2023 at 04:21:37AM +0000, Ofir Bitton wrote:
> On 11/09/2023 6:45, Lucas De Marchi wrote:
> > On Sun, Sep 10, 2023 at 04:34:24PM +0000, Ofir Bitton wrote:
> >> On 07/09/2023 22:35, Francois Dugast wrote:
> >>> This was previously used in UMD for timestamp correlation, which can now
> >>> be done with DRM_XE_QUERY_CS_CYCLES.
> >>>
> >>> Link:
> >>> https://lore.kernel.org/all/20230706042044.GR6953@mdroper-desk1.amr.corp.intel.com/
> >>> Closes:
> >>> https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/636
> >>> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> >>> ---
> >>>   drivers/gpu/drm/xe/xe_device.c |   1 -
> >>>   drivers/gpu/drm/xe/xe_mmio.c   | 102 ---------------------------------
> >>>   include/uapi/drm/xe_drm.h      |  31 ++--------
> >>>   3 files changed, 4 insertions(+), 130 deletions(-)
> >>>
> >>> diff --git a/drivers/gpu/drm/xe/xe_device.c
> >>> b/drivers/gpu/drm/xe/xe_device.c
> >>> index 109aeb25d19c..10fa1b55578a 100644
> >>> --- a/drivers/gpu/drm/xe/xe_device.c
> >>> +++ b/drivers/gpu/drm/xe/xe_device.c
> >>> @@ -107,7 +107,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
> >>>       DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY,
> >>> xe_exec_queue_destroy_ioctl,
> >>>                 DRM_RENDER_ALLOW),
> >>>       DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
> >>> -    DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
> >>>       DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY,
> >>> xe_exec_queue_set_property_ioctl,
> >>>                 DRM_RENDER_ALLOW),
> >>>       DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
> >>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
> >>> index 3ccc0af4430b..e636e3f3456d 100644
> >>> --- a/drivers/gpu/drm/xe/xe_mmio.c
> >>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
> >>> @@ -422,108 +422,6 @@ int xe_mmio_init(struct xe_device *xe)
> >>>       return 0;
> >>>   }
> >>>
> >>> -#define VALID_MMIO_FLAGS (\
> >>> -    DRM_XE_MMIO_BITS_MASK |\
> >>> -    DRM_XE_MMIO_READ |\
> >>> -    DRM_XE_MMIO_WRITE)
> >>> -
> >>> -static const struct xe_reg mmio_read_whitelist[] = {
> >>> -    RING_TIMESTAMP(RENDER_RING_BASE),
> >>> -};
> >>> -
> >>> -int xe_mmio_ioctl(struct drm_device *dev, void *data,
> >>> -          struct drm_file *file)
> >>> -{
> >>> -    struct xe_device *xe = to_xe_device(dev);
> >>> -    struct xe_gt *gt = xe_root_mmio_gt(xe);
> >>> -    struct drm_xe_mmio *args = data;
> >>> -    unsigned int bits_flag, bytes;
> >>> -    struct xe_reg reg;
> >>> -    bool allowed;
> >>> -    int ret = 0;
> >>> -
> >>> -    if (XE_IOCTL_DBG(xe, args->extensions) ||
> >>> -        XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
> >>> -        return -EINVAL;
> >>> -
> >>> -    if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS))
> >>> -        return -EINVAL;
> >>> -
> >>> -    if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) &&
> >>> args->value))
> >>> -        return -EINVAL;
> >>> -
> >>> -    allowed = capable(CAP_SYS_ADMIN);
> >>> -    if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) ==
> >>> DRM_XE_MMIO_READ)) {
> >>> -        unsigned int i;
> >>> -
> >>> -        for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
> >>> -            if (mmio_read_whitelist[i].addr == args->addr) {
> >>> -                allowed = true;
> >>> -                break;
> >>> -            }
> >>> -        }
> >>> -    }
> >>> -
> >>> -    if (XE_IOCTL_DBG(xe, !allowed))
> >>> -        return -EPERM;
> >>> -
> >>> -    bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
> >>> -    bytes = 1 << bits_flag;
> >>> -    if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size))
> >>> -        return -EINVAL;
> >>> -
> >>> -    /*
> >>> -     * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle
> >>> -     * multicast registers. Steering would need uapi extension.
> >>> -     */
> >>> -    reg = XE_REG(args->addr);
> >>> -
> >>> -    xe_device_mem_access_get(xe);
> >>> -    xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> >>> -
> >>> -    if (args->flags & DRM_XE_MMIO_WRITE) {
> >>> -        switch (bits_flag) {
> >>> -        case DRM_XE_MMIO_32BIT:
> >>> -            if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) {
> >>> -                ret = -EINVAL;
> >>> -                goto exit;
> >>> -            }
> >>> -            xe_mmio_write32(gt, reg, args->value);
> >>> -            break;
> >>> -        default:
> >>> -            drm_dbg(&xe->drm, "Invalid MMIO bit size");
> >>> -            fallthrough;
> >>> -        case DRM_XE_MMIO_8BIT: /* TODO */
> >>> -        case DRM_XE_MMIO_16BIT: /* TODO */
> >>> -            ret = -EOPNOTSUPP;
> >>> -            goto exit;
> >>> -        }
> >>> -    }
> >>> -
> >>> -    if (args->flags & DRM_XE_MMIO_READ) {
> >>> -        switch (bits_flag) {
> >>> -        case DRM_XE_MMIO_32BIT:
> >>> -            args->value = xe_mmio_read32(gt, reg);
> >>> -            break;
> >>> -        case DRM_XE_MMIO_64BIT:
> >>> -            args->value = xe_mmio_read64_2x32(gt, reg);
> >>> -            break;
> >>> -        default:
> >>> -            drm_dbg(&xe->drm, "Invalid MMIO bit size");
> >>> -            fallthrough;
> >>> -        case DRM_XE_MMIO_8BIT: /* TODO */
> >>> -        case DRM_XE_MMIO_16BIT: /* TODO */
> >>> -            ret = -EOPNOTSUPP;
> >>> -        }
> >>> -    }
> >>> -
> >>> -exit:
> >>> -    xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> >>> -    xe_device_mem_access_put(xe);
> >>> -
> >>> -    return ret;
> >>> -}
> >>> -
> >>>   /**
> >>>    * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
> >>>    * @gt: MMIO target GT
> >>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> >>> index 86f16d50e9cc..6c6d1cfa415a 100644
> >>> --- a/include/uapi/drm/xe_drm.h
> >>> +++ b/include/uapi/drm/xe_drm.h
> >>> @@ -106,11 +106,10 @@ struct xe_user_extension {
> >>>   #define DRM_XE_EXEC_QUEUE_CREATE        0x06
> >>>   #define DRM_XE_EXEC_QUEUE_DESTROY        0x07
> >>>   #define DRM_XE_EXEC            0x08
> >>> -#define DRM_XE_MMIO            0x09
> >>> -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY    0x0a
> >>> -#define DRM_XE_WAIT_USER_FENCE        0x0b
> >>> -#define DRM_XE_VM_MADVISE        0x0c
> >>> -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY    0x0d
> >>> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY    0x09
> >>> +#define DRM_XE_WAIT_USER_FENCE        0x0a
> >>> +#define DRM_XE_VM_MADVISE        0x0b
> >>> +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY    0x0c
> >>>
> >>>   /* Must be kept compact -- no holes */
> >>>   #define DRM_IOCTL_XE_DEVICE_QUERY        DRM_IOWR(DRM_COMMAND_BASE
> >>> + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
> >>> @@ -123,7 +122,6 @@ struct xe_user_extension {
> >>>   #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
> >>> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct
> >>> drm_xe_exec_queue_get_property)
> >>>   #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
> >>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct
> >>> drm_xe_exec_queue_destroy)
> >>>   #define DRM_IOCTL_XE_EXEC             DRM_IOW(DRM_COMMAND_BASE +
> >>> DRM_XE_EXEC, struct drm_xe_exec)
> >>> -#define DRM_IOCTL_XE_MMIO            DRM_IOWR(DRM_COMMAND_BASE +
> >>> DRM_XE_MMIO, struct drm_xe_mmio)
> >>>   #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY
> >>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct
> >>> drm_xe_exec_queue_set_property)
> >>>   #define DRM_IOCTL_XE_WAIT_USER_FENCE
> >>> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct
> >>> drm_xe_wait_user_fence)
> >>>   #define DRM_IOCTL_XE_VM_MADVISE
> >>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
> >>> @@ -883,27 +881,6 @@ struct drm_xe_exec {
> >>>       __u64 reserved[2];
> >>>   };
> >>>
> >>> -struct drm_xe_mmio {
> >>> -    /** @extensions: Pointer to the first extension struct, if any */
> >>> -    __u64 extensions;
> >>> -
> >>> -    __u32 addr;
> >>> -
> >>> -#define DRM_XE_MMIO_8BIT    0x0
> >>> -#define DRM_XE_MMIO_16BIT    0x1
> >>> -#define DRM_XE_MMIO_32BIT    0x2
> >>> -#define DRM_XE_MMIO_64BIT    0x3
> >>> -#define DRM_XE_MMIO_BITS_MASK    0x3
> >>> -#define DRM_XE_MMIO_READ    0x4
> >>> -#define DRM_XE_MMIO_WRITE    0x8
> >>> -    __u32 flags;
> >>> -
> >>> -    __u64 value;
> >>> -
> >>> -    /** @reserved: Reserved */
> >>> -    __u64 reserved[2];
> >>> -};
> >>> -
> >>>   /**
> >>>    * struct drm_xe_wait_user_fence - wait user fence
> >>>    *
> >>
> >> I would prefer to keep this ioctl for debug, or an equivavlent
> >> alternative. We can remove the timestmap part but keep the general mmio
> >> ioctl interface for 'CAP_SYS_ADMIN' access only.
> >> Any objections?

Well, one general roadblock is that the DRM subsystem rules don't allow
adding ABI without a real, open-source userspace consumer.  Not only do
we not have a "real" consumer for this ioctl itself, but we also don't
even want to give anyone the impression that we have a backdoor
interface to allow non-opensource userspace to go behind the driver's
back and start controlling the hardware.  Userspace can still obviously
do that by running as root and mapping the BAR directly, but they're not
using official driver uapi at that point, and there are potentially
other ways that a sysadmin can close those holes if necessary.

> >
> > CAP_SYS_ADMIN can already map the bar and do the reads and
> > writes. What's the benefit of going through the kernel?
> >
> > Lucas De Marchi
> >
> >>
> >> Ofir
> 
> Some register read/write requires driver involvement, for example
> disabling clock gating or going through a register gateway.
> In addition, if a user would like to access directly through the bar he
> will need to be aware of the exact bar mapping. It is preferable that
> the user will be able to ask for read/write from/to a specific MMIO
> address and the driver will map it to the proper offset in bar.

Are you using the term "user" to describe the userspace _software_ or
the person sitting at the computer?  If the latter, then we don't need
this ioctl at all; the "intel_reg" tool from IGT has provided this kind
of functionality on i915 for years; it's a simple command-line tool that
can be used with commands like:

        intel_reg read 0x1234
        intel_reg write 0x5678 0xabcd

The tool uses libpciaccess under the hood to map the BAR, and can
perform other necessary pre/post operations (like grabbing releasing
forcewake to make sure the register is accessible).

If you're using "user" to refer to the userspace _software_ (e.g., IGT's
xe_reg debug tool which is using the ioctl right now), then there's even
more incentive to move away from the ioctl --- the ioctl interface we
have today already has a bunch of limitations that make it less useful
than it should be for debugging:

 * There's no way to specify/access registers on non-tile0.
 * There's no way to specify that you do/don't want it to do the "extra"
   steps like you describe (e.g., automatically grabbing forcewake is
   convenient a lot of the time, but if you're actually trying to debug
   forcewake itself, then the ioctl's automatic behavior just gets in
   your way).

While the ioctl could theoretically be extended to overcome these
limitations, we still shouldn't really be building up ABI like that for
debug purposes without real userspace consumers, especially if we have a
viable alternative that's been used for a long time.  It's relatively
simple for tools like intel_reg and xe_reg to just use libpciaccess to
map the BAR, figure out the appropriate offset, and then optionally
perform other operations (like forcewake) according to whatever options
were passed on the command line.  With debug tools it's especially
likely that we'll come up with new behaviors that we want to add (e.g.,
handle display register accesses via msgbus transactions on MTL/LNL?)
and it would be better if we didn't need to keep extending the ioctl
uapi and ABI for these debug-only purposes).


Matt

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the Intel-xe mailing list