[Intel-xe] [PATCH] drm/xe/uapi: Remove MMIO ioctl

Ofir Bitton obitton at habana.ai
Tue Sep 12 08:43:06 UTC 2023


On 12/09/2023 3:25, Matt Roper wrote:
> On Mon, Sep 11, 2023 at 04:21:37AM +0000, Ofir Bitton wrote:
>> On 11/09/2023 6:45, Lucas De Marchi wrote:
>>> On Sun, Sep 10, 2023 at 04:34:24PM +0000, Ofir Bitton wrote:
>>>> On 07/09/2023 22:35, Francois Dugast wrote:
>>>>> This was previously used in UMD for timestamp correlation, which can now
>>>>> be done with DRM_XE_QUERY_CS_CYCLES.
>>>>>
>>>>> Link:
>>>>> https://lore.kernel.org/all/20230706042044.GR6953@mdroper-desk1.amr.corp.intel.com/
>>>>> Closes:
>>>>> https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/636
>>>>> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
>>>>> ---
>>>>>    drivers/gpu/drm/xe/xe_device.c |   1 -
>>>>>    drivers/gpu/drm/xe/xe_mmio.c   | 102 ---------------------------------
>>>>>    include/uapi/drm/xe_drm.h      |  31 ++--------
>>>>>    3 files changed, 4 insertions(+), 130 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/xe/xe_device.c
>>>>> b/drivers/gpu/drm/xe/xe_device.c
>>>>> index 109aeb25d19c..10fa1b55578a 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_device.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_device.c
>>>>> @@ -107,7 +107,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
>>>>>        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY,
>>>>> xe_exec_queue_destroy_ioctl,
>>>>>                  DRM_RENDER_ALLOW),
>>>>>        DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
>>>>> -    DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
>>>>>        DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY,
>>>>> xe_exec_queue_set_property_ioctl,
>>>>>                  DRM_RENDER_ALLOW),
>>>>>        DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
>>>>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>>>>> index 3ccc0af4430b..e636e3f3456d 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_mmio.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
>>>>> @@ -422,108 +422,6 @@ int xe_mmio_init(struct xe_device *xe)
>>>>>        return 0;
>>>>>    }
>>>>>
>>>>> -#define VALID_MMIO_FLAGS (\
>>>>> -    DRM_XE_MMIO_BITS_MASK |\
>>>>> -    DRM_XE_MMIO_READ |\
>>>>> -    DRM_XE_MMIO_WRITE)
>>>>> -
>>>>> -static const struct xe_reg mmio_read_whitelist[] = {
>>>>> -    RING_TIMESTAMP(RENDER_RING_BASE),
>>>>> -};
>>>>> -
>>>>> -int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>>> -          struct drm_file *file)
>>>>> -{
>>>>> -    struct xe_device *xe = to_xe_device(dev);
>>>>> -    struct xe_gt *gt = xe_root_mmio_gt(xe);
>>>>> -    struct drm_xe_mmio *args = data;
>>>>> -    unsigned int bits_flag, bytes;
>>>>> -    struct xe_reg reg;
>>>>> -    bool allowed;
>>>>> -    int ret = 0;
>>>>> -
>>>>> -    if (XE_IOCTL_DBG(xe, args->extensions) ||
>>>>> -        XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
>>>>> -        return -EINVAL;
>>>>> -
>>>>> -    if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS))
>>>>> -        return -EINVAL;
>>>>> -
>>>>> -    if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) &&
>>>>> args->value))
>>>>> -        return -EINVAL;
>>>>> -
>>>>> -    allowed = capable(CAP_SYS_ADMIN);
>>>>> -    if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) ==
>>>>> DRM_XE_MMIO_READ)) {
>>>>> -        unsigned int i;
>>>>> -
>>>>> -        for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
>>>>> -            if (mmio_read_whitelist[i].addr == args->addr) {
>>>>> -                allowed = true;
>>>>> -                break;
>>>>> -            }
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>> -    if (XE_IOCTL_DBG(xe, !allowed))
>>>>> -        return -EPERM;
>>>>> -
>>>>> -    bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
>>>>> -    bytes = 1 << bits_flag;
>>>>> -    if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size))
>>>>> -        return -EINVAL;
>>>>> -
>>>>> -    /*
>>>>> -     * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle
>>>>> -     * multicast registers. Steering would need uapi extension.
>>>>> -     */
>>>>> -    reg = XE_REG(args->addr);
>>>>> -
>>>>> -    xe_device_mem_access_get(xe);
>>>>> -    xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>>> -
>>>>> -    if (args->flags & DRM_XE_MMIO_WRITE) {
>>>>> -        switch (bits_flag) {
>>>>> -        case DRM_XE_MMIO_32BIT:
>>>>> -            if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) {
>>>>> -                ret = -EINVAL;
>>>>> -                goto exit;
>>>>> -            }
>>>>> -            xe_mmio_write32(gt, reg, args->value);
>>>>> -            break;
>>>>> -        default:
>>>>> -            drm_dbg(&xe->drm, "Invalid MMIO bit size");
>>>>> -            fallthrough;
>>>>> -        case DRM_XE_MMIO_8BIT: /* TODO */
>>>>> -        case DRM_XE_MMIO_16BIT: /* TODO */
>>>>> -            ret = -EOPNOTSUPP;
>>>>> -            goto exit;
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>> -    if (args->flags & DRM_XE_MMIO_READ) {
>>>>> -        switch (bits_flag) {
>>>>> -        case DRM_XE_MMIO_32BIT:
>>>>> -            args->value = xe_mmio_read32(gt, reg);
>>>>> -            break;
>>>>> -        case DRM_XE_MMIO_64BIT:
>>>>> -            args->value = xe_mmio_read64_2x32(gt, reg);
>>>>> -            break;
>>>>> -        default:
>>>>> -            drm_dbg(&xe->drm, "Invalid MMIO bit size");
>>>>> -            fallthrough;
>>>>> -        case DRM_XE_MMIO_8BIT: /* TODO */
>>>>> -        case DRM_XE_MMIO_16BIT: /* TODO */
>>>>> -            ret = -EOPNOTSUPP;
>>>>> -        }
>>>>> -    }
>>>>> -
>>>>> -exit:
>>>>> -    xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>>> -    xe_device_mem_access_put(xe);
>>>>> -
>>>>> -    return ret;
>>>>> -}
>>>>> -
>>>>>    /**
>>>>>     * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
>>>>>     * @gt: MMIO target GT
>>>>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>>>>> index 86f16d50e9cc..6c6d1cfa415a 100644
>>>>> --- a/include/uapi/drm/xe_drm.h
>>>>> +++ b/include/uapi/drm/xe_drm.h
>>>>> @@ -106,11 +106,10 @@ struct xe_user_extension {
>>>>>    #define DRM_XE_EXEC_QUEUE_CREATE        0x06
>>>>>    #define DRM_XE_EXEC_QUEUE_DESTROY        0x07
>>>>>    #define DRM_XE_EXEC            0x08
>>>>> -#define DRM_XE_MMIO            0x09
>>>>> -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY    0x0a
>>>>> -#define DRM_XE_WAIT_USER_FENCE        0x0b
>>>>> -#define DRM_XE_VM_MADVISE        0x0c
>>>>> -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY    0x0d
>>>>> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY    0x09
>>>>> +#define DRM_XE_WAIT_USER_FENCE        0x0a
>>>>> +#define DRM_XE_VM_MADVISE        0x0b
>>>>> +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY    0x0c
>>>>>
>>>>>    /* Must be kept compact -- no holes */
>>>>>    #define DRM_IOCTL_XE_DEVICE_QUERY        DRM_IOWR(DRM_COMMAND_BASE
>>>>> + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
>>>>> @@ -123,7 +122,6 @@ struct xe_user_extension {
>>>>>    #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct
>>>>> drm_xe_exec_queue_get_property)
>>>>>    #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
>>>>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct
>>>>> drm_xe_exec_queue_destroy)
>>>>>    #define DRM_IOCTL_XE_EXEC             DRM_IOW(DRM_COMMAND_BASE +
>>>>> DRM_XE_EXEC, struct drm_xe_exec)
>>>>> -#define DRM_IOCTL_XE_MMIO            DRM_IOWR(DRM_COMMAND_BASE +
>>>>> DRM_XE_MMIO, struct drm_xe_mmio)
>>>>>    #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY
>>>>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct
>>>>> drm_xe_exec_queue_set_property)
>>>>>    #define DRM_IOCTL_XE_WAIT_USER_FENCE
>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct
>>>>> drm_xe_wait_user_fence)
>>>>>    #define DRM_IOCTL_XE_VM_MADVISE
>>>>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
>>>>> @@ -883,27 +881,6 @@ struct drm_xe_exec {
>>>>>        __u64 reserved[2];
>>>>>    };
>>>>>
>>>>> -struct drm_xe_mmio {
>>>>> -    /** @extensions: Pointer to the first extension struct, if any */
>>>>> -    __u64 extensions;
>>>>> -
>>>>> -    __u32 addr;
>>>>> -
>>>>> -#define DRM_XE_MMIO_8BIT    0x0
>>>>> -#define DRM_XE_MMIO_16BIT    0x1
>>>>> -#define DRM_XE_MMIO_32BIT    0x2
>>>>> -#define DRM_XE_MMIO_64BIT    0x3
>>>>> -#define DRM_XE_MMIO_BITS_MASK    0x3
>>>>> -#define DRM_XE_MMIO_READ    0x4
>>>>> -#define DRM_XE_MMIO_WRITE    0x8
>>>>> -    __u32 flags;
>>>>> -
>>>>> -    __u64 value;
>>>>> -
>>>>> -    /** @reserved: Reserved */
>>>>> -    __u64 reserved[2];
>>>>> -};
>>>>> -
>>>>>    /**
>>>>>     * struct drm_xe_wait_user_fence - wait user fence
>>>>>     *
>>>>
>>>> I would prefer to keep this ioctl for debug, or an equivalent
>>>> alternative. We can remove the timestamp part but keep the general mmio
>>>> ioctl interface for 'CAP_SYS_ADMIN' access only.
>>>> Any objections?
>
> Well, one general roadblock is that the DRM subsystem rules don't allow
> adding ABI without a real, open-source userspace consumer.  Not only do
> we not have a "real" consumer for this ioctl itself, but we also don't
> even want to give anyone the impression that we have a backdoor
> interface to allow non-opensource userspace to go behind the driver's
> back and start controlling the hardware.  Userspace can still obviously
> do that by running as root and mapping the BAR directly, but they're not
> using official driver uapi at that point, and there are potentially
> other ways that a sysadmin can close those holes if necessary.

Hey Matt, I totally understand your concern; I might have another
suggestion. We can create another FD in debugfs and move this ioctl
there (I can take ownership of this). This way ABI is not an issue.

>
>>>
>>> CAP_SYS_ADMIN can already map the bar and do the reads and
>>> writes. What's the benefit of going through the kernel?
>>>
>>> Lucas De Marchi
>>>
>>>>
>>>> Ofir
>>
>> Some register read/write requires driver involvement, for example
>> disabling clock gating or going through a register gateway.
>> In addition, if a user would like to access directly through the bar he
>> will need to be aware of the exact bar mapping. It is preferable that
>> the user will be able to ask for read/write from/to a specific MMIO
>> address and the driver will map it to the proper offset in bar.
>
> Are you using the term "user" to describe the userspace _software_ or
> the person sitting at the computer?  If the latter, then we don't need
> this ioctl at all; the "intel_reg" tool from IGT has provided this kind
> of functionality on i915 for years; it's a simple command-line tool that
> can be used with commands like:
>
>          intel_reg read 0x1234
>          intel_reg write 0x5678 0xabcd
>
> The tool uses libpciaccess under the hood to map the BAR, and can
> perform other necessary pre/post operations (like grabbing/releasing
> forcewake to make sure the register is accessible).

By 'user' I do mean a userspace process. The intel_reg tool can work,
but it has its own limitations.

>
> If you're using "user" to refer to the userspace _software_ (e.g., IGT's
> xe_reg debug tool which is using the ioctl right now), then there's even
> more incentive to move away from the ioctl --- the ioctl interface we
> have today already has a bunch of limitations that make it less useful
> than it should be for debugging:
>
>   * There's no way to specify/access registers on non-tile0.

I am aware, I will add support for multi-tile devices.

>   * There's no way to specify that you do/don't want it to do the "extra"
>     steps like you describe (e.g., automatically grabbing forcewake is
>     convenient a lot of the time, but if you're actually trying to debug
>     forcewake itself, then the ioctl's automatic behavior just gets in
>     your way).
>

Understood; for this kind of debugging the mmio ioctl is irrelevant.

> While the ioctl could theoretically be extended to overcome these
> limitations, we still shouldn't really be building up ABI like that for
> debug purposes without real userspace consumers, especially if we have a
> viable alternative that's been used for a long time.  It's relatively
> simple for tools like intel_reg and xe_reg to just use libpciaccess to
> map the BAR, figure out the appropriate offset, and then optionally
> perform other operations (like forcewake) according to whatever options
> were passed on the command line.  With debug tools it's especially
> likely that we'll come up with new behaviors that we want to add (e.g.,
> handle display register accesses via msgbus transactions on MTL/LNL?)
> and it would be better if we didn't need to keep extending the ioctl
> uapi and ABI for these debug-only purposes).

I agree we need an interface that will not be obligated to any ABI as
this is for pure debug, so we are left with 2 options:
1. My suggestion to create a debugfs FD and use the ioctl there.
2. Move code to innersource

I prefer option #1, as the latter will not be available to customers who
use the open-source driver.

Ofir.

>
>
> Matt
>



More information about the Intel-xe mailing list