[Intel-xe] [PATCH] drm/xe/uapi: Remove MMIO ioctl
Ofir Bitton
obitton at habana.ai
Tue Sep 12 08:43:06 UTC 2023
On 12/09/2023 3:25, Matt Roper wrote:
> On Mon, Sep 11, 2023 at 04:21:37AM +0000, Ofir Bitton wrote:
>> On 11/09/2023 6:45, Lucas De Marchi wrote:
>>> On Sun, Sep 10, 2023 at 04:34:24PM +0000, Ofir Bitton wrote:
>>>> On 07/09/2023 22:35, Francois Dugast wrote:
>>>>> This was previously used in UMD for timestamp correlation, which can now
>>>>> be done with DRM_XE_QUERY_CS_CYCLES.
>>>>>
>>>>> Link:
>>>>> https://lore.kernel.org/all/20230706042044.GR6953@mdroper-desk1.amr.corp.intel.com/
>>>>> Closes:
>>>>> https://gitlab.freedesktop.org/drm/xe/kernel/-/issues/636
>>>>> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
>>>>> ---
>>>>> drivers/gpu/drm/xe/xe_device.c | 1 -
>>>>> drivers/gpu/drm/xe/xe_mmio.c | 102 ---------------------------------
>>>>> include/uapi/drm/xe_drm.h | 31 ++--------
>>>>> 3 files changed, 4 insertions(+), 130 deletions(-)
>>>>>
>>>>> diff --git a/drivers/gpu/drm/xe/xe_device.c
>>>>> b/drivers/gpu/drm/xe/xe_device.c
>>>>> index 109aeb25d19c..10fa1b55578a 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_device.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_device.c
>>>>> @@ -107,7 +107,6 @@ static const struct drm_ioctl_desc xe_ioctls[] = {
>>>>> DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_DESTROY,
>>>>> xe_exec_queue_destroy_ioctl,
>>>>> DRM_RENDER_ALLOW),
>>>>> DRM_IOCTL_DEF_DRV(XE_EXEC, xe_exec_ioctl, DRM_RENDER_ALLOW),
>>>>> - DRM_IOCTL_DEF_DRV(XE_MMIO, xe_mmio_ioctl, DRM_RENDER_ALLOW),
>>>>> DRM_IOCTL_DEF_DRV(XE_EXEC_QUEUE_SET_PROPERTY,
>>>>> xe_exec_queue_set_property_ioctl,
>>>>> DRM_RENDER_ALLOW),
>>>>> DRM_IOCTL_DEF_DRV(XE_WAIT_USER_FENCE, xe_wait_user_fence_ioctl,
>>>>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>>>>> index 3ccc0af4430b..e636e3f3456d 100644
>>>>> --- a/drivers/gpu/drm/xe/xe_mmio.c
>>>>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
>>>>> @@ -422,108 +422,6 @@ int xe_mmio_init(struct xe_device *xe)
>>>>> return 0;
>>>>> }
>>>>>
>>>>> -#define VALID_MMIO_FLAGS (\
>>>>> - DRM_XE_MMIO_BITS_MASK |\
>>>>> - DRM_XE_MMIO_READ |\
>>>>> - DRM_XE_MMIO_WRITE)
>>>>> -
>>>>> -static const struct xe_reg mmio_read_whitelist[] = {
>>>>> - RING_TIMESTAMP(RENDER_RING_BASE),
>>>>> -};
>>>>> -
>>>>> -int xe_mmio_ioctl(struct drm_device *dev, void *data,
>>>>> - struct drm_file *file)
>>>>> -{
>>>>> - struct xe_device *xe = to_xe_device(dev);
>>>>> - struct xe_gt *gt = xe_root_mmio_gt(xe);
>>>>> - struct drm_xe_mmio *args = data;
>>>>> - unsigned int bits_flag, bytes;
>>>>> - struct xe_reg reg;
>>>>> - bool allowed;
>>>>> - int ret = 0;
>>>>> -
>>>>> - if (XE_IOCTL_DBG(xe, args->extensions) ||
>>>>> - XE_IOCTL_DBG(xe, args->reserved[0] || args->reserved[1]))
>>>>> - return -EINVAL;
>>>>> -
>>>>> - if (XE_IOCTL_DBG(xe, args->flags & ~VALID_MMIO_FLAGS))
>>>>> - return -EINVAL;
>>>>> -
>>>>> - if (XE_IOCTL_DBG(xe, !(args->flags & DRM_XE_MMIO_WRITE) &&
>>>>> args->value))
>>>>> - return -EINVAL;
>>>>> -
>>>>> - allowed = capable(CAP_SYS_ADMIN);
>>>>> - if (!allowed && ((args->flags & ~DRM_XE_MMIO_BITS_MASK) ==
>>>>> DRM_XE_MMIO_READ)) {
>>>>> - unsigned int i;
>>>>> -
>>>>> - for (i = 0; i < ARRAY_SIZE(mmio_read_whitelist); i++) {
>>>>> - if (mmio_read_whitelist[i].addr == args->addr) {
>>>>> - allowed = true;
>>>>> - break;
>>>>> - }
>>>>> - }
>>>>> - }
>>>>> -
>>>>> - if (XE_IOCTL_DBG(xe, !allowed))
>>>>> - return -EPERM;
>>>>> -
>>>>> - bits_flag = args->flags & DRM_XE_MMIO_BITS_MASK;
>>>>> - bytes = 1 << bits_flag;
>>>>> - if (XE_IOCTL_DBG(xe, args->addr + bytes > xe->mmio.size))
>>>>> - return -EINVAL;
>>>>> -
>>>>> - /*
>>>>> - * TODO: migrate to xe_gt_mcr to lookup the mmio range and handle
>>>>> - * multicast registers. Steering would need uapi extension.
>>>>> - */
>>>>> - reg = XE_REG(args->addr);
>>>>> -
>>>>> - xe_device_mem_access_get(xe);
>>>>> - xe_force_wake_get(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>>> -
>>>>> - if (args->flags & DRM_XE_MMIO_WRITE) {
>>>>> - switch (bits_flag) {
>>>>> - case DRM_XE_MMIO_32BIT:
>>>>> - if (XE_IOCTL_DBG(xe, args->value > U32_MAX)) {
>>>>> - ret = -EINVAL;
>>>>> - goto exit;
>>>>> - }
>>>>> - xe_mmio_write32(gt, reg, args->value);
>>>>> - break;
>>>>> - default:
>>>>> - drm_dbg(&xe->drm, "Invalid MMIO bit size");
>>>>> - fallthrough;
>>>>> - case DRM_XE_MMIO_8BIT: /* TODO */
>>>>> - case DRM_XE_MMIO_16BIT: /* TODO */
>>>>> - ret = -EOPNOTSUPP;
>>>>> - goto exit;
>>>>> - }
>>>>> - }
>>>>> -
>>>>> - if (args->flags & DRM_XE_MMIO_READ) {
>>>>> - switch (bits_flag) {
>>>>> - case DRM_XE_MMIO_32BIT:
>>>>> - args->value = xe_mmio_read32(gt, reg);
>>>>> - break;
>>>>> - case DRM_XE_MMIO_64BIT:
>>>>> - args->value = xe_mmio_read64_2x32(gt, reg);
>>>>> - break;
>>>>> - default:
>>>>> - drm_dbg(&xe->drm, "Invalid MMIO bit size");
>>>>> - fallthrough;
>>>>> - case DRM_XE_MMIO_8BIT: /* TODO */
>>>>> - case DRM_XE_MMIO_16BIT: /* TODO */
>>>>> - ret = -EOPNOTSUPP;
>>>>> - }
>>>>> - }
>>>>> -
>>>>> -exit:
>>>>> - xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
>>>>> - xe_device_mem_access_put(xe);
>>>>> -
>>>>> - return ret;
>>>>> -}
>>>>> -
>>>>> /**
>>>>> * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
>>>>> * @gt: MMIO target GT
>>>>> diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
>>>>> index 86f16d50e9cc..6c6d1cfa415a 100644
>>>>> --- a/include/uapi/drm/xe_drm.h
>>>>> +++ b/include/uapi/drm/xe_drm.h
>>>>> @@ -106,11 +106,10 @@ struct xe_user_extension {
>>>>> #define DRM_XE_EXEC_QUEUE_CREATE 0x06
>>>>> #define DRM_XE_EXEC_QUEUE_DESTROY 0x07
>>>>> #define DRM_XE_EXEC 0x08
>>>>> -#define DRM_XE_MMIO 0x09
>>>>> -#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x0a
>>>>> -#define DRM_XE_WAIT_USER_FENCE 0x0b
>>>>> -#define DRM_XE_VM_MADVISE 0x0c
>>>>> -#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0d
>>>>> +#define DRM_XE_EXEC_QUEUE_SET_PROPERTY 0x09
>>>>> +#define DRM_XE_WAIT_USER_FENCE 0x0a
>>>>> +#define DRM_XE_VM_MADVISE 0x0b
>>>>> +#define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x0c
>>>>>
>>>>> /* Must be kept compact -- no holes */
>>>>> #define DRM_IOCTL_XE_DEVICE_QUERY DRM_IOWR(DRM_COMMAND_BASE
>>>>> + DRM_XE_DEVICE_QUERY, struct drm_xe_device_query)
>>>>> @@ -123,7 +122,6 @@ struct xe_user_extension {
>>>>> #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY
>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct
>>>>> drm_xe_exec_queue_get_property)
>>>>> #define DRM_IOCTL_XE_EXEC_QUEUE_DESTROY
>>>>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_DESTROY, struct
>>>>> drm_xe_exec_queue_destroy)
>>>>> #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE +
>>>>> DRM_XE_EXEC, struct drm_xe_exec)
>>>>> -#define DRM_IOCTL_XE_MMIO DRM_IOWR(DRM_COMMAND_BASE +
>>>>> DRM_XE_MMIO, struct drm_xe_mmio)
>>>>> #define DRM_IOCTL_XE_EXEC_QUEUE_SET_PROPERTY
>>>>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_SET_PROPERTY, struct
>>>>> drm_xe_exec_queue_set_property)
>>>>> #define DRM_IOCTL_XE_WAIT_USER_FENCE
>>>>> DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct
>>>>> drm_xe_wait_user_fence)
>>>>> #define DRM_IOCTL_XE_VM_MADVISE
>>>>> DRM_IOW(DRM_COMMAND_BASE + DRM_XE_VM_MADVISE, struct drm_xe_vm_madvise)
>>>>> @@ -883,27 +881,6 @@ struct drm_xe_exec {
>>>>> __u64 reserved[2];
>>>>> };
>>>>>
>>>>> -struct drm_xe_mmio {
>>>>> - /** @extensions: Pointer to the first extension struct, if any */
>>>>> - __u64 extensions;
>>>>> -
>>>>> - __u32 addr;
>>>>> -
>>>>> -#define DRM_XE_MMIO_8BIT 0x0
>>>>> -#define DRM_XE_MMIO_16BIT 0x1
>>>>> -#define DRM_XE_MMIO_32BIT 0x2
>>>>> -#define DRM_XE_MMIO_64BIT 0x3
>>>>> -#define DRM_XE_MMIO_BITS_MASK 0x3
>>>>> -#define DRM_XE_MMIO_READ 0x4
>>>>> -#define DRM_XE_MMIO_WRITE 0x8
>>>>> - __u32 flags;
>>>>> -
>>>>> - __u64 value;
>>>>> -
>>>>> - /** @reserved: Reserved */
>>>>> - __u64 reserved[2];
>>>>> -};
>>>>> -
>>>>> /**
>>>>> * struct drm_xe_wait_user_fence - wait user fence
>>>>> *
>>>>
>>>> I would prefer to keep this ioctl for debug, or an equivalent
>>>> alternative. We can remove the timestamp part but keep the general mmio
>>>> ioctl interface for 'CAP_SYS_ADMIN' access only.
>>>> Any objections?
>
> Well, one general roadblock is that the DRM subsystem rules don't allow
> adding ABI without a real, open-source userspace consumer. Not only do
> we not have a "real" consumer for this ioctl itself, but we also don't
> even want to give anyone the impression that we have a backdoor
> interface to allow non-opensource userspace to go behind the driver's
> back and start controlling the hardware. Userspace can still obviously
> do that by running as root and mapping the BAR directly, but they're not
> using official driver uapi at that point, and there are potentially
> other ways that a sysadmin can close those holes if necessary.
Hey Matt, I totally understand your concern; I might have another
suggestion. We can create another FD in debugfs and move this ioctl
there (I can take ownership of this). This way ABI is not an issue.
>
>>>
>>> CAP_SYS_ADMIN can already map the bar and do the reads and
>>> writes. What's the benefit of going through the kernel?
>>>
>>> Lucas De Marchi
>>>
>>>>
>>>> Ofir
>>
>> Some register read/write requires driver involvement, for example
>> disabling clock gating or going through a register gateway.
>> In addition, if a user would like to access directly through the bar he
>> will need to be aware of the exact bar mapping. It is preferable that
>> the user will be able to ask for read/write from/to a specific MMIO
>> address and the driver will map it to the proper offset in bar.
>
> Are you using the term "user" to describe the userspace _software_ or
> the person sitting at the computer? If the latter, then we don't need
> this ioctl at all; the "intel_reg" tool from IGT has provided this kind
> of functionality on i915 for years; it's a simple command-line tool that
> can be used with commands like:
>
> intel_reg read 0x1234
> intel_reg write 0x5678 0xabcd
>
> The tool uses libpciaccess under the hood to map the BAR, and can
> perform other necessary pre/post operations (like grabbing/releasing
> forcewake to make sure the register is accessible).
By 'user' I do mean a userspace process. The intel_reg tool can work, but
it has its own limitations.
>
> If you're using "user" to refer to the userspace _software_ (e.g., IGT's
> xe_reg debug tool which is using the ioctl right now), then there's even
> more incentive to move away from the ioctl --- the ioctl interface we
> have today already has a bunch of limitations that make it less useful
> than it should be for debugging:
>
> * There's no way to specify/access registers on non-tile0.
I am aware, I will add support for multi-tile devices.
> * There's no way to specify that you do/don't want it to do the "extra"
> steps like you describe (e.g., automatically grabbing forcewake is
> convenient a lot of the time, but if you're actually trying to debug
> forcewake itself, then the ioctl's automatic behavior just gets in
> your way).
>
Understood, for this kind of debugging the mmio ioctl is irrelevant.
> While the ioctl could theoretically be extended to overcome these
> limitations, we still shouldn't really be building up ABI like that for
> debug purposes without real userspace consumers, especially if we have a
> viable alternative that's been used for a long time. It's relatively
> simple for tools like intel_reg and xe_reg to just use libpciaccess to
> map the BAR, figure out the appropriate offset, and then optionally
> perform other operations (like forcewake) according to whatever options
> were passed on the command line. With debug tools it's especially
> likely that we'll come up with new behaviors that we want to add (e.g.,
> handle display register accesses via msgbus transactions on MTL/LNL?)
> and it would be better if we didn't need to keep extending the ioctl
> uapi and ABI for these debug-only purposes).
I agree we need an interface that is not bound to any ABI, as
this is purely for debug, so we are left with 2 options:
1. My suggestion to create a debugfs FD and use the ioctl there.
2. Move the code to innersource.
I prefer option #1, as the latter will not be available to customers who
use the open-source driver.
Ofir.
>
>
> Matt
>
More information about the Intel-xe
mailing list