[Intel-xe] [RFC, 1/1] drm/xe: Introduce function pointers for MMIO functions

Thu Jun 8 06:54:32 UTC 2023

On 06/06/2023 16:24, Francois Dugast wrote:
> Hi,
>
> On Tue, Jun 06, 2023 at 12:09:34PM +0200, Ohad Sharabi wrote:
>> On 02/06/2023 17:25, Francois Dugast wrote:
>>> A local structure of function pointers is used as a minimal hardware
>>> abstraction layer to prepare for platform independent MMIO calls.
>>>
>>> Cc: Oded Gabbay <ogabbay at kernel.org>
>>> Cc: Ofir Bitton <ofir1.bitton at intel.com>
>>> Cc: Ohad Sharabi <ohadshar at intel.com>
>>> Signed-off-by: Francois Dugast <francois.dugast at intel.com>
>>> ---
>>>    drivers/gpu/drm/xe/xe_device_types.h |  3 ++
>>>    drivers/gpu/drm/xe/xe_mmio.c         | 81 ++++++++++++++++++++++++++++
>>>    drivers/gpu/drm/xe/xe_mmio.h         | 35 ++++++------
>>>    3 files changed, 99 insertions(+), 20 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
>>> index 17b6b1cc5adb..3f8fd0d8129b 100644
>>> --- a/drivers/gpu/drm/xe/xe_device_types.h
>>> +++ b/drivers/gpu/drm/xe/xe_device_types.h
>>> @@ -378,6 +378,9 @@ struct xe_device {
>>>        /** @d3cold_allowed: Indicates if d3cold is a valid device state */
>>>        bool d3cold_allowed;
>>>
>>> +     /** @mmio_funcs: function pointers for MMIO related functions */
>>> +     const struct xe_mmio_funcs *mmio_funcs;
>> Do we really want to divide the FP structs to such fine-grained resolution?
>>
>> Can't the MMIO be grouped as part of GT functions which can later host
>> other FPs as well?
>>
>> Or do we think that all other GT funcs will, probably, be "common to
>> more components" and so define a special struct for it?
> Sure, it is possible to use broader FP structs which would contain more
> functions. Actually, it was explored in the first revision of the original
> series: https://patchwork.freedesktop.org/series/114630/#rev1
>
> The concern is that we might not be able to correctly predict where future
> platforms would require specific code, which means we could end up having
> very large FP structs with dozens or hundreds of functions.
>
> This is why in earlier discussions we leaned towards having multiple FP structs,
> each local to a domain or to a .c file, like in this patch for MMIO.
Sure, I can see your point.
>
>>> +
>>>        /* private: */
>>>
>>>    #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
>>> diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>>> index 475b14fe4356..f3d08676a77a 100644
>>> --- a/drivers/gpu/drm/xe/xe_mmio.c
>>> +++ b/drivers/gpu/drm/xe/xe_mmio.c
>>> @@ -25,6 +25,62 @@
>>>
>>>    #define BAR_SIZE_SHIFT 20
>>>
>>> +static void xe_mmio_write32_device(struct xe_gt *gt,
>>> +                                struct xe_reg reg, u32 val);
>>> +static u32 xe_mmio_read32_device(struct xe_gt *gt, struct xe_reg reg);
>>> +static void xe_mmio_write64_device(struct xe_gt *gt,
>>> +                                struct xe_reg reg, u64 val);
>>> +static u64 xe_mmio_read64_device(struct xe_gt *gt, struct xe_reg reg);
>>> +
>>> +static const struct xe_mmio_funcs xe_mmio_funcs_device = {
>>> +     .write32 = xe_mmio_write32_device,
>>> +     .read32 = xe_mmio_read32_device,
>>> +     .write64 = xe_mmio_write64_device,
>>> +     .read64 = xe_mmio_read64_device,
>>> +};
>>> +
>>> +static inline void xe_mmio_write32_device(struct xe_gt *gt,
>>> +                                struct xe_reg reg, u32 val)
>>> +{
>>> +     struct xe_tile *tile = gt_to_tile(gt);
>>> +
>>> +     if (reg.addr < gt->mmio.adj_limit)
>>> +             reg.addr += gt->mmio.adj_offset;
>>> +
>>> +     writel(val, tile->mmio.regs + reg.addr);
>>> +}

Can't the offset adjustment be done from the common caller (i.e.
xe_mmio_write[32/64])?

If so, can we pass the xe_tile instead of xe_gt to the function?

One can argue that we can also pass the complete iomem address but I
think we may need the xe_tile struct for other setups/platform (to be
implemented in other set of MMIO funcs FPs)

>>> +
>>> +static inline u32 xe_mmio_read32_device(struct xe_gt *gt, struct xe_reg reg)
>>> +{
>>> +     struct xe_tile *tile = gt_to_tile(gt);
>>> +
>>> +     if (reg.addr < gt->mmio.adj_limit)
>>> +             reg.addr += gt->mmio.adj_offset;
>>> +
>>> +     return readl(tile->mmio.regs + reg.addr);
>>> +}
>>> +
>>> +static inline void xe_mmio_write64_device(struct xe_gt *gt,
>>> +                                struct xe_reg reg, u64 val)
>>> +{
>>> +     struct xe_tile *tile = gt_to_tile(gt);
>>> +
>>> +     if (reg.addr < gt->mmio.adj_limit)
>>> +             reg.addr += gt->mmio.adj_offset;
>>> +
>>> +     writeq(val, tile->mmio.regs + reg.addr);
>>> +}
>>> +
>>> +static inline u64 xe_mmio_read64_device(struct xe_gt *gt, struct xe_reg reg)
>>> +{
>>> +     struct xe_tile *tile = gt_to_tile(gt);
>>> +
>>> +     if (reg.addr < gt->mmio.adj_limit)
>>> +             reg.addr += gt->mmio.adj_offset;
>>> +
>>> +     return readq(tile->mmio.regs + reg.addr);
>>> +}
>>> +
>>>    static int xe_set_dma_info(struct xe_device *xe)
>>>    {
>>>        unsigned int mask_size = xe->info.dma_mask_size;
>>> @@ -377,6 +433,29 @@ static void mmio_fini(struct drm_device *drm, void *arg)
>>>                iounmap(xe->mem.vram.mapping);
>>>    }
>>>
>>> +static void xe_mmio_set_funcs(struct xe_device *xe)
>> Wouldn't we like to fail the MMIO init on an unsupported platform?
> Yes I will fix that.
>
>>> +{
>>> +     /* For now all platforms use the set of MMIO functions for a
>>> +      * physical device.
>>> +      */
>>> +     switch (xe->info.platform) {
>>> +     case XE_TIGERLAKE:
>>> +     case XE_ROCKETLAKE:
>>> +     case XE_ALDERLAKE_S:
>>> +     case XE_ALDERLAKE_P:
>>> +     case XE_ALDERLAKE_N:
>>> +     case XE_DG1:
>>> +     case XE_DG2:
>>> +     case XE_PVC:
>>> +     case XE_METEORLAKE:
>>> +             xe->mmio_funcs = &xe_mmio_funcs_device;
>>> +             break;
>>> +     default:
>>> +             DRM_ERROR("Unsupported platform\n");
Maybe drm_err should be used instead?
>>> +             break;
>>> +     }
>>> +}
>>> +
>>>    int xe_mmio_init(struct xe_device *xe)
>>>    {
>>>        struct xe_tile *root_tile = xe_device_get_root_tile(xe);
>>> @@ -384,6 +463,8 @@ int xe_mmio_init(struct xe_device *xe)
>>>        const int mmio_bar = 0;
>>>        int err;
>>>
>>> +     xe_mmio_set_funcs(xe);
>>> +
>>>        /*
>>>         * Map the first 16MB of th BAR, which includes the registers (0-4MB),
>>>         * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile.
>>> diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
>>> index 3c547d78afba..80ce9de7aac4 100644
>>> --- a/drivers/gpu/drm/xe/xe_mmio.h
>>> +++ b/drivers/gpu/drm/xe/xe_mmio.h
>>> @@ -19,6 +19,13 @@ struct xe_device;
>>>
>>>    #define GEN12_LMEM_BAR              2
>>>
>>> +struct xe_mmio_funcs {
Shouldn't we add read8 as well?
>>> +     u32 (*read32)(struct xe_gt *gt, struct xe_reg reg);
>>> +     u64 (*read64)(struct xe_gt *gt, struct xe_reg reg);
>>> +     void (*write32)(struct xe_gt *gt, struct xe_reg reg, u32 val);
>>> +     void (*write64)(struct xe_gt *gt, struct xe_reg reg, u64 val);
>>> +};
>>> +
>>>    int xe_mmio_init(struct xe_device *xe);
>>>
>>>    static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
>>> @@ -34,22 +41,16 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
>>>    static inline void xe_mmio_write32(struct xe_gt *gt,
>>>                                   struct xe_reg reg, u32 val)
>>>    {
>>> -     struct xe_tile *tile = gt_to_tile(gt);
>>> -
>>> -     if (reg.addr < gt->mmio.adj_limit)
>>> -             reg.addr += gt->mmio.adj_offset;
>>> +     struct xe_device *xe = gt_to_xe(gt);
>>>
>>> -     writel(val, tile->mmio.regs + reg.addr);
>>> +     xe->mmio_funcs->write32(gt, reg, val);
>>>    }
>>>
>>>    static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
>>>    {
>>> -     struct xe_tile *tile = gt_to_tile(gt);
>>> -
>>> -     if (reg.addr < gt->mmio.adj_limit)
>>> -             reg.addr += gt->mmio.adj_offset;
>>> +     struct xe_device *xe = gt_to_xe(gt);
>>>
>>> -     return readl(tile->mmio.regs + reg.addr);
>>> +     return xe->mmio_funcs->read32(gt, reg);
>>>    }
>>>
>>>    static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
>>> @@ -67,22 +68,16 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
>>>    static inline void xe_mmio_write64(struct xe_gt *gt,
>>>                                   struct xe_reg reg, u64 val)
>>>    {
>>> -     struct xe_tile *tile = gt_to_tile(gt);
>>> -
>>> -     if (reg.addr < gt->mmio.adj_limit)
>>> -             reg.addr += gt->mmio.adj_offset;
>>> +     struct xe_device *xe = gt_to_xe(gt);
>>>
>>> -     writeq(val, tile->mmio.regs + reg.addr);
>>> +     xe->mmio_funcs->write64(gt, reg, val);
>>>    }
>>>
>>>    static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
>>>    {
>>> -     struct xe_tile *tile = gt_to_tile(gt);
>>> -
>>> -     if (reg.addr < gt->mmio.adj_limit)
>>> -             reg.addr += gt->mmio.adj_offset;
>>> +     struct xe_device *xe = gt_to_xe(gt);
>>>
>>> -     return readq(tile->mmio.regs + reg.addr);
>>> +     return xe->mmio_funcs->read64(gt, reg);
>>>    }
>>>
>>>    static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
>>>
>>>
>> This approach should work for us- thanks for the effort.
>>
>> I do think that if loading this data from the descriptor is acceptable
>> let's do it soon to avoid collection of all "switch-cases" from the code
>> while other FPs are being added (really not necessary at this phase- we
>> want this patch merged soon).
>>
>> Thanks,
>>
>> Ohad
>>
> I think we should be able to add this in the same series, but first I would like
> to get more feedback on this RFC before moving forward.
>
> Thanks,
> Francois

Francois, some other issues I was wondering about- please tell me what
you think,

Thanks,

Ohad