[Intel-xe] [RFC, 1/1] drm/xe: Introduce function pointers for MMIO functions

Francois Dugast francois.dugast at intel.com
Tue Jun 6 13:24:51 UTC 2023


Hi,

On Tue, Jun 06, 2023 at 12:09:34PM +0200, Ohad Sharabi wrote:
> On 02/06/2023 17:25, Francois Dugast wrote:
> > A local structure of function pointers is used as a minimal hardware
> > abstraction layer to prepare for platform independent MMIO calls.
> >
> > Cc: Oded Gabbay <ogabbay at kernel.org>
> > Cc: Ofir Bitton <ofir1.bitton at intel.com>
> > Cc: Ohad Sharabi <ohadshar at intel.com>
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > ---
> >   drivers/gpu/drm/xe/xe_device_types.h |  3 ++
> >   drivers/gpu/drm/xe/xe_mmio.c         | 81 ++++++++++++++++++++++++++++
> >   drivers/gpu/drm/xe/xe_mmio.h         | 35 ++++++------
> >   3 files changed, 99 insertions(+), 20 deletions(-)
> >
> > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > index 17b6b1cc5adb..3f8fd0d8129b 100644
> > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > @@ -378,6 +378,9 @@ struct xe_device {
> >       /** @d3cold_allowed: Indicates if d3cold is a valid device state */
> >       bool d3cold_allowed;
> >
> > +     /** @mmio_funcs: function pointers for MMIO related functions */
> > +     const struct xe_mmio_funcs *mmio_funcs;
> Do we really want to divide the FP structs at such a fine-grained
> resolution?
> 
> Can't the MMIO functions be grouped as part of the GT functions, which
> could later host other FPs as well?
> 
> Or do we think that all other GT funcs will probably be "common to
> more components", and so we should define a special struct for MMIO?

Sure, it is possible to use broader FP structs that would contain more
functions. This was actually explored in the first revision of the original
series: https://patchwork.freedesktop.org/series/114630/#rev1

The concern is that we might not be able to correctly predict where future
platforms will require dedicated code, which means we could end up with very
large FP structs containing dozens or hundreds of functions.

This is why in earlier discussions we leaned towards having multiple FP structs,
each local to a domain or to a .c file, like in this patch for MMIO.
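
To illustrate the trade-off (a minimal sketch; the names in the first struct
are hypothetical and not taken from the series): a broad per-GT ops struct
accumulates hooks from every domain in one place, while the per-file approach
keeps each small ops table next to the only code that uses it:

/* Hypothetical broad struct: every domain adds its hooks here over time. */
struct xe_gt_funcs {
	u32 (*mmio_read32)(struct xe_gt *gt, struct xe_reg reg);
	void (*mmio_write32)(struct xe_gt *gt, struct xe_reg reg, u32 val);
	/* ... potentially dozens more entries from other domains ... */
};

/* Per-file alternative, as in this patch: scoped to the MMIO code only. */
struct xe_mmio_funcs {
	u32 (*read32)(struct xe_gt *gt, struct xe_reg reg);
	void (*write32)(struct xe_gt *gt, struct xe_reg reg, u32 val);
};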

> 
> > +
> >       /* private: */
> >
> >   #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> > diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
> > index 475b14fe4356..f3d08676a77a 100644
> > --- a/drivers/gpu/drm/xe/xe_mmio.c
> > +++ b/drivers/gpu/drm/xe/xe_mmio.c
> > @@ -25,6 +25,62 @@
> >
> >   #define BAR_SIZE_SHIFT 20
> >
> > +static void xe_mmio_write32_device(struct xe_gt *gt,
> > +                                struct xe_reg reg, u32 val);
> > +static u32 xe_mmio_read32_device(struct xe_gt *gt, struct xe_reg reg);
> > +static void xe_mmio_write64_device(struct xe_gt *gt,
> > +                                struct xe_reg reg, u64 val);
> > +static u64 xe_mmio_read64_device(struct xe_gt *gt, struct xe_reg reg);
> > +
> > +static const struct xe_mmio_funcs xe_mmio_funcs_device = {
> > +     .write32 = xe_mmio_write32_device,
> > +     .read32 = xe_mmio_read32_device,
> > +     .write64 = xe_mmio_write64_device,
> > +     .read64 = xe_mmio_read64_device,
> > +};
> > +
> > +static void xe_mmio_write32_device(struct xe_gt *gt,
> > +                                struct xe_reg reg, u32 val)
> > +{
> > +     struct xe_tile *tile = gt_to_tile(gt);
> > +
> > +     if (reg.addr < gt->mmio.adj_limit)
> > +             reg.addr += gt->mmio.adj_offset;
> > +
> > +     writel(val, tile->mmio.regs + reg.addr);
> > +}
> > +
> > +static u32 xe_mmio_read32_device(struct xe_gt *gt, struct xe_reg reg)
> > +{
> > +     struct xe_tile *tile = gt_to_tile(gt);
> > +
> > +     if (reg.addr < gt->mmio.adj_limit)
> > +             reg.addr += gt->mmio.adj_offset;
> > +
> > +     return readl(tile->mmio.regs + reg.addr);
> > +}
> > +
> > +static void xe_mmio_write64_device(struct xe_gt *gt,
> > +                                struct xe_reg reg, u64 val)
> > +{
> > +     struct xe_tile *tile = gt_to_tile(gt);
> > +
> > +     if (reg.addr < gt->mmio.adj_limit)
> > +             reg.addr += gt->mmio.adj_offset;
> > +
> > +     writeq(val, tile->mmio.regs + reg.addr);
> > +}
> > +
> > +static u64 xe_mmio_read64_device(struct xe_gt *gt, struct xe_reg reg)
> > +{
> > +     struct xe_tile *tile = gt_to_tile(gt);
> > +
> > +     if (reg.addr < gt->mmio.adj_limit)
> > +             reg.addr += gt->mmio.adj_offset;
> > +
> > +     return readq(tile->mmio.regs + reg.addr);
> > +}
> > +
> >   static int xe_set_dma_info(struct xe_device *xe)
> >   {
> >       unsigned int mask_size = xe->info.dma_mask_size;
> > @@ -377,6 +433,29 @@ static void mmio_fini(struct drm_device *drm, void *arg)
> >               iounmap(xe->mem.vram.mapping);
> >   }
> >
> > +static void xe_mmio_set_funcs(struct xe_device *xe)
> Wouldn't we like to fail the MMIO init on an unsupported platform?

Yes, I will fix that.
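
Probably something along these lines, so the error propagates from
xe_mmio_init() (a sketch of the direction, not the final code):

static int xe_mmio_set_funcs(struct xe_device *xe)
{
	switch (xe->info.platform) {
	case XE_TIGERLAKE:
	/* ... all other supported platforms ... */
	case XE_METEORLAKE:
		xe->mmio_funcs = &xe_mmio_funcs_device;
		return 0;
	default:
		DRM_ERROR("Unsupported platform\n");
		return -ENODEV;
	}
}

and in xe_mmio_init():

	err = xe_mmio_set_funcs(xe);
	if (err)
		return err;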

> > +{
> > +     /* For now all platforms use the set of MMIO functions for a
> > +      * physical device.
> > +      */
> > +     switch (xe->info.platform) {
> > +     case XE_TIGERLAKE:
> > +     case XE_ROCKETLAKE:
> > +     case XE_ALDERLAKE_S:
> > +     case XE_ALDERLAKE_P:
> > +     case XE_ALDERLAKE_N:
> > +     case XE_DG1:
> > +     case XE_DG2:
> > +     case XE_PVC:
> > +     case XE_METEORLAKE:
> > +             xe->mmio_funcs = &xe_mmio_funcs_device;
> > +             break;
> > +     default:
> > +             DRM_ERROR("Unsupported platform\n");
> > +             break;
> > +     }
> > +}
> > +
> >   int xe_mmio_init(struct xe_device *xe)
> >   {
> >       struct xe_tile *root_tile = xe_device_get_root_tile(xe);
> > @@ -384,6 +463,8 @@ int xe_mmio_init(struct xe_device *xe)
> >       const int mmio_bar = 0;
> >       int err;
> >
> > +     xe_mmio_set_funcs(xe);
> > +
> >       /*
> >        * Map the first 16MB of the BAR, which includes the registers (0-4MB),
> >        * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile.
> > diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
> > index 3c547d78afba..80ce9de7aac4 100644
> > --- a/drivers/gpu/drm/xe/xe_mmio.h
> > +++ b/drivers/gpu/drm/xe/xe_mmio.h
> > @@ -19,6 +19,13 @@ struct xe_device;
> >
> >   #define GEN12_LMEM_BAR              2
> >
> > +struct xe_mmio_funcs {
> > +     u32 (*read32)(struct xe_gt *gt, struct xe_reg reg);
> > +     u64 (*read64)(struct xe_gt *gt, struct xe_reg reg);
> > +     void (*write32)(struct xe_gt *gt, struct xe_reg reg, u32 val);
> > +     void (*write64)(struct xe_gt *gt, struct xe_reg reg, u64 val);
> > +};
> > +
> >   int xe_mmio_init(struct xe_device *xe);
> >
> >   static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> > @@ -34,22 +41,16 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> >   static inline void xe_mmio_write32(struct xe_gt *gt,
> >                                  struct xe_reg reg, u32 val)
> >   {
> > -     struct xe_tile *tile = gt_to_tile(gt);
> > -
> > -     if (reg.addr < gt->mmio.adj_limit)
> > -             reg.addr += gt->mmio.adj_offset;
> > +     struct xe_device *xe = gt_to_xe(gt);
> >
> > -     writel(val, tile->mmio.regs + reg.addr);
> > +     xe->mmio_funcs->write32(gt, reg, val);
> >   }
> >
> >   static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
> >   {
> > -     struct xe_tile *tile = gt_to_tile(gt);
> > -
> > -     if (reg.addr < gt->mmio.adj_limit)
> > -             reg.addr += gt->mmio.adj_offset;
> > +     struct xe_device *xe = gt_to_xe(gt);
> >
> > -     return readl(tile->mmio.regs + reg.addr);
> > +     return xe->mmio_funcs->read32(gt, reg);
> >   }
> >
> >   static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
> > @@ -67,22 +68,16 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
> >   static inline void xe_mmio_write64(struct xe_gt *gt,
> >                                  struct xe_reg reg, u64 val)
> >   {
> > -     struct xe_tile *tile = gt_to_tile(gt);
> > -
> > -     if (reg.addr < gt->mmio.adj_limit)
> > -             reg.addr += gt->mmio.adj_offset;
> > +     struct xe_device *xe = gt_to_xe(gt);
> >
> > -     writeq(val, tile->mmio.regs + reg.addr);
> > +     xe->mmio_funcs->write64(gt, reg, val);
> >   }
> >
> >   static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
> >   {
> > -     struct xe_tile *tile = gt_to_tile(gt);
> > -
> > -     if (reg.addr < gt->mmio.adj_limit)
> > -             reg.addr += gt->mmio.adj_offset;
> > +     struct xe_device *xe = gt_to_xe(gt);
> >
> > -     return readq(tile->mmio.regs + reg.addr);
> > +     return xe->mmio_funcs->read64(gt, reg);
> >   }
> >
> >   static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
> >
> >
> This approach should work for us. Thanks for the effort.
> 
> I do think that if loading this data from the descriptor is acceptable,
> let's do it soon, to avoid "switch-cases" accumulating in the code while
> other FPs are being added (though this is really not necessary at this
> phase, as we want this patch merged soon).
> 
> Thanks,
> 
> Ohad
> 

I think we should be able to add this in the same series, but I would like
to get more feedback on this RFC before moving forward.
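
For reference, the descriptor-based variant Ohad suggests could look roughly
like this, assuming the ops pointer is carried in the platform descriptor
(e.g. struct xe_device_desc in xe_pci.c; treat the field below as
hypothetical):

/* In the platform descriptor: */
struct xe_device_desc {
	/* ... existing fields ... */
	const struct xe_mmio_funcs *mmio_funcs;
};

static const struct xe_device_desc tgl_desc = {
	/* ... existing initializers ... */
	.mmio_funcs = &xe_mmio_funcs_device,
};

/* At probe time, no switch-case needed: */
	xe->mmio_funcs = desc->mmio_funcs;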

Thanks,
Francois

