[Intel-xe] [RFC PATCH 1/1] drm/xe: Introduce function pointers for MMIO functions

Matt Roper matthew.d.roper at intel.com
Thu Jun 15 16:34:20 UTC 2023


On Thu, Jun 15, 2023 at 04:04:18PM +0300, Oded Gabbay wrote:
> On Thu, Jun 15, 2023 at 3:01 AM Matt Roper <matthew.d.roper at intel.com> wrote:
> >
> > On Mon, Jun 12, 2023 at 06:31:57PM +0200, Francois Dugast wrote:
> > > On Thu, Jun 08, 2023 at 10:35:29AM -0700, Lucas De Marchi wrote:
> > > > On Fri, Jun 02, 2023 at 02:25:01PM +0000, Francois Dugast wrote:
> > > > > A local structure of function pointers is used as a minimal hardware
> > > > > abstraction layer to prepare for platform independent MMIO calls.
> > > > >
> > > > > Cc: Oded Gabbay <ogabbay at kernel.org>
> > > > > Cc: Ofir Bitton <ofir1.bitton at intel.com>
> > > > > Cc: Ohad Sharabi <ohadshar at intel.com>
> > > > > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > > > > ---
> > > > > drivers/gpu/drm/xe/xe_device_types.h |  3 ++
> > > > > drivers/gpu/drm/xe/xe_mmio.c         | 81 ++++++++++++++++++++++++++++
> > > > > drivers/gpu/drm/xe/xe_mmio.h         | 35 ++++++------
> > > > > 3 files changed, 99 insertions(+), 20 deletions(-)
> > > > >
> > > > > diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
> > > > > index 17b6b1cc5adb..3f8fd0d8129b 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_device_types.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_device_types.h
> > > > > @@ -378,6 +378,9 @@ struct xe_device {
> > > > >   /** @d3cold_allowed: Indicates if d3cold is a valid device state */
> > > > >   bool d3cold_allowed;
> > > > >
> > > > > + /** @mmio_funcs: function pointers for MMIO related functions */
> > > > > + const struct xe_mmio_funcs *mmio_funcs;
> > > > > +
> > > > >   /* private: */
> > > > >
> > > > > #if IS_ENABLED(CONFIG_DRM_XE_DISPLAY)
> > > > > diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
> > > > > index 475b14fe4356..f3d08676a77a 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_mmio.c
> > > > > +++ b/drivers/gpu/drm/xe/xe_mmio.c
> > > > > @@ -25,6 +25,62 @@
> > > > >
> > > > > #define BAR_SIZE_SHIFT 20
> > > > >
> > > > > +static void xe_mmio_write32_device(struct xe_gt *gt,
> > > > > +                            struct xe_reg reg, u32 val);
> > > > > +static u32 xe_mmio_read32_device(struct xe_gt *gt, struct xe_reg reg);
> > > > > +static void xe_mmio_write64_device(struct xe_gt *gt,
> > > > > +                            struct xe_reg reg, u64 val);
> > > > > +static u64 xe_mmio_read64_device(struct xe_gt *gt, struct xe_reg reg);
> > > > > +
> > > > > +static const struct xe_mmio_funcs xe_mmio_funcs_device = {
> > > > > + .write32 = xe_mmio_write32_device,
> > > > > + .read32 = xe_mmio_read32_device,
> > > > > + .write64 = xe_mmio_write64_device,
> > > > > + .read64 = xe_mmio_read64_device,
> > > > > +};
> > > > > +
> > > > > +static inline void xe_mmio_write32_device(struct xe_gt *gt,
> > > > > +                            struct xe_reg reg, u32 val)
> > > > > +{
> > > > > + struct xe_tile *tile = gt_to_tile(gt);
> > > > > +
> > > > > + if (reg.addr < gt->mmio.adj_limit)
> > > > > +         reg.addr += gt->mmio.adj_offset;
> > > > > +
> > > > > + writel(val, tile->mmio.regs + reg.addr);
> > > > > +}
> > > > > +
> > > > > +static inline u32 xe_mmio_read32_device(struct xe_gt *gt, struct xe_reg reg)
> > > > > +{
> > > > > + struct xe_tile *tile = gt_to_tile(gt);
> > > > > +
> > > > > + if (reg.addr < gt->mmio.adj_limit)
> > > > > +         reg.addr += gt->mmio.adj_offset;
> > > > > +
> > > > > + return readl(tile->mmio.regs + reg.addr);
> > > > > +}
> > > > > +
> > > > > +static inline void xe_mmio_write64_device(struct xe_gt *gt,
> > > > > +                            struct xe_reg reg, u64 val)
> > > > > +{
> > > > > + struct xe_tile *tile = gt_to_tile(gt);
> > > > > +
> > > > > + if (reg.addr < gt->mmio.adj_limit)
> > > > > +         reg.addr += gt->mmio.adj_offset;
> > > > > +
> > > > > + writeq(val, tile->mmio.regs + reg.addr);
> > > > > +}
> > > > > +
> > > > > +static inline u64 xe_mmio_read64_device(struct xe_gt *gt, struct xe_reg reg)
> > > > > +{
> > > > > + struct xe_tile *tile = gt_to_tile(gt);
> > > > > +
> > > > > + if (reg.addr < gt->mmio.adj_limit)
> > > > > +         reg.addr += gt->mmio.adj_offset;
> > > > > +
> > > > > + return readq(tile->mmio.regs + reg.addr);
> > > > > +}
> > > > > +
> > > > > static int xe_set_dma_info(struct xe_device *xe)
> > > > > {
> > > > >   unsigned int mask_size = xe->info.dma_mask_size;
> > > > > @@ -377,6 +433,29 @@ static void mmio_fini(struct drm_device *drm, void *arg)
> > > > >           iounmap(xe->mem.vram.mapping);
> > > > > }
> > > > >
> > > > > +static void xe_mmio_set_funcs(struct xe_device *xe)
> > > > > +{
> > > > > + /* For now all platforms use the set of MMIO functions for a
> > > > > +  * physical device.
> > > > > +  */
> > > >
> > > >
> > > > what is "device" in this context? that seems confusing as we always ever
> > > > just support reading/writing to a real device (physical here may also
> > > > add to the confusion when thinking about SR-IOV and VFs).
> > > > We shouldn't add abstractions that are then never used and all platforms
> > > > end up using the same. Unless it's a preparation for a follow up series
> > > > adding the different handling.
> > >
> > > For now "device" is meant as "in opposition to simulator" but I agree
> > > we can find a better name. Existing platforms all use the same
> > > implementation but this is preparation for platforms that require a
> > > different implementation.
> >
> > I agree with Lucas that this doesn't really seem to be a good candidate
> > for a vtable; every platform uses exactly the same logic and I can't
> > really envision how/why this would need to change for future platforms
> > either, so this seems to just be adding unnecessary complexity.
> > Registers being accessed at some offset into a PCI BAR isn't likely to
> > change going forward.  On future platforms it's more likely that we'd
> > need changes to the part of the code that maps the MMIO BAR rather than
> > the code that reads/writes an offset into a mapping that's already been
> > setup.
> I agree with that for every *real hardware* platform the
> implementation will probably be the same.
> But for simulator/emulation, there is a different implementation.
> And even if we don't upstream that different implementation, doing
> this abstraction will help us upstream the rest of the driver as we
> minimize the differences between upstream and downstream.
> And helping us upstream the driver is a good enough reason imo to add
> this abstraction.

Adding extra abstraction layers to upstream code that provide no
upstream benefit and only come into play for some private, internal-only
workflow is generally frowned upon in the DRM subsystem.  Unless you
have some kind of public, open-source usage model, adding the extra
complexity and clutter here is probably a no-go for upstream.

Also, even if you do have an unusual form of simulation/emulation that
doesn't behave like real hardware, are you going to have several of them
that all work in different manners and need unique implementations?  If
we only ever expect to have two code paths total (and only one of those
upstream), then using a vtable seems like overkill.  A very simple

        if (special case)
                return alternate_implementation();

would be much easier to understand and maintain.


There are lots of other places in the Xe driver that _would_ benefit
from per-platform vtables; we should prioritize making changes like this
in the areas where they provide a clear benefit.


Matt

> 
> 
> Oded
> >
> > >
> > > >
> > > > +Matt as there is still (at least) one refactor planned in this area,
> > > > since gt is not always the proper target for the MMIOs. He was saying in
> > > > his earlier series about having a mmio_view or such to abstract the
> > > > offset and other differences between each IO range. Not sure if this
> > > > series would go the right direction, maybe we need to think in both
> > > > cases together.
> > >
> > > Matt, would this series block the refactor mentioned by Lucas?
> > >
> > > In general, are there objections to introducing functions pointers for
> > > MMIO functions (extended to all of them, as suggested by Ohad)?
> >
> > It probably makes more sense to do vtable conversion on other parts of
> > the driver where we already have different implementations per platform
> > and where we already know that design makes sense.  We can always come
> > back and do this to the MMIO functions later once there are actually
> > multiple implementations, but it doesn't seem to serve any purpose right
> > now.
> >
> >
> > Matt
> >
> > >
> > > Thanks,
> > > Francois
> > >
> > > >
> > > >
> > > > Lucas De Marchi
> > > >
> > > > > + switch (xe->info.platform) {
> > > > > + case XE_TIGERLAKE:
> > > > > + case XE_ROCKETLAKE:
> > > > > + case XE_ALDERLAKE_S:
> > > > > + case XE_ALDERLAKE_P:
> > > > > + case XE_ALDERLAKE_N:
> > > > > + case XE_DG1:
> > > > > + case XE_DG2:
> > > > > + case XE_PVC:
> > > > > + case XE_METEORLAKE:
> > > > > +         xe->mmio_funcs = &xe_mmio_funcs_device;
> > > > > +         break;
> > > > > + default:
> > > > > +         DRM_ERROR("Unsupported platform\n");
> > > > > +         break;
> > > > > + }
> > > > > +}
> > > > > +
> > > > > int xe_mmio_init(struct xe_device *xe)
> > > > > {
> > > > >   struct xe_tile *root_tile = xe_device_get_root_tile(xe);
> > > > > @@ -384,6 +463,8 @@ int xe_mmio_init(struct xe_device *xe)
> > > > >   const int mmio_bar = 0;
> > > > >   int err;
> > > > >
> > > > > + xe_mmio_set_funcs(xe);
> > > > > +
> > > > >   /*
> > > > >    * Map the first 16MB of the BAR, which includes the registers (0-4MB),
> > > > >    * reserved space (4MB-8MB), and GGTT (8MB-16MB) for a single tile.
> > > > > diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
> > > > > index 3c547d78afba..80ce9de7aac4 100644
> > > > > --- a/drivers/gpu/drm/xe/xe_mmio.h
> > > > > +++ b/drivers/gpu/drm/xe/xe_mmio.h
> > > > > @@ -19,6 +19,13 @@ struct xe_device;
> > > > >
> > > > > #define GEN12_LMEM_BAR            2
> > > > >
> > > > > +struct xe_mmio_funcs {
> > > > > + u32 (*read32)(struct xe_gt *gt, struct xe_reg reg);
> > > > > + u64 (*read64)(struct xe_gt *gt, struct xe_reg reg);
> > > > > + void (*write32)(struct xe_gt *gt, struct xe_reg reg, u32 val);
> > > > > + void (*write64)(struct xe_gt *gt, struct xe_reg reg, u64 val);
> > > > > +};
> > > > > +
> > > > > int xe_mmio_init(struct xe_device *xe);
> > > > >
> > > > > static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> > > > > @@ -34,22 +41,16 @@ static inline u8 xe_mmio_read8(struct xe_gt *gt, struct xe_reg reg)
> > > > > static inline void xe_mmio_write32(struct xe_gt *gt,
> > > > >                              struct xe_reg reg, u32 val)
> > > > > {
> > > > > - struct xe_tile *tile = gt_to_tile(gt);
> > > > > -
> > > > > - if (reg.addr < gt->mmio.adj_limit)
> > > > > -         reg.addr += gt->mmio.adj_offset;
> > > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > >
> > > > > - writel(val, tile->mmio.regs + reg.addr);
> > > > > + xe->mmio_funcs->write32(gt, reg, val);
> > > > > }
> > > > >
> > > > > static inline u32 xe_mmio_read32(struct xe_gt *gt, struct xe_reg reg)
> > > > > {
> > > > > - struct xe_tile *tile = gt_to_tile(gt);
> > > > > -
> > > > > - if (reg.addr < gt->mmio.adj_limit)
> > > > > -         reg.addr += gt->mmio.adj_offset;
> > > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > >
> > > > > - return readl(tile->mmio.regs + reg.addr);
> > > > > + return xe->mmio_funcs->read32(gt, reg);
> > > > > }
> > > > >
> > > > > static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
> > > > > @@ -67,22 +68,16 @@ static inline u32 xe_mmio_rmw32(struct xe_gt *gt, struct xe_reg reg, u32 clr,
> > > > > static inline void xe_mmio_write64(struct xe_gt *gt,
> > > > >                              struct xe_reg reg, u64 val)
> > > > > {
> > > > > - struct xe_tile *tile = gt_to_tile(gt);
> > > > > -
> > > > > - if (reg.addr < gt->mmio.adj_limit)
> > > > > -         reg.addr += gt->mmio.adj_offset;
> > > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > >
> > > > > - writeq(val, tile->mmio.regs + reg.addr);
> > > > > + xe->mmio_funcs->write64(gt, reg, val);
> > > > > }
> > > > >
> > > > > static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
> > > > > {
> > > > > - struct xe_tile *tile = gt_to_tile(gt);
> > > > > -
> > > > > - if (reg.addr < gt->mmio.adj_limit)
> > > > > -         reg.addr += gt->mmio.adj_offset;
> > > > > + struct xe_device *xe = gt_to_xe(gt);
> > > > >
> > > > > - return readq(tile->mmio.regs + reg.addr);
> > > > > + return xe->mmio_funcs->read64(gt, reg);
> > > > > }
> > > > >
> > > > > static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
> > > > > --
> > > > > 2.34.1
> > > > >
> >
> > --
> > Matt Roper
> > Graphics Software Engineer
> > Linux GPU Platform Enablement
> > Intel Corporation

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation


More information about the dri-devel mailing list