[PATCH v5 1/1] drm/xe: Expose number of dss per group and helpers

Wed Jan 31 18:30:18 UTC 2024

On Wed, Jan 31, 2024 at 10:47:32AM -0500, Dong, Zhanjun wrote:
> 
> 
> On 2024-01-30 7:09 p.m., Matt Roper wrote:
> > On Tue, Jan 30, 2024 at 01:22:08PM -0800, Zhanjun Dong wrote:
> > > Expose helper for dss per group. This is a precursor patch to allow
> > > for easier iteration through MCR registers and other per-DSS uses.
> > > 
> > > Signed-off-by: Zhanjun Dong <zhanjun.dong at intel.com>
> > > ---
> > >   drivers/gpu/drm/xe/xe_gt_mcr.c      | 40 ++++++++++++++++++++++++++++-
> > >   drivers/gpu/drm/xe/xe_gt_mcr.h      | 17 ++++++++++++
> > >   drivers/gpu/drm/xe/xe_gt_topology.c |  3 ---
> > >   drivers/gpu/drm/xe/xe_gt_types.h    |  2 ++
> > >   4 files changed, 58 insertions(+), 4 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.c b/drivers/gpu/drm/xe/xe_gt_mcr.c
> > > index 77925b35cf8d..ad2e42dc2218 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_mcr.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_mcr.c
> > > @@ -291,11 +291,16 @@ static void init_steering_mslice(struct xe_gt *gt)
> > >   	gt->steering[LNCF].instance_target = 0;		/* unused */
> > >   }
> > > +int xe_gt_mcr_get_dss_per_group(struct xe_gt *gt)
> > > +{
> > > +	return gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
> > > +}
> > > +
> > >   static void init_steering_dss(struct xe_gt *gt)
> > >   {
> > >   	unsigned int dss = min(xe_dss_mask_group_ffs(gt->fuse_topo.g_dss_mask, 0, 0),
> > >   			       xe_dss_mask_group_ffs(gt->fuse_topo.c_dss_mask, 0, 0));
> > > -	unsigned int dss_per_grp = gt_to_xe(gt)->info.platform == XE_PVC ? 8 : 4;
> > > +	unsigned int dss_per_grp = xe_gt_mcr_get_dss_per_group(gt);
> > >   	gt->steering[DSS].group_target = dss / dss_per_grp;
> > >   	gt->steering[DSS].instance_target = dss % dss_per_grp;
> > 
> > To avoid duplicating the logic, we could just replace these lines with a
> > call to your new xe_gt_mcr_get_dss_steering() function as well.  Then we
> > don't even need the dss_per_grp local variable.
> 
> There is a pointer type mismatch issue: gt->steering[DSS].group_target
> and gt->steering[DSS].instance_target are both u16, while
> xe_gt_mcr_get_dss_steering() takes unsigned int pointers, which the macro
> passes in from its own arguments. I would prefer to keep int as the macro
> argument type, as it is an easy and safe way to avoid writing 4/8 bytes
> into 2-byte data.
> Let's leave this part as is to avoid the u16 * vs. unsigned int * issue.
> 
> > 
> > > @@ -683,3 +688,36 @@ void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p)
> > >   		}
> > >   	}
> > >   }
> > > +
> > > +/**
> > > + * xe_gt_mcr_get_dss_steering - returns the group/instance steering for a DSS
> > > + * @gt: GT structure
> > > + * @dss: DSS ID to obtain steering for
> > > + * @group: pointer to storage for steering group ID
> > > + * @instance: pointer to storage for steering instance ID
> > > + *
> > > + * Returns the steering IDs (via the @group and @instance parameters) that
> > > + * correspond to a specific DSS ID.
> > > + */
> > > +void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, unsigned int *group,
> > > +				unsigned int *instance)
> > > +{
> > > +	int dss_per_grp = xe_gt_mcr_get_dss_per_group(gt);
> > > +
> > > +	*group = dss / dss_per_grp;
> > > +	*instance = dss % dss_per_grp;
> > > +}
> > > +
> > > +bool xe_gt_mcr_dss_has_subslice(struct xe_gt *gt, int slice, int subslice)
> > 
> > "dss" and "subslice" are basically two terms for the same thing so this
> > name is a bit redundant.  Something like "xe_gt_mcr_has_dss" would
> > probably be fine?
> > 
> > Although this should probably be in xe_gt_topology rather than xe_gt_mcr
> > since it deals more with the hardware topology than with steering.  So
> > moving it over to that file, the name would become
> > "xe_gt_topology_has_dss."
> Moving it to xe_gt_topology is fine, but the function needs the DSS-per-group
> value for its calculation, so xe_gt_mcr_get_dss_per_group must be exposed.
> 
> > 
> > > +{
> > > +	int dss_per_grp = xe_gt_mcr_get_dss_per_group(gt);
> > > +	int index = slice * dss_per_grp + subslice;
> > > +
> > > +	if (index >= XE_MAX_DSS_FUSE_BITS) {
> > > +		xe_gt_dbg(gt, "DSS id out of range: slice:%d subslice:%d\n", slice, subslice);
> > > +		return false;
> > > +	}
> > > +
> > > +	return test_bit(index, gt->fuse_topo.g_dss_mask) ||
> > > +	       test_bit(index, gt->fuse_topo.c_dss_mask);
> > > +}
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_mcr.h b/drivers/gpu/drm/xe/xe_gt_mcr.h
> > > index 27ca1bc880a0..356f21978eb8 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_mcr.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_mcr.h
> > > @@ -7,6 +7,7 @@
> > >   #define _XE_GT_MCR_H_
> > >   #include "regs/xe_reg_defs.h"
> > > +#include "xe_gt_types.h"
> > >   struct drm_printer;
> > >   struct xe_gt;
> > > @@ -25,5 +26,21 @@ void xe_gt_mcr_multicast_write(struct xe_gt *gt, struct xe_reg_mcr mcr_reg,
> > >   			       u32 value);
> > >   void xe_gt_mcr_steering_dump(struct xe_gt *gt, struct drm_printer *p);
> > > +int xe_gt_mcr_get_dss_per_group(struct xe_gt *gt);
> > 
> > It doesn't look like you actually use this outside of xe_gt_mcr.c
> > anymore so we probably don't need to export it.  It can remain a static
> > function for now.
> Same as above: moving xe_gt_mcr_has_dss to xe_gt_topology.c conflicts with
> making this function static.
> 
> > 
> > > +void xe_gt_mcr_get_dss_steering(struct xe_gt *gt, unsigned int dss, unsigned int *group,
> > > +				unsigned int *instance);
> > > +bool xe_gt_mcr_dss_has_subslice(struct xe_gt *gt, int slice, int subslice);
> > > +
> > > +#define _HAS_DSS(gt__, group__, instance__) xe_gt_mcr_dss_has_subslice(gt__, group__, instance__)
> > 
> > I don't think we need this macro which is just a pure alias for the real
> > function.
> Sure, will take it off.
> 
> > 
> > > +
> > > +/*
> > > + * Loop over each subslice/DSS and determine the group and instance IDs that
> > > + * should be used to steer MCR accesses toward this DSS.
> > > + */
> > > +#define for_each_dss_steering(dss_, gt_, group_, instance_) \
> > > +	for (dss_ = 0, xe_gt_mcr_get_dss_steering(gt_, 0, &(group_), &(instance_)); \
> > > +	     dss_ < XE_MAX_DSS_FUSE_BITS; \
> > > +	     dss_++, xe_gt_mcr_get_dss_steering(gt_, dss_, &(group_), &(instance_))) \
> > > +		for_each_if(_HAS_DSS(gt_, (group_), (instance_)))
> > 
> > We could also probably implement this more efficiently if we had a
> > topology helper like this (untested):
> > 
> >      int xe_gt_topology_get_next_dss(struct xe_gt *gt, int from) {
> >              xe_dss_mask_t all_dss;
> >              unsigned long next;
> >              bitmap_or(all_dss,
> >                        gt->fuse_topo.g_dss_mask,
> >                        gt->fuse_topo.c_dss_mask,
> >                        XE_MAX_DSS_FUSE_BITS);
> > 
> >              next = find_next_bit(all_dss, XE_MAX_DSS_FUSE_BITS, from);
> >              if (next == XE_MAX_DSS_FUSE_BITS)
> >                      return -1;
> >              return next;
> >      }
> > 
> > Then you could write these loops as something like:
> > 
> >     #define for_each_dss_steering(dss_, gt_, group_, instance_) \
> >             for (dss_ = xe_gt_topology_get_next_dss(gt, 0); \
> >                  dss_ >= 0; \
> >                  dss_ = xe_gt_topology_get_next_dss(gt, dss_ + 1)) \
> > 
> The macro above does not actually set group_ and instance_, while
> for_each_dss_steering is designed to iterate over all DSS IDs and run a
> code block with group_ and instance_ already set. In my review comments on
> Jose's patch I suggested a similar optimization, but reverted it for the
> same reason: group_ and instance_ were not set.

Sorry, typed it up too fast.  Something like this should work I think?

     #define for_each_dss_steering(dss_, gt_, group_, instance_) \
             for (dss_ = xe_gt_topology_get_next_dss(gt_, 0), \
                  xe_gt_mcr_get_dss_steering(gt_, dss_, &(group_), &(instance_)); \
                  dss_ >= 0; \
                  dss_ = xe_gt_topology_get_next_dss(gt_, dss_ + 1), \
                  xe_gt_mcr_get_dss_steering(gt_, dss_, &(group_), &(instance_)))

Or if the commas are too ugly, you can make xe_gt_mcr_get_dss_steering()
return an int and do something like

     #define for_each_dss_steering(dss_, gt_, group_, instance_) \
         for (dss_ = xe_gt_topology_get_next_dss(gt_, 0); \
              dss_ >= 0; \
              dss_ = xe_gt_topology_get_next_dss(gt_, dss_ + 1)) \
                 for_each_if(xe_gt_mcr_get_dss_steering(gt_, dss_, &(group_), &(instance_)))

so that the for_each_if assigns the group/instance.

Matt

> 
> > That also avoids leaking the XE_MAX_DSS_FUSE_BITS internal detail
> > outside the topology files.
> > 
> > >   #endif /* _XE_GT_MCR_H_ */
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> > > index a8d7f272c30a..e973eeaac7f1 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> > > @@ -11,9 +11,6 @@
> > >   #include "xe_gt.h"
> > >   #include "xe_mmio.h"
> > > -#define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS)
> > > -#define XE_MAX_EU_FUSE_BITS (32 * XE_MAX_EU_FUSE_REGS)
> > > -
> > >   static void
> > >   load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
> > >   {
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > > index 70c615dd1498..b926606edb38 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > @@ -25,7 +25,9 @@ enum xe_gt_type {
> > >   };
> > >   #define XE_MAX_DSS_FUSE_REGS	3
> > > +#define XE_MAX_DSS_FUSE_BITS	(32 * XE_MAX_DSS_FUSE_REGS)
> > 
> > Based on the suggestions above, I don't think moving these will actually
> > be necessary.
> This is tied to the for_each_dss_steering macro definition.
> > 
> > 
> > Matt
> > 
> > >   #define XE_MAX_EU_FUSE_REGS	1
> > > +#define XE_MAX_EU_FUSE_BITS	(32 * XE_MAX_EU_FUSE_REGS)
> > >   typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(32 * XE_MAX_DSS_FUSE_REGS)];
> > >   typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(32 * XE_MAX_EU_FUSE_REGS)];
> > > -- 
> > > 2.34.1
> > > 
> > 

-- 
Matt Roper
Graphics Software Engineer
Linux GPU Platform Enablement
Intel Corporation