[PATCH v2] drm/xe/uapi: Add L3 bank mask to topology query

Francois Dugast francois.dugast at intel.com
Thu Mar 21 13:19:57 UTC 2024


On Wed, Mar 20, 2024 at 12:14:21PM -0400, Dong, Zhanjun wrote:
> See my comments below.

Hi,

Thanks for the review, please see my reply below.

> 
> Regards,
> Zhanjun
> 
> On 2024-03-20 11:00 a.m., Francois Dugast wrote:
> > Extend the existing topology uAPI to expose the masks of L3 banks
> > to user space. L3 count is not sufficient because in some configuration
> > not all banks are enabled. User space needs to know which ones are
> > enabled, in the context of OA.
> > 
> > v2:
> > - Remove "Fixes" and make uAPI change explicit in commit message
> >    (Lucas De Marchi and Matt Roper)
> > - Add separate conditions for Xe2 and for PVC (Matt Roper)
> > - Return the L3 bank mask instead of the L3 node mask and the L3
> >    banks per node mask, as the node mask can be derived from the
> >    bank mask by user space, just like slice masks are derived from
> >    subslice masks today (Francois Dugast)
> > 
> > Bspec: 52545, 52546, 62482
> > Signed-off-by: Francois Dugast <francois.dugast at intel.com>
> > Cc: Jonathan Cavitt <jonathan.cavitt at intel.com>
> > Cc: Robert Krzemien <robert.krzemien at intel.com>
> > Cc: Matt Roper <matthew.d.roper at intel.com>
> > ---
> >   drivers/gpu/drm/xe/regs/xe_gt_regs.h |  3 ++
> >   drivers/gpu/drm/xe/xe_gt_topology.c  | 72 ++++++++++++++++++++++++++++
> >   drivers/gpu/drm/xe/xe_gt_types.h     | 14 ++++--
> >   drivers/gpu/drm/xe/xe_query.c        | 12 ++++-
> >   include/uapi/drm/xe_drm.h            |  1 +
> >   5 files changed, 96 insertions(+), 6 deletions(-)
> > 
> > diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > index 95969935f58b..be5b4936eb53 100644
> > --- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > +++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
> > @@ -156,7 +156,10 @@
> >   #define	MIRROR_FUSE3				XE_REG(0x9118)
> >   #define   XE2_NODE_ENABLE_MASK			REG_GENMASK(31, 16)
> >   #define   L3BANK_PAIR_COUNT			4
> > +#define   XEHPC_GT_L3_MODE_MASK			REG_GENMASK(7, 4)
> > +#define   XE2_GT_L3_MODE_MASK			REG_GENMASK(7, 4)
> >   #define   L3BANK_MASK				REG_GENMASK(3, 0)
> > +#define   XELP_GT_L3_MODE_MASK			REG_GENMASK(7, 0)
> >   /* on Xe_HP the same fuses indicates mslices instead of L3 banks */
> >   #define   MAX_MSLICES				4
> >   #define   MEML3_EN_MASK				REG_GENMASK(3, 0)
> > diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> > index f5773a14f3c8..8920426332dd 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> > +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> > @@ -59,6 +59,75 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> >   	bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
> >   }
> > +static void
> > +load_l3_bank_mask(struct xe_gt *gt, xe_l3_bank_mask_t l3_bank_mask)
> > +{
> > +	struct xe_device *xe = gt_to_xe(gt);
> > +	u64 node_mask, bank_mask = 0;
> > +	u64 banks_per_node_fuse, banks_per_node_mask = 0;
> > +	int i;
> > +
> > +	if (GRAPHICS_VER(xe) >= 20) {
> > +		node_mask =
> > +			REG_FIELD_GET(XE2_NODE_ENABLE_MASK,
> > +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> > +		banks_per_node_mask =
> > +			REG_FIELD_GET(XE2_GT_L3_MODE_MASK,
> > +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> > +		for (i = 0; i < fls(node_mask); i++)
> > +			if (node_mask & BIT(i))
> 
> Is the for/if equal to "for_each_set_bit"?

Yes conceptually but it seems "for_each_set_bit" only applies to bitmaps.
Here the intermediate masks come from using registers and they are
used in that format, before the end result (bank_mask) is converted to
bitmap in the end, see last line of this function and other functions
in this file.

> 
> > +				bank_mask |= banks_per_node_mask << 4 * i;
> > +	} else if (GRAPHICS_VERx100(xe) >= 1270) {
> > +		node_mask =
> > +			REG_FIELD_GET(MEML3_EN_MASK,
> > +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> > +		banks_per_node_fuse =
> > +			REG_FIELD_GET(GT_L3_EXC_MASK,
> > +				      xe_mmio_read32(gt, XEHP_FUSE4));
> > +		/* Each bit represents 2 banks in the node. */
> > +		for (i = 0; i < fls(banks_per_node_fuse); i++)
> > +			if (banks_per_node_fuse & BIT(i))
> 
> multiple dittos ...
> 
> > +				banks_per_node_mask |= 0x3 << 2 * i;
> > +		for (i = 0; i < fls(node_mask); i++)
> > +			if (node_mask & BIT(i))
> > +				bank_mask |= banks_per_node_mask << 4 * i;
> > +	} else if (xe->info.platform == XE_PVC) {
> > +		node_mask =
> > +			REG_FIELD_GET(MEML3_EN_MASK,
> > +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> > +		banks_per_node_fuse =
> > +			REG_FIELD_GET(XEHPC_GT_L3_MODE_MASK,
> > +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> > +		/* Each bit represents 4 banks in the node. */
> > +		for (i = 0; i < fls(banks_per_node_fuse); i++)
> > +			if (banks_per_node_fuse & BIT(i))
> > +				banks_per_node_mask |= 0xf << 4 * i;
> > +		for (i = 0; i < fls(node_mask); i++)
> > +			if (node_mask & BIT(i))
> > +				bank_mask |= banks_per_node_mask << 16 * i;
> > +	} else if (xe->info.platform == XE_DG2) {
> > +		node_mask =
> > +			REG_FIELD_GET(MEML3_EN_MASK,
> > +				      xe_mmio_read32(gt, MIRROR_FUSE3));
> > +		/*
> > +		 * In this case, if a node is present then all 8 banks inside of
> > +		 * it are present.
> > +		 */
> > +		for (i = 0; i < fls(node_mask); i++)
> > +			if (node_mask & BIT(i))
> > +				bank_mask |= 0xfful << 8 * i;
> > +	} else {
> > +		/*
> > +		 * Here the mask logic is reversed: a bit is set if the bank is
> > +		 * disabled.
> > +		 */
> > +		bank_mask = REG_FIELD_GET(XELP_GT_L3_MODE_MASK,
> > +					  ~xe_mmio_read32(gt, MIRROR_FUSE3));
> > +	}
> > +
> > +	bitmap_from_arr64(l3_bank_mask, &bank_mask, XE_MAX_L3_BANK_FUSE_BITS);
> > +}
> > +
> >   static void
> >   get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
> >   {
> > @@ -103,6 +172,7 @@ xe_gt_topology_init(struct xe_gt *gt)
> >   		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
> >   		      XE2_GT_COMPUTE_DSS_2);
> >   	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> > +	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
> >   	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
> > @@ -120,6 +190,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> >   	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
> >   		   gt->fuse_topo.eu_mask_per_dss);
> > +	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_FUSE_BITS,
> > +		   gt->fuse_topo.l3_bank_mask);
> >   }
> >   /*
> > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > index f6da2ad9719f..e1438a478f94 100644
> > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > @@ -24,13 +24,16 @@ enum xe_gt_type {
> >   	XE_GT_TYPE_MEDIA,
> >   };
> > -#define XE_MAX_DSS_FUSE_REGS	3
> > -#define XE_MAX_DSS_FUSE_BITS	(32 * XE_MAX_DSS_FUSE_REGS)
> > -#define XE_MAX_EU_FUSE_REGS	1
> > -#define XE_MAX_EU_FUSE_BITS	(32 * XE_MAX_EU_FUSE_REGS)
> > +#define XE_MAX_DSS_FUSE_REGS		3
> > +#define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
> > +#define XE_MAX_EU_FUSE_REGS		1
> > +#define XE_MAX_EU_FUSE_BITS		(32 * XE_MAX_EU_FUSE_REGS)
> > +#define XE_MAX_L3_BANK_FUSE_REGS	2
> > +#define XE_MAX_L3_BANK_FUSE_BITS	(32 * XE_MAX_L3_BANK_FUSE_REGS)
> >   typedef unsigned long xe_dss_mask_t[BITS_TO_LONGS(XE_MAX_DSS_FUSE_BITS)];
> >   typedef unsigned long xe_eu_mask_t[BITS_TO_LONGS(XE_MAX_EU_FUSE_BITS)];
> > +typedef unsigned long xe_l3_bank_mask_t[BITS_TO_LONGS(XE_MAX_L3_BANK_FUSE_BITS)];
> >   struct xe_mmio_range {
> >   	u32 start;
> > @@ -334,6 +337,9 @@ struct xe_gt {
> >   		/** @fuse_topo.eu_mask_per_dss: EU mask per DSS*/
> >   		xe_eu_mask_t eu_mask_per_dss;
> > +
> > +		/** @l3_bank_mask: L3 bank mask */
> > +		xe_l3_bank_mask_t l3_bank_mask;
> >   	} fuse_topo;
> >   	/** @steering: register steering for individual HW units */
> > diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> > index e80321b34918..0a064c12d80a 100644
> > --- a/drivers/gpu/drm/xe/xe_query.c
> > +++ b/drivers/gpu/drm/xe/xe_query.c
> > @@ -453,10 +453,11 @@ static int query_hwconfig(struct xe_device *xe,
> >   static size_t calc_topo_query_size(struct xe_device *xe)
> >   {
> >   	return xe->info.gt_count *
> > -		(3 * sizeof(struct drm_xe_query_topology_mask) +
> > +		(4 * sizeof(struct drm_xe_query_topology_mask) +
> 
> I know the 3 to 4 is for numbers of masks, can we make it a marco? For easy
> to read.

Sure, I will make it a separate patch as this is not caused by the new
code added here.

> 
> >   		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> >   		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> > -		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> > +		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
> > +		 sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask));
> >   }
> >   static int copy_mask(void __user **ptr,
> > @@ -515,6 +516,13 @@ static int query_gt_topology(struct xe_device *xe,
> >   				sizeof(gt->fuse_topo.eu_mask_per_dss));
> >   		if (err)
> >   			return err;
> > +
> > +		topo.type = DRM_XE_TOPO_L3_BANK;
> > +		err = copy_mask(&query_ptr, &topo,
> > +				gt->fuse_topo.l3_bank_mask,
> > +				sizeof(gt->fuse_topo.l3_bank_mask));
> > +		if (err)
> > +			return err;
> >   	}
> >   	return 0;
> > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > index 808ad1c308ec..aa917308cdeb 100644
> > --- a/include/uapi/drm/xe_drm.h
> > +++ b/include/uapi/drm/xe_drm.h
> > @@ -521,6 +521,7 @@ struct drm_xe_query_topology_mask {
> >   #define DRM_XE_TOPO_DSS_GEOMETRY	(1 << 0)
> >   #define DRM_XE_TOPO_DSS_COMPUTE		(1 << 1)
> >   #define DRM_XE_TOPO_EU_PER_DSS		(1 << 2)
> > +#define DRM_XE_TOPO_L3_BANK		(1 << 3)
> 
> Here is a good place to define numbers of masks
> for example
> 
> #define DRM_XE_TOPO_L3_BANK		(1 << 3)
> #define DRM_XE_TOPO_NUMBER_OF_MASKS	(4)

Unless this is required by user space we could keep it internal to the
source file where it is used.

Francois

> 
> 
> >   	/** @type: type of mask */
> >   	__u16 type;


More information about the Intel-xe mailing list