[PATCH] drm/xe/uapi: Expose EU width via topology query

Souza, Jose jose.souza at intel.com
Wed Jul 10 15:38:35 UTC 2024


On Wed, 2024-07-10 at 09:36 -0500, Lucas De Marchi wrote:
> On Wed, Jul 10, 2024 at 09:05:43AM GMT, Jose Souza wrote:
> > On Tue, 2024-07-09 at 22:53 -0700, Lucas De Marchi wrote:
> > > PVC, Xe2 and later platforms have a 16 wide EU. We were implicitly
> > > reporting for PVC the number of 16-wide EUs without giving userspace any
> > > hint that they were different than for other platforms. Xe2 and later
> > > also have 16-wide, but in those case the reported number would
> > > correspond to the 8-wide count.
> > > 
> > > Add a new item to the topology that aims to clarify what the EU_PER_DSS
> > > mask means. This new item uses mask[] as a single u8 value. Xe2 and
> > > later platforms start returning the number of SIMD16 EUs.
> > > 
> > > Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> > > ---
> > > 
> > > Tested on TGL and LNL to check the behavior in dmesg, debugfs and ioctl.
> > > Test-with: https://lore.kernel.org/igt-dev/20240710054446.1985069-1-lucas.demarchi@intel.com/
> > > 
> > >  drivers/gpu/drm/xe/xe_gt_topology.c | 13 ++++++++-----
> > >  drivers/gpu/drm/xe/xe_gt_types.h    |  6 ++++++
> > >  drivers/gpu/drm/xe/xe_query.c       | 12 ++++++++++--
> > >  include/uapi/drm/xe_drm.h           | 20 ++++++++++++++++++++
> > >  4 files changed, 44 insertions(+), 7 deletions(-)
> > > 
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> > > index 25ff03ab8448..55ae1c79048f 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> > > +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> > > @@ -31,7 +31,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
> > >  }
> > > 
> > >  static void
> > > -load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> > > +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, u8 *eu_width)
> > >  {
> > >  	struct xe_device *xe = gt_to_xe(gt);
> > >  	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
> > > @@ -47,11 +47,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> > >  	if (GRAPHICS_VERx100(xe) < 1250)
> > >  		reg_val = ~reg_val & XELP_EU_MASK;
> > > 
> > > -	/* On PVC, one bit = one EU */
> > > -	if (GRAPHICS_VERx100(xe) == 1260) {
> > > +	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
> > > +		/* SIMD16 EUs, one bit = one EU */
> > > +		*eu_width = DRM_XE_TOPO_EU_WIDTH_SIMD16;
> > 
> > In my opinion would be better to return the actual/HW EU count and legacy/Windows/Marketing EU count.
> 
> I think you interpreted this wrong.
> Actual/HW is what we are currently *not* exporting... not sure
> those 2 groups make sense the way you grouped them:
> 
> 	- actual/HW: new number, also matches hwconfig
> 	- legacy: SIMD8
> 	- Windows: I'm not sure, probably depends on what stack you're
> 	  looking, but I think it matches hwconfig, so the actual/HW
> 	- Marketing:  actual/HW

legacy, Windows and Marketing EU count is the same, based on SIMD8.
There was a few threads asking to move Xe2 to actual/HW EU count but that never moved forward.

So would be better if both numbers were provided, the Marketing/legacy whatever name that should be printed to user and the actual/HW EU count used to
program 3D/compute states.

If then in future a new platforms moves to another SIMD number, nothing in the uAPI needs to be modified.

> 
> However if we export the actual number and try to compare with other
> platforms, it doesn't make sense because we are counting apples and
> oranges. Hence the need to say if it's apple or orange.
> 
> The alternative is to always convert it to SIMD8 (and keep doing it
> forever for all new platforms) to maintain compatibility.
> 
> I'm not calling the old one "legacy" on purpose: it only created a huge
> confusion (spec still to be updated). I want to export what the HW
> actually is, then userspace can decide what it wants to use that number
> for.  Also note that if you get the max # from hwconfig, it is expected to
> match this number (see igt test xe_query at query-hwconfig)
> 
> Always keep the conversion in the kernel, always exporting as SIMD8
> number, not matching what the hw actually is, not being very future
> proof, not matching hwconfig, doesn't sound appealing.
> 
> Lucas De Marchi
> 
> > 
> > >  		val = reg_val;
> > >  	} else {
> > > -		/* All other platforms, one bit = 2 EU */
> > > +		/* SIMD8 EUs, one bit = 2 EU */
> > > +		*eu_width = DRM_XE_TOPO_EU_WIDTH_SIMD8;
> > >  		for (i = 0; i < fls(reg_val); i++)
> > >  			if (reg_val & BIT(i))
> > >  				val |= 0x3 << 2 * i;
> > > @@ -213,7 +215,7 @@ xe_gt_topology_init(struct xe_gt *gt)
> > >  		      XEHP_GT_COMPUTE_DSS_ENABLE,
> > >  		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
> > >  		      XE2_GT_COMPUTE_DSS_2);
> > > -	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> > > +	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_width);
> > >  	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
> > > 
> > >  	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
> > > @@ -231,6 +233,7 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> > > 
> > >  	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
> > >  		   gt->fuse_topo.eu_mask_per_dss);
> > > +	drm_printf(p, "EU width:            %u\n", gt->fuse_topo.eu_width);
> > > 
> > >  	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
> > >  		   gt->fuse_topo.l3_bank_mask);
> > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > > index 6b5e0b45efb0..71b4834080f5 100644
> > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > @@ -343,6 +343,12 @@ struct xe_gt {
> > > 
> > >  		/** @fuse_topo.l3_bank_mask: L3 bank mask */
> > >  		xe_l3_bank_mask_t l3_bank_mask;
> > > +
> > > +		/**
> > > +		 * @fuse_topo.eu_width: EU width - SIMD8, SIMD16, etc. See
> > > +		 * enum drm_xe_topo_eu_width.
> > > +		 */
> > > +		u8 eu_width;
> > >  	} fuse_topo;
> > > 
> > >  	/** @steering: register steering for individual HW units */
> > > diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> > > index 4e01df6b1b7a..82c966b73302 100644
> > > --- a/drivers/gpu/drm/xe/xe_query.c
> > > +++ b/drivers/gpu/drm/xe/xe_query.c
> > > @@ -455,11 +455,12 @@ static int query_hwconfig(struct xe_device *xe,
> > >  static size_t calc_topo_query_size(struct xe_device *xe)
> > >  {
> > >  	return xe->info.gt_count *
> > > -		(4 * sizeof(struct drm_xe_query_topology_mask) +
> > > +		(5 * sizeof(struct drm_xe_query_topology_mask) +
> > >  		 sizeof_field(struct xe_gt, fuse_topo.g_dss_mask) +
> > >  		 sizeof_field(struct xe_gt, fuse_topo.c_dss_mask) +
> > >  		 sizeof_field(struct xe_gt, fuse_topo.l3_bank_mask) +
> > > -		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss));
> > > +		 sizeof_field(struct xe_gt, fuse_topo.eu_mask_per_dss) +
> > > +		 sizeof_field(struct xe_gt, fuse_topo.eu_width));
> > >  }
> > > 
> > >  static int copy_mask(void __user **ptr,
> > > @@ -524,6 +525,13 @@ static int query_gt_topology(struct xe_device *xe,
> > >  				sizeof(gt->fuse_topo.eu_mask_per_dss));
> > >  		if (err)
> > >  			return err;
> > > +
> > > +		topo.type = DRM_XE_TOPO_EU_WIDTH;
> > > +		err = copy_mask(&query_ptr, &topo,
> > > +				&gt->fuse_topo.eu_width,
> > > +				sizeof(gt->fuse_topo.eu_width));
> > > +		if (err)
> > > +			return err;
> > >  	}
> > > 
> > >  	return 0;
> > > diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h
> > > index 19619d4952a8..6114a9064849 100644
> > > --- a/include/uapi/drm/xe_drm.h
> > > +++ b/include/uapi/drm/xe_drm.h
> > > @@ -491,6 +491,19 @@ struct drm_xe_query_gt_list {
> > >  	struct drm_xe_gt gt_list[];
> > >  };
> > > 
> > > +enum drm_xe_topo_eu_width {
> > > +	/**
> > > +	 * @DRM_XE_TOPO_EU_WIDTH_SIMD8 - EUs reported with
> > > +	 * DRM_XE_TOPO_EU_PER_DSS have SIMD8 width
> > > +	 */
> > > +	DRM_XE_TOPO_EU_WIDTH_SIMD8 = 0,
> > > +	/**
> > > +	 * @DRM_XE_TOPO_EU_WIDTH_SIMD16 - EUs reported with
> > > +	 * DRM_XE_TOPO_EU_PER_DSS have SIMD16 width
> > > +	 */
> > > +	DRM_XE_TOPO_EU_WIDTH_SIMD16,
> > > +};
> > > +
> > >  /**
> > >   * struct drm_xe_query_topology_mask - describe the topology mask of a GT
> > >   *
> > > @@ -518,6 +531,12 @@ struct drm_xe_query_gt_list {
> > >   *    containing the following in mask:
> > >   *    ``EU_PER_DSS    ff ff 00 00 00 00 00 00``
> > >   *    means each DSS has 16 EU.
> > > + *  - %DRM_XE_TOPO_EU_WIDTH - To query the width of EUs as per
> > > + *    enum drm_xe_topo_eu_width. It always has num_bytes == 1 and the only byte
> > > + *    in the mask being the value. For example, a query response containing the
> > > + *    following in mask:
> > > + *    ``EU_WIDTH    00``
> > > + *    means the EUs are SIMD8.
> > >   */
> > >  struct drm_xe_query_topology_mask {
> > >  	/** @gt_id: GT ID the mask is associated with */
> > > @@ -527,6 +546,7 @@ struct drm_xe_query_topology_mask {
> > >  #define DRM_XE_TOPO_DSS_COMPUTE		2
> > >  #define DRM_XE_TOPO_L3_BANK		3
> > >  #define DRM_XE_TOPO_EU_PER_DSS		4
> > > +#define DRM_XE_TOPO_EU_WIDTH		5
> > >  	/** @type: type of mask */
> > >  	__u16 type;
> > > 
> > 



More information about the Intel-xe mailing list