[PATCH v2] drm/xe/uapi: Expose SIMD16 EU mask in topology query

Souza, Jose jose.souza at intel.com
Tue Jul 16 18:39:26 UTC 2024


On Tue, 2024-07-16 at 09:00 -0500, Lucas De Marchi wrote:
> On Thu, Jul 11, 2024 at 02:16:11PM GMT, Lucas De Marchi wrote:
> > On Thu, Jul 11, 2024 at 01:45:15PM GMT, Rodrigo Vivi wrote:
> > > On Wed, Jul 10, 2024 at 03:02:27PM -0700, Lucas De Marchi wrote:
> > > > PVC, Xe2 and later platforms have 16-wide EUs. We were implicitly
> > > > reporting for PVC the number of 16-wide EUs without giving userspace any
> > > > hint that they were different than for other platforms. Xe2 and later
> > > > also have 16-wide, but in those cases the reported number would
> > > > correspond to the 8-wide count.
> > > > 
> > > > To avoid confusion and make sure the right number is used by userspace
> > > > depending on the platform, add a new item to the topology query and drop
> > > > the one that is not available. The new mask reported for both PVC and
> > > > Xe2 should now match the numbers reported via hwconfig.
> > > > 
> > > > v2: Use a different topo item with EU type in its name to report the
> > > >    new mask instead of adding the type itself as the item (Matt Roper)
> > > > 
> > > > Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
> > > > ---
> > > > 
> > > > Tested on LNL to check the behavior in dmesg, debugfs and ioctl.
> > > > Test-with: https://lore.kernel.org/igt-dev/20240710220050.2169596-1-lucas.demarchi@intel.com/
> > > > 
> > > > drivers/gpu/drm/xe/xe_gt_topology.c | 27 ++++++++++++++++++++++-----
> > > > drivers/gpu/drm/xe/xe_gt_types.h    | 11 +++++++++++
> > > > drivers/gpu/drm/xe/xe_query.c       |  4 +++-
> > > > include/uapi/drm/xe_drm.h           | 10 +++++++++-
> > > > 4 files changed, 45 insertions(+), 7 deletions(-)
> > > > 
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
> > > > index 25ff03ab8448..5a1559edf3e9 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt_topology.c
> > > > +++ b/drivers/gpu/drm/xe/xe_gt_topology.c
> > > > @@ -6,6 +6,7 @@
> > > > #include "xe_gt_topology.h"
> > > > 
> > > > #include <linux/bitmap.h>
> > > > +#include <linux/compiler.h>
> > > > 
> > > > #include "regs/xe_gt_regs.h"
> > > > #include "xe_assert.h"
> > > > @@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
> > > > }
> > > > 
> > > > static void
> > > > -load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> > > > +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
> > > > {
> > > > 	struct xe_device *xe = gt_to_xe(gt);
> > > > 	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
> > > > @@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
> > > > 	if (GRAPHICS_VERx100(xe) < 1250)
> > > > 		reg_val = ~reg_val & XELP_EU_MASK;
> > > > 
> > > > -	/* On PVC, one bit = one EU */
> > > > -	if (GRAPHICS_VERx100(xe) == 1260) {
> > > > +	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
> > > > +		/* SIMD16 EUs, one bit == one EU */
> > > > +		*eu_type = XE_GT_EU_TYPE_SIMD16;
> > > > 		val = reg_val;
> > > > 	} else {
> > > > -		/* All other platforms, one bit = 2 EU */
> > > > +		/* SIMD8 EUs, one bit == 2 EU */
> > > > +		*eu_type = XE_GT_EU_TYPE_SIMD8;
> > > > 		for (i = 0; i < fls(reg_val); i++)
> > > > 			if (reg_val & BIT(i))
> > > > 				val |= 0x3 << 2 * i;
> > > > @@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt)
> > > > 		      XEHP_GT_COMPUTE_DSS_ENABLE,
> > > > 		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
> > > > 		      XE2_GT_COMPUTE_DSS_2);
> > > > -	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
> > > > +	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
> > > > 	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
> > > > 
> > > > 	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
> > > > @@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt)
> > > > 	xe_gt_topology_dump(gt, &p);
> > > > }
> > > > 
> > > > +static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
> > > > +{
> > > > +	switch (eu_type) {
> > > > +	case XE_GT_EU_TYPE_SIMD16:
> > > > +		return "simd16";
> > > > +	case XE_GT_EU_TYPE_SIMD8:
> > > > +		return "simd8";
> > > > +	}
> > > > +
> > > > +	unreachable();
> > > > +}
> > > > +
> > > > void
> > > > xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> > > > {
> > > > @@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
> > > > 
> > > > 	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
> > > > 		   gt->fuse_topo.eu_mask_per_dss);
> > > > +	drm_printf(p, "EU type:             %s\n",
> > > > +		   eu_type_to_str(gt->fuse_topo.eu_type));
> > > > 
> > > > 	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
> > > > 		   gt->fuse_topo.l3_bank_mask);
> > > > diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
> > > > index 6b5e0b45efb0..01ea87d8886b 100644
> > > > --- a/drivers/gpu/drm/xe/xe_gt_types.h
> > > > +++ b/drivers/gpu/drm/xe/xe_gt_types.h
> > > > @@ -27,6 +27,11 @@ enum xe_gt_type {
> > > > 	XE_GT_TYPE_MEDIA,
> > > > };
> > > > 
> > > > +enum xe_gt_eu_type {
> > > > +	XE_GT_EU_TYPE_SIMD8,
> > > > +	XE_GT_EU_TYPE_SIMD16,
> > > > +};
> > > > +
> > > > #define XE_MAX_DSS_FUSE_REGS		3
> > > > #define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
> > > > #define XE_MAX_EU_FUSE_REGS		1
> > > > @@ -343,6 +348,12 @@ struct xe_gt {
> > > > 
> > > > 		/** @fuse_topo.l3_bank_mask: L3 bank mask */
> > > > 		xe_l3_bank_mask_t l3_bank_mask;
> > > > +
> > > > +		/**
> > > > +		 * @fuse_tipo.eu_type: type/width of EU stored in
> > > > +		 * fuse_topo.eu_mask_per_dss
> > > > +		 */
> > > > +		enum xe_gt_eu_type eu_type;
> > > > 	} fuse_topo;
> > > > 
> > > > 	/** @steering: register steering for individual HW units */
> > > > diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
> > > > index 4e01df6b1b7a..73ef6e4c2dc9 100644
> > > > --- a/drivers/gpu/drm/xe/xe_query.c
> > > > +++ b/drivers/gpu/drm/xe/xe_query.c
> > > > @@ -518,7 +518,9 @@ static int query_gt_topology(struct xe_device *xe,
> > > > 		if (err)
> > > > 			return err;
> > > > 
> > > > -		topo.type = DRM_XE_TOPO_EU_PER_DSS;
> > > > +		topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
> > > > +			DRM_XE_TOPO_SIMD16_EU_PER_DSS :
> > > > +			DRM_XE_TOPO_EU_PER_DSS;
> > > 
> > > have you considered setting both for backward compatibility?
> > > And then highlighting in the documentation this duplication in the case
> > > of simd16 and that that should be checked first?!
> > 
> > that would be a silently break, with software utilizing the wrong number
> > until someone noticed, probably after force_probe removal... and also bring
> > the question: does it have N SIMD16 EUs + M SIMD8 EUs?
> 
> humn... if we decide to set both for backward compatibility, why not
> rather go with v1 of the patch?  v1 of the patch documents that the EU
> reported in EU_PER_DSS is of type X. Having both masks here doesn't
> answer that question and requires that we always convert back to the
> simd8 number (and eventually to the simd16 number) for newer platforms.
> 
> so suppose 5 years from now we have simd123 (using arbitrary/absurd name
> on purpose to avoid people jumping to conclusions):  would we keep
> exposing the simd8, simd16, simdX, fooY numbers for the sake of
> backward compatibility with previous HW?  On the other hand, the
> alternative means: if userspace X really supports platform Y, it has to
> know about that type of EU if it's using that mask for anything.
> 
> So my preference would be, in order:
> 
> 1) v2
> 2) v1
> 3) v2-with-forever-bc-between-platforms
> 
> Both (2) and (3) have the silent misbehavior in userspace if userspace
> didn't adapt to new UAPI.

option 1) v2 is better in my opinion.

> 
> Lucas De Marchi



More information about the Intel-xe mailing list