[PATCH v2] drm/xe/uapi: Expose SIMD16 EU mask in topology query

Lucas De Marchi lucas.demarchi at intel.com
Tue Jul 16 14:00:22 UTC 2024


On Thu, Jul 11, 2024 at 02:16:11PM GMT, Lucas De Marchi wrote:
>On Thu, Jul 11, 2024 at 01:45:15PM GMT, Rodrigo Vivi wrote:
>>On Wed, Jul 10, 2024 at 03:02:27PM -0700, Lucas De Marchi wrote:
>>>PVC, Xe2 and later platforms have 16-wide EUs. We were implicitly
>>>reporting for PVC the number of 16-wide EUs without giving userspace any
>>>hint that they were different than for other platforms. Xe2 and later
>>>also have 16-wide, but in those cases the reported number would
>>>correspond to the 8-wide count.
>>>
>>>To avoid confusion and make sure the right number is used by userspace
>>>depending on the platform, add a new item to the topology query and drop
>>>the one that is not available. The new mask reported for both PVC and
>>>Xe2 should now match the numbers reported via hwconfig.
>>>
>>>v2: Use a different topo item with EU type in its name to report the
>>>    new mask instead of adding the type itself as the item (Matt Roper)
>>>
>>>Signed-off-by: Lucas De Marchi <lucas.demarchi at intel.com>
>>>---
>>>
>>>Tested on LNL to check the behavior in dmesg, debugfs and ioctl.
>>>Test-with: https://lore.kernel.org/igt-dev/20240710220050.2169596-1-lucas.demarchi@intel.com/
>>>
>>> drivers/gpu/drm/xe/xe_gt_topology.c | 27 ++++++++++++++++++++++-----
>>> drivers/gpu/drm/xe/xe_gt_types.h    | 11 +++++++++++
>>> drivers/gpu/drm/xe/xe_query.c       |  4 +++-
>>> include/uapi/drm/xe_drm.h           | 10 +++++++++-
>>> 4 files changed, 45 insertions(+), 7 deletions(-)
>>>
>>>diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
>>>index 25ff03ab8448..5a1559edf3e9 100644
>>>--- a/drivers/gpu/drm/xe/xe_gt_topology.c
>>>+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
>>>@@ -6,6 +6,7 @@
>>> #include "xe_gt_topology.h"
>>>
>>> #include <linux/bitmap.h>
>>>+#include <linux/compiler.h>
>>>
>>> #include "regs/xe_gt_regs.h"
>>> #include "xe_assert.h"
>>>@@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...)
>>> }
>>>
>>> static void
>>>-load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
>>>+load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type)
>>> {
>>> 	struct xe_device *xe = gt_to_xe(gt);
>>> 	u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE);
>>>@@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
>>> 	if (GRAPHICS_VERx100(xe) < 1250)
>>> 		reg_val = ~reg_val & XELP_EU_MASK;
>>>
>>>-	/* On PVC, one bit = one EU */
>>>-	if (GRAPHICS_VERx100(xe) == 1260) {
>>>+	if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) {
>>>+		/* SIMD16 EUs, one bit == one EU */
>>>+		*eu_type = XE_GT_EU_TYPE_SIMD16;
>>> 		val = reg_val;
>>> 	} else {
>>>-		/* All other platforms, one bit = 2 EU */
>>>+		/* SIMD8 EUs, one bit == 2 EU */
>>>+		*eu_type = XE_GT_EU_TYPE_SIMD8;
>>> 		for (i = 0; i < fls(reg_val); i++)
>>> 			if (reg_val & BIT(i))
>>> 				val |= 0x3 << 2 * i;
>>>@@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt)
>>> 		      XEHP_GT_COMPUTE_DSS_ENABLE,
>>> 		      XEHPC_GT_COMPUTE_DSS_ENABLE_EXT,
>>> 		      XE2_GT_COMPUTE_DSS_2);
>>>-	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss);
>>>+	load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, &gt->fuse_topo.eu_type);
>>> 	load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask);
>>>
>>> 	p = drm_dbg_printer(&gt_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology");
>>>@@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt)
>>> 	xe_gt_topology_dump(gt, &p);
>>> }
>>>
>>>+static const char *eu_type_to_str(enum xe_gt_eu_type eu_type)
>>>+{
>>>+	switch (eu_type) {
>>>+	case XE_GT_EU_TYPE_SIMD16:
>>>+		return "simd16";
>>>+	case XE_GT_EU_TYPE_SIMD8:
>>>+		return "simd8";
>>>+	}
>>>+
>>>+	unreachable();
>>>+}
>>>+
>>> void
>>> xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
>>> {
>>>@@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p)
>>>
>>> 	drm_printf(p, "EU mask per DSS:     %*pb\n", XE_MAX_EU_FUSE_BITS,
>>> 		   gt->fuse_topo.eu_mask_per_dss);
>>>+	drm_printf(p, "EU type:             %s\n",
>>>+		   eu_type_to_str(gt->fuse_topo.eu_type));
>>>
>>> 	drm_printf(p, "L3 bank mask:        %*pb\n", XE_MAX_L3_BANK_MASK_BITS,
>>> 		   gt->fuse_topo.l3_bank_mask);
>>>diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
>>>index 6b5e0b45efb0..01ea87d8886b 100644
>>>--- a/drivers/gpu/drm/xe/xe_gt_types.h
>>>+++ b/drivers/gpu/drm/xe/xe_gt_types.h
>>>@@ -27,6 +27,11 @@ enum xe_gt_type {
>>> 	XE_GT_TYPE_MEDIA,
>>> };
>>>
>>>+enum xe_gt_eu_type {
>>>+	XE_GT_EU_TYPE_SIMD8,
>>>+	XE_GT_EU_TYPE_SIMD16,
>>>+};
>>>+
>>> #define XE_MAX_DSS_FUSE_REGS		3
>>> #define XE_MAX_DSS_FUSE_BITS		(32 * XE_MAX_DSS_FUSE_REGS)
>>> #define XE_MAX_EU_FUSE_REGS		1
>>>@@ -343,6 +348,12 @@ struct xe_gt {
>>>
>>> 		/** @fuse_topo.l3_bank_mask: L3 bank mask */
>>> 		xe_l3_bank_mask_t l3_bank_mask;
>>>+
>>>+		/**
>>>+		 * @fuse_tipo.eu_type: type/width of EU stored in
>>>+		 * fuse_topo.eu_mask_per_dss
>>>+		 */
>>>+		enum xe_gt_eu_type eu_type;
>>> 	} fuse_topo;
>>>
>>> 	/** @steering: register steering for individual HW units */
>>>diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c
>>>index 4e01df6b1b7a..73ef6e4c2dc9 100644
>>>--- a/drivers/gpu/drm/xe/xe_query.c
>>>+++ b/drivers/gpu/drm/xe/xe_query.c
>>>@@ -518,7 +518,9 @@ static int query_gt_topology(struct xe_device *xe,
>>> 		if (err)
>>> 			return err;
>>>
>>>-		topo.type = DRM_XE_TOPO_EU_PER_DSS;
>>>+		topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ?
>>>+			DRM_XE_TOPO_SIMD16_EU_PER_DSS :
>>>+			DRM_XE_TOPO_EU_PER_DSS;
>>
>>have you considered setting both for backward compatibility?
>>And then highlighting in the documentation this duplication in the case
>>of simd16 and that that should be checked first?!
>
>that would be a silently break, with software utilizing the wrong number
>until someone noticed, probably after force_probe removal... and also bring
>the question: does it have N SIMD16 EUs + M SIMD8 EUs?

humn... if we decide to set both for backward compatibility, why not
rather go with v1 of the patch?  v1 of the patch documents that the EU
reported in EU_PER_DSS is of type X. Having both masks here doesn't
answer that question and requires that we always convert back to the
simd8 number (and eventually to the simd16 number) for newer platforms.

so suppose 5 years from now we have simd123 (using arbitrary/absurd name
on purpose to avoid people jumping to conclusions):  would we keep
exposing the simd8, simd16, simdX, fooY numbers for the sake of
backward compatibility with previous HW?  On the other hand, the
alternative means: if userspace X really supports platform Y, it has to
know about that type of EU if it's using that mask for anything.

So my preference would be, in order:

1) v2
2) v1
3) v2-with-forever-bc-between-platforms

Both (2) and (3) have the silent misbehavior in userspace if userspace
didn't adapt to new UAPI.

Lucas De Marchi


More information about the Intel-xe mailing list