[PATCH] drm/i915/sseu: Disassociate internal EU mask representation from uapi

Matt Roper matthew.d.roper at intel.com
Thu Apr 14 21:55:21 UTC 2022


The sseu_dev_info structure currently stores slice, subslice, and EU
masks in u8[] arrays that match the format returned by the query uapi.
While this makes the query handler's copy_to_user simple, it makes the
masks harder to work with inside the driver.  On gen11 platforms and
beyond it also wastes a lot of space because the EU mask is
architecturally guaranteed to be identical for each subslice of the
platform --- on Xe_HP we currently allocate 64 bytes of storage for the
EU mask, even though we only truly need 2 bytes; the wasted space will
only continue to increase as future platforms increase DSS counts.

Let's switch to a more natural internal representation of the EU mask,
in a format that's compatible with linux/bitmap.h operations.  On
pre-gen11 platforms we'll store the full-device EU mask (which can be up
to 96 bits on gen8/gen9) and on gen11+ we'll store a single-SS EU mask
(which only needs 16 bits).

Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
---
 drivers/gpu/drm/i915/gt/intel_sseu.c | 115 +++++++++++++++++++--------
 drivers/gpu/drm/i915/gt/intel_sseu.h |  27 ++++++-
 drivers/gpu/drm/i915/i915_query.c    |  28 +++++--
 3 files changed, 133 insertions(+), 37 deletions(-)

diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 9881a6790574..296c505aa598 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -80,45 +80,59 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
 	return hweight32(intel_sseu_get_subslices(sseu, slice));
 }
 
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
-		       int subslice)
+/* Return the enabled-EU bitmap of one subslice, with its first EU at bit 0. */
+static intel_sseu_eu_mask_t
+sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int subslice)
 {
-	int slice_stride = sseu->max_subslices * sseu->eu_stride;
-
-	return slice * slice_stride + subslice * sseu->eu_stride;
-}
-
-static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
-			int subslice)
-{
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-	u16 eu_mask = 0;
+	intel_sseu_eu_mask_t ret = {};
+	int ss_eus = sseu->max_eus_per_subslice;
+	int first_eu = (slice * sseu->max_subslices + subslice) * ss_eus;
+
+	if (sseu->has_common_ss_eumask) {
+		/* One shared mask: report it for enabled subslices only. */
+		if (intel_sseu_get_subslices(sseu, 0) & BIT(subslice))
+			return sseu->eu_mask;
+		return ret;
+	}
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		eu_mask |=
-			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+	/* Extract the subslice's bits: shift them down to bit 0, drop the rest. */
+	bitmap_shift_right(ret.b, sseu->eu_mask.b, first_eu, I915_MAX_EU_FUSE_BITS);
+	bitmap_clear(ret.b, ss_eus, I915_MAX_EU_FUSE_BITS - ss_eus);
 
-	return eu_mask;
+	return ret;
 }
 
 static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
-			 u16 eu_mask)
+			 u32 eu_mask)
 {
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+	intel_sseu_eu_mask_t eu_bitmap = {};
+	int numbits = sizeof(eu_mask);
+	int first_eu =
+		slice * sseu->max_subslices * sseu->max_eus_per_subslice +
+		subslice * sseu->max_eus_per_subslice;
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		sseu->eu_mask[offset + i] =
-			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+	bitmap_from_arr32(eu_bitmap.b, &eu_mask, numbits);
+
+	if (sseu->has_common_ss_eumask) {
+		WARN_ON(slice > 0 || subslice > 0);
+		bitmap_copy(sseu->eu_mask.b, eu_bitmap.b, numbits);
+	} else {
+		bitmap_shift_left(eu_bitmap.b, eu_bitmap.b, first_eu, numbits);
+		bitmap_clear(sseu->eu_mask.b, first_eu, numbits);
+		bitmap_or(sseu->eu_mask.b, sseu->eu_mask.b, eu_bitmap.b, numbits);
+	}
 }
 
 static u16 compute_eu_total(const struct sseu_dev_info *sseu)
 {
-	u16 i, total = 0;
+	int mult;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-		total += hweight8(sseu->eu_mask[i]);
+	if (sseu->has_common_ss_eumask)
+		mult = hweight32(intel_sseu_get_subslices(sseu, 0));
+	else
+		mult = 1;
 
-	return total;
+	return mult * bitmap_weight(sseu->eu_mask.b, I915_MAX_EU_FUSE_BITS);
 }
 
 static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
@@ -167,6 +181,7 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
 			if (intel_sseu_has_subslice(sseu, s, ss))
 				sseu_set_eus(sseu, s, ss, eu_en);
 	}
+	sseu->has_common_ss_eumask = 1;
 	sseu->eu_per_subslice = hweight16(eu_en);
 	sseu->eu_total = compute_eu_total(sseu);
 }
@@ -738,10 +753,11 @@ static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
 			   intel_sseu_get_subslices(sseu, s));
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
-			u16 enabled_eus = sseu_get_eus(sseu, s, ss);
+			intel_sseu_eu_mask_t enabled_eus = sseu_get_eus(sseu, s, ss);
 
-			drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
-				   ss, hweight16(enabled_eus), enabled_eus);
+			drm_printf(p, "\tsubslice%d: %u EUs (0x%*pb)\n",
+				   ss, bitmap_weight(enabled_eus.b, I915_MAX_EU_FUSE_BITS),
+				   I915_MAX_EU_FUSE_BITS, enabled_eus.b);
 		}
 	}
 }
@@ -754,12 +770,13 @@ static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
 	int dss;
 
 	for (dss = 0; dss < sseu->max_subslices; dss++) {
-		u16 enabled_eus = sseu_get_eus(sseu, 0, dss);
+		intel_sseu_eu_mask_t enabled_eus = sseu_get_eus(sseu, 0, dss);
 
-		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
+		drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%*pb)\n", dss,
 			   str_yes_no(g_dss_mask & BIT(dss)),
 			   str_yes_no(c_dss_mask & BIT(dss)),
-			   hweight16(enabled_eus), enabled_eus);
+			   bitmap_weight(enabled_eus.b, I915_MAX_EU_FUSE_BITS),
+			   I915_MAX_EU_FUSE_BITS, enabled_eus.b);
 	}
 }
 
@@ -793,3 +810,39 @@ u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
 	return slice_mask;
 }
 
+/**
+ * intel_sseu_get_uapi_eumask - Allocate and fill a uapi-compatible EU mask
+ * @sseu: device SSEU info; must use a common per-subslice EU mask
+ *
+ * On gen11+ platforms each subslice has the same set of EUs enabled/disabled
+ * so we only store the data for a single subslice in our internal mask.  When
+ * queried by userspace, we need to expand this into a full-device EU mask.
+ *
+ * Returns a newly allocated buffer holding the full-device EU mask in the
+ * format expected by userspace, which the caller must kfree(), or NULL if
+ * allocation fails or @sseu does not use a common per-subslice EU mask.
+ */
+u8 *intel_sseu_get_uapi_eumask(const struct sseu_dev_info *sseu)
+{
+	u8 *buf;
+	u32 ssmask;
+	int ss;
+
+	if (WARN_ON(!sseu->has_common_ss_eumask))
+		return NULL;
+
+	WARN_ON(sseu->max_slices > 1);
+	ssmask = intel_sseu_get_subslices(sseu, 0);
+
+	buf = kzalloc(sseu->max_slices * sseu->max_subslices * sseu->eu_stride,
+		      GFP_KERNEL);
+	if (!buf)
+		return NULL;
+
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		if (ssmask & BIT(ss))
+			memcpy(&buf[ss * sseu->eu_stride], sseu->eu_mask.b,
+			       sseu->eu_stride);
+
+	return buf;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 5c078df4729c..b6e49a2a6973 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -52,12 +52,34 @@ struct drm_printer;
 #define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
 #define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
 
+/*
+ * Maximum number of 32-bit registers used by hardware to express the
+ * enabled/disabled EUs.  Note that starting with gen11 each SS has an
+ * identical set of EUs enabled/disabled, so newer platforms may require less
+ * storage than the older gen8/gen9 platforms.
+ */
+#define I915_MAX_EU_FUSE_REGS	3
+#define I915_MAX_EU_FUSE_BITS	(I915_MAX_EU_FUSE_REGS * 32)
+
+/* EU bitmap of I915_MAX_EU_FUSE_BITS bits; pass .b to linux/bitmap.h helpers */
+typedef struct {
+	unsigned long b[BITS_TO_LONGS(I915_MAX_EU_FUSE_BITS)];
+} intel_sseu_eu_mask_t;
+
 struct sseu_dev_info {
 	u8 slice_mask;
 	u8 subslice_mask[GEN_SS_MASK_SIZE];
 	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
 	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
-	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
+
+	/*
+	 * On gen11 and beyond, eu_mask represents the EUs enabled/disabled
+	 * for a single subslice (each SS has identical EUs).  On earlier
+	 * platforms it holds the individual enable/disable state for each
+	 * EU of the platform.
+	 */
+	intel_sseu_eu_mask_t eu_mask;
+
 	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
@@ -66,6 +88,7 @@ struct sseu_dev_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+	u8 has_common_ss_eumask:1;
 
 	/* Topology fields */
 	u8 max_slices;
@@ -145,4 +168,6 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
 
 u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
 
+u8 *intel_sseu_get_uapi_eumask(const struct sseu_dev_info *sseu);
+
 #endif /* __INTEL_SSEU_H__ */
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index b5ca00cb6cf6..563de06add30 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -81,11 +81,29 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 			   sseu->subslice_mask, subslice_length))
 		return -EFAULT;
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					   sizeof(topo) +
-					   slice_length + subslice_length),
-			   sseu->eu_mask, eu_length))
-		return -EFAULT;
+	if (sseu->has_common_ss_eumask) {
+		u8 *uapi_eumask;
+		int ret;
+
+		/* Expand internal representation to full-device eumask */
+		uapi_eumask = intel_sseu_get_uapi_eumask(sseu);
+		if (!uapi_eumask)
+			return -ENOMEM;
+
+		ret = copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+						   sizeof(topo) +
+						   slice_length + subslice_length),
+				   uapi_eumask, eu_length);
+		kfree(uapi_eumask);
+		if (ret)
+			return -EFAULT;
+	} else {
+		if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+						 sizeof(topo) +
+						 slice_length + subslice_length),
+				 sseu->eu_mask.b, eu_length))
+			return -EFAULT;
+	}
 
 	return total_length;
 }
-- 
2.34.1



More information about the Intel-gfx-trybot mailing list