[PATCH] Decouple internal sseu from uapi

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Wed Mar 30 12:16:32 UTC 2022


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

- Uses multi-dimensional arrays and linux/bitmap.h.

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Matt Roper <matthew.d.roper at intel.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_context.c  |   9 +-
 drivers/gpu/drm/i915/gt/intel_engine_types.h |   2 +-
 drivers/gpu/drm/i915/gt/intel_sseu.c         | 218 ++++++++++---------
 drivers/gpu/drm/i915/gt/intel_sseu.h         | 123 +++++++----
 drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c |  57 +++--
 drivers/gpu/drm/i915/gt/intel_workarounds.c  |   7 +-
 drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c   |   2 +-
 drivers/gpu/drm/i915/i915_getparam.c         |   6 +-
 drivers/gpu/drm/i915/i915_query.c            |  69 ++++--
 9 files changed, 273 insertions(+), 220 deletions(-)

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 54a901673e94..5394fbed8bf0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1859,10 +1859,10 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 		return -EINVAL;
 
 	/* Check validity against hardware. */
-	if (user->slice_mask & ~device->slice_mask)
+	if (user->slice_mask & ~intel_sseu_slice_mask(device))
 		return -EINVAL;
 
-	if (user->subslice_mask & ~device->subslice_mask[0])
+	if (user->subslice_mask & ~intel_sseu_get_subslices(device, 0))
 		return -EINVAL;
 
 	if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1875,8 +1875,9 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
 
 	/* Part specific restrictions. */
 	if (GRAPHICS_VER(i915) == 11) {
-		unsigned int hw_s = hweight8(device->slice_mask);
-		unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+		unsigned int hw_s = intel_sseu_slice_count(device);
+		unsigned int hw_ss_per_s =
+			intel_sseu_subslices_per_slice(device, 0);
 		unsigned int req_s = hweight8(context->slice_mask);
 		unsigned int req_ss = hweight8(context->subslice_mask);
 
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index eac20112709c..36eefebe3add 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -630,7 +630,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
 }
 
 #define instdone_has_slice(dev_priv___, sseu___, slice___) \
-	((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
+	(GRAPHICS_VER(dev_priv___) == 7 ? 1 : intel_sseu_has_slice((sseu___), (slice___)))
 
 #define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
 	(GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 9881a6790574..8cd081e3e767 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -12,17 +12,35 @@
 
 #include "linux/string_helpers.h"
 
-void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
-			 u8 max_subslices, u8 max_eus_per_subslice)
+struct intel_sseu intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
 {
+	struct intel_sseu value = {
+		.slice_mask = (u8)bitmap_get_value8(sseu->slice_mask.b, 0),
+		.subslice_mask = (u8)bitmap_get_value8(sseu->ss_mask[0].b, 0),
+		.min_eus_per_subslice = sseu->max_eus_per_subslice,
+		.max_eus_per_subslice = sseu->max_eus_per_subslice,
+	};
+
+	return value;
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu,
+			 unsigned int max_slices,
+			 unsigned int max_subslices,
+			 unsigned int max_eus_per_subslice)
+{
+	GEM_BUG_ON(max_slices > BITMAP_BITS(sseu->slice_mask.b));
+	GEM_BUG_ON(max_slices > ARRAY_SIZE(sseu->ss_mask));
+	GEM_BUG_ON(max_slices > ARRAY_SIZE(sseu->eu_mask));
+
+	GEM_BUG_ON(max_subslices > BITMAP_BITS(sseu->ss_mask[0].b));
+	GEM_BUG_ON(max_subslices > ARRAY_SIZE(sseu->eu_mask[0]));
+
+	GEM_BUG_ON(max_eus_per_subslice > BITMAP_BITS(sseu->eu_mask[0][0].b));
+
 	sseu->max_slices = max_slices;
 	sseu->max_subslices = max_subslices;
 	sseu->max_eus_per_subslice = max_eus_per_subslice;
-
-	sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
-	GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
-	sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
-	GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
 }
 
 unsigned int
@@ -30,121 +48,106 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
 	unsigned int i, total = 0;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
-		total += hweight8(sseu->subslice_mask[i]);
+	for (i = 0; i < ARRAY_SIZE(sseu->ss_mask); i++)
+		total += bitmap_weight(sseu->ss_mask[i].b,
+				       BITMAP_BITS(sseu->ss_mask[i].b));
 
 	return total;
 }
 
-static u32
-sseu_get_subslices(const struct sseu_dev_info *sseu,
-		   const u8 *subslice_mask, u8 slice)
+static u32 sseu_get_subslices(const unsigned long *ss_mask, int nrbits)
 {
-	int i, offset = slice * sseu->ss_stride;
-	u32 mask = 0;
+	u32 mask;
 
-	GEM_BUG_ON(slice >= sseu->max_slices);
-
-	for (i = 0; i < sseu->ss_stride; i++)
-		mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
+	bitmap_to_arr32(&mask, ss_mask, sizeof(mask) * BITS_PER_BYTE);
 
 	return mask;
 }
 
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, int slice)
 {
-	return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+
+	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->ss_mask));
+	return sseu_get_subslices(sseu->ss_mask[slice].b,
+				  BITMAP_BITS(sseu->ss_mask[slice].b));
 }
 
 static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
 {
-	return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
+	return sseu_get_subslices(sseu->geometry_ss_mask[0].b,
+				  BITMAP_BITS(sseu->geometry_ss_mask[0].b));
 }
 
 u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
 {
-	return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
+	return sseu_get_subslices(sseu->compute_ss_mask[0].b,
+				  BITMAP_BITS(sseu->compute_ss_mask[0].b));
 }
 
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
-			      u8 *subslice_mask, u32 ss_mask)
+static void __sseu_set_subslices(unsigned long *bitmap, int nrbits, u32 ss)
 {
-	int offset = slice * sseu->ss_stride;
-
-	memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
+	GEM_BUG_ON(ss && (__fls(ss) > nrbits));
+	bitmap_from_arr32(bitmap, &ss, sizeof(ss) * BITS_PER_BYTE);
 }
 
+#define sseu_set_subslices(b, ss) \
+	__sseu_set_subslices((b), BITMAP_BITS(b), (ss))
+
 unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, int slice)
 {
-	return hweight32(intel_sseu_get_subslices(sseu, slice));
-}
-
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
-		       int subslice)
-{
-	int slice_stride = sseu->max_subslices * sseu->eu_stride;
-
-	return slice * slice_stride + subslice * sseu->eu_stride;
+	return bitmap_weight(sseu->ss_mask[slice].b,
+			     BITMAP_BITS(sseu->ss_mask[slice].b));
 }
 
 static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
 			int subslice)
 {
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
-	u16 eu_mask = 0;
+	const unsigned int nrbits =
+		BITMAP_BITS(sseu->eu_mask[slice][subslice].b);
+	u32 mask;
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		eu_mask |=
-			((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+	GEM_BUG_ON(nrbits > sizeof(mask) * BITS_PER_BYTE);
+	bitmap_to_arr32(&mask, sseu->eu_mask[slice][subslice].b, nrbits);
 
-	return eu_mask;
+	return (u16)mask;
 }
 
 static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
-			 u16 eu_mask)
+			 u32 mask)
 {
-	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+	const unsigned int nrbits =
+		BITMAP_BITS(sseu->eu_mask[slice][subslice].b);
 
-	for (i = 0; i < sseu->eu_stride; i++)
-		sseu->eu_mask[offset + i] =
-			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+	GEM_BUG_ON(mask && (__fls(mask) > nrbits));
+	bitmap_from_arr32(sseu->eu_mask[slice][subslice].b, &mask,
+			  sizeof(mask) * BITS_PER_BYTE);
 }
 
-static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+static unsigned int compute_eu_total(const struct sseu_dev_info *sseu)
 {
-	u16 i, total = 0;
+	unsigned int s, ss, total = 0;
 
-	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
-		total += hweight8(sseu->eu_mask[i]);
+	for (s = 0; s < ARRAY_SIZE(sseu->ss_mask); s++) {
+		for (ss = 0; ss < ARRAY_SIZE(sseu->eu_mask[s]); ss++) {
+			total += bitmap_weight(sseu->eu_mask[s][ss].b,
+					       BITMAP_BITS(sseu->eu_mask[s][ss].b));
+		}
+	}
 
 	return total;
 }
 
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
-{
-	u32 ss_mask;
-
-	ss_mask = ss_en >> (s * sseu->max_subslices);
-	ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
-
-	return ss_mask;
-}
-
 static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
 				    u32 g_ss_en, u32 c_ss_en, u16 eu_en)
 {
 	int s, ss;
 
-	/* g_ss_en/c_ss_en represent entire subslice mask across all slices */
-	GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
-		   sizeof(g_ss_en) * BITS_PER_BYTE);
-
 	for (s = 0; s < sseu->max_slices; s++) {
 		if ((s_en & BIT(s)) == 0)
 			continue;
 
-		sseu->slice_mask |= BIT(s);
+		intel_sseu_enable_slice(sseu, s);
 
 		/*
 		 * XeHP introduces the concept of compute vs geometry DSS. To
@@ -155,13 +158,9 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
 		 * for the purposes of selecting subslices to use in a
 		 * particular GEM context.
 		 */
-		intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
-					 get_ss_stride_mask(sseu, s, c_ss_en));
-		intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
-					 get_ss_stride_mask(sseu, s, g_ss_en));
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 get_ss_stride_mask(sseu, s,
-							    g_ss_en | c_ss_en));
+		sseu_set_subslices(sseu->compute_ss_mask[s].b, c_ss_en);
+		sseu_set_subslices(sseu->geometry_ss_mask[s].b, g_ss_en);
+		sseu_set_subslices(sseu->ss_mask[s].b, g_ss_en | c_ss_en);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++)
 			if (intel_sseu_has_subslice(sseu, s, ss))
@@ -261,11 +260,11 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 	u32 fuse;
 	u8 subslice_mask = 0;
 
-	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
-
-	sseu->slice_mask = BIT(0);
 	intel_sseu_set_info(sseu, 1, 2, 8);
 
+	intel_sseu_enable_slice(sseu, 0);
+
+	fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
 		u8 disabled_mask =
 			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
@@ -288,7 +287,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
 		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
 	}
 
-	intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
+	sseu_set_subslices(sseu->ss_mask[0].b, subslice_mask);
 
 	sseu->eu_total = compute_eu_total(sseu);
 
@@ -320,13 +319,15 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	const u8 eu_mask = 0xff;
 	int s, ss;
 
-	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
-	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
-
 	/* BXT has a single slice and at most 3 subslices. */
 	intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
 			    IS_GEN9_LP(i915) ? 3 : 4, 8);
 
+	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+	intel_sseu_enable_slices(sseu,
+				 (fuse2 & GEN8_F2_S_ENA_MASK) >>
+				 GEN8_F2_S_ENA_SHIFT);
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
@@ -340,12 +341,11 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	 * count the total enabled EU.
 	 */
 	for (s = 0; s < sseu->max_slices; s++) {
-		if (!(sseu->slice_mask & BIT(s)))
+		if (!intel_sseu_has_slice(sseu, s))
 			/* skip disabled slice */
 			continue;
 
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu_set_subslices(sseu->ss_mask[s].b, subslice_mask);
 
 		eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -396,14 +396,15 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
 	 * pair per subslice.
 	 */
 	sseu->has_slice_pg =
-		!IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
+		!IS_GEN9_LP(i915) && intel_sseu_slice_count(sseu) > 1;
 	sseu->has_subslice_pg =
 		IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(i915)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
+#define IS_SS_DISABLED(ss)	(!test_bit(ss, sseu->ss_mask[0].b))
+		info->has_pooled_eu =
+			intel_sseu_subslices_per_slice(sseu, 0) == 3;
 
 		sseu->min_eu_in_pool = 0;
 		if (info->has_pooled_eu) {
@@ -426,10 +427,13 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
 	u32 eu_disable0, eu_disable1, eu_disable2;
 
-	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
-	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
 	intel_sseu_set_info(sseu, 3, 3, 8);
 
+	fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+	intel_sseu_enable_slices(sseu,
+				 (fuse2 & GEN8_F2_S_ENA_MASK) >>
+				 GEN8_F2_S_ENA_SHIFT);
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
@@ -453,12 +457,11 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 	 * count the total enabled EU.
 	 */
 	for (s = 0; s < sseu->max_slices; s++) {
-		if (!(sseu->slice_mask & BIT(s)))
+		if (!intel_sseu_has_slice(sseu, s))
 			/* skip disabled slice */
 			continue;
 
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu_set_subslices(sseu->ss_mask[s].b, subslice_mask);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			u8 eu_disabled_mask;
@@ -499,7 +502,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
 	 * BDW supports slice power gating on devices with more than
 	 * one slice.
 	 */
-	sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
+	sseu->has_slice_pg = intel_sseu_slice_count(sseu) > 1;
 	sseu->has_subslice_pg = 0;
 	sseu->has_eu_pg = 0;
 }
@@ -521,15 +524,15 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
 		MISSING_CASE(INTEL_INFO(i915)->gt);
 		fallthrough;
 	case 1:
-		sseu->slice_mask = BIT(0);
+		intel_sseu_enable_slice(sseu, 0);
 		subslice_mask = BIT(0);
 		break;
 	case 2:
-		sseu->slice_mask = BIT(0);
+		intel_sseu_enable_slice(sseu, 0);
 		subslice_mask = BIT(0) | BIT(1);
 		break;
 	case 3:
-		sseu->slice_mask = BIT(0) | BIT(1);
+		intel_sseu_enable_slices(sseu, 0x3);
 		subslice_mask = BIT(0) | BIT(1);
 		break;
 	}
@@ -550,13 +553,11 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
 		break;
 	}
 
-	intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
-			    hweight8(subslice_mask),
-			    sseu->eu_per_subslice);
+	intel_sseu_set_info(sseu, intel_sseu_slice_count(sseu),
+			    hweight8(subslice_mask), sseu->eu_per_subslice);
 
 	for (s = 0; s < sseu->max_slices; s++) {
-		intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
-					 subslice_mask);
+		sseu_set_subslices(sseu->ss_mask[s].b, subslice_mask);
 
 		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			sseu_set_eus(sseu, s, ss,
@@ -643,7 +644,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
 	 */
 	if (GRAPHICS_VER(i915) == 11 &&
 	    slices == 1 &&
-	    subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+	    subslices > min_t(u8, 4, intel_sseu_subslices_per_slice(sseu, 0) / 2)) {
 		GEM_BUG_ON(subslices & 1);
 
 		subslice_pg = false;
@@ -709,13 +710,16 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 {
 	int s;
 
-	drm_printf(p, "slice total: %u, mask=%04x\n",
-		   hweight8(sseu->slice_mask), sseu->slice_mask);
-	drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
+	drm_printf(p, "slices total: %u, present=%*pbl\n",
+		   intel_sseu_slice_count(sseu),
+		   __intel_sseu_slice_mask_bits(sseu),
+		   __intel_sseu_slice_mask_bitmap(sseu));
+	drm_printf(p, "subslices total: %u\n", intel_sseu_subslice_total(sseu));
 	for (s = 0; s < sseu->max_slices; s++) {
-		drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
+		drm_printf(p, "slice%d: %u subslices, present=%*pbl\n",
 			   s, intel_sseu_subslices_per_slice(sseu, s),
-			   intel_sseu_get_subslices(sseu, s));
+			   BITMAP_BITS(sseu->ss_mask[s].b),
+			   sseu->ss_mask[s].b);
 	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 5c078df4729c..c8e3833ff286 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -6,8 +6,9 @@
 #ifndef __INTEL_SSEU_H__
 #define __INTEL_SSEU_H__
 
-#include <linux/types.h>
+#include <linux/bitmap.h>
 #include <linux/kernel.h>
+#include <linux/types.h>
 
 #include "i915_gem.h"
 
@@ -21,6 +22,7 @@ struct drm_printer;
  * are not expressed through fusing).
  */
 #define GEN_MAX_HSW_SLICES		3
+#define SSEU_MAX_SLICES GEN_MAX_HSW_SLICES
 
 /*
  * Maximum number of subslices that can exist within a HSW-style slice.  This
@@ -40,10 +42,7 @@ struct drm_printer;
 /* The maximum number of bits needed to express each subslice/DSS independently */
 #define GEN_SS_MASK_SIZE		SSEU_MAX(GEN_MAX_DSS, \
 						 GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE)
-
-#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
-#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
-#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
+#define SSEU_MAX_SUBSLICES	SSEU_MAX(GEN_MAX_DSS, GEN_MAX_SS_PER_HSW_SLICE)
 
 #define GEN_DSS_PER_GSLICE	4
 #define GEN_DSS_PER_CSLICE	8
@@ -52,15 +51,25 @@ struct drm_printer;
 #define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
 #define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
 
+#define BITMAP_BITS(b) ((unsigned int)(sizeof(b) * BITS_PER_BYTE))
+
+typedef struct { unsigned long b[BITS_TO_LONGS(SSEU_MAX_SLICES)]; } intel_sseu_slice_mask_t;
+typedef struct { unsigned long b[BITS_TO_LONGS(SSEU_MAX_SUBSLICES)]; } intel_sseu_subslice_mask_t;
+typedef struct { unsigned long b[BITS_TO_LONGS(GEN_MAX_EUS_PER_SS)]; } intel_sseu_eu_mask_t;
+
 struct sseu_dev_info {
-	u8 slice_mask;
-	u8 subslice_mask[GEN_SS_MASK_SIZE];
-	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
-	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
-	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
-	u16 eu_total;
-	u8 eu_per_subslice;
-	u8 min_eu_in_pool;
+	intel_sseu_slice_mask_t slice_mask;
+
+	intel_sseu_subslice_mask_t ss_mask[SSEU_MAX_SLICES];
+	intel_sseu_subslice_mask_t geometry_ss_mask[SSEU_MAX_SLICES];
+	intel_sseu_subslice_mask_t compute_ss_mask[SSEU_MAX_SLICES];
+
+	intel_sseu_eu_mask_t eu_mask[SSEU_MAX_SLICES][SSEU_MAX_SUBSLICES];
+
+	unsigned int eu_total;
+	unsigned int eu_per_subslice;
+	unsigned int min_eu_in_pool;
+
 	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
 	u8 subslice_7eu[3];
 	u8 has_slice_pg:1;
@@ -68,12 +77,9 @@ struct sseu_dev_info {
 	u8 has_eu_pg:1;
 
 	/* Topology fields */
-	u8 max_slices;
-	u8 max_subslices;
-	u8 max_eus_per_subslice;
-
-	u8 ss_stride;
-	u8 eu_stride;
+	unsigned int max_slices;
+	unsigned int max_subslices;
+	unsigned int max_eus_per_subslice;
 };
 
 /*
@@ -86,53 +92,78 @@ struct intel_sseu {
 	u8 max_eus_per_subslice;
 };
 
-static inline struct intel_sseu
-intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
+struct intel_sseu intel_sseu_from_device_info(const struct sseu_dev_info *sseu);
+
+static inline void
+intel_sseu_enable_slice(struct sseu_dev_info *sseu, int slice)
 {
-	struct intel_sseu value = {
-		.slice_mask = sseu->slice_mask,
-		.subslice_mask = sseu->subslice_mask[0],
-		.min_eus_per_subslice = sseu->max_eus_per_subslice,
-		.max_eus_per_subslice = sseu->max_eus_per_subslice,
-	};
-
-	return value;
+	set_bit(slice, sseu->slice_mask.b);
+}
+
+static inline void
+intel_sseu_enable_slices(struct sseu_dev_info *sseu, u8 mask)
+{
+	bitmap_set_value8(sseu->slice_mask.b, mask, 0);
 }
 
 static inline bool
-intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
-			int subslice)
+intel_sseu_has_slice(const struct sseu_dev_info *sseu, int slice)
 {
-	u8 mask;
-	int ss_idx = subslice / BITS_PER_BYTE;
+	return test_bit(slice, sseu->slice_mask.b);
+}
+
+static inline const unsigned long *
+__intel_sseu_slice_mask_bitmap(const struct sseu_dev_info *sseu)
+{
+	return sseu->slice_mask.b;
+}
+
+static inline unsigned int
+__intel_sseu_slice_mask_bits(const struct sseu_dev_info *sseu)
+{
+	return BITMAP_BITS(sseu->slice_mask.b);
+}
+
+static inline u32 intel_sseu_slice_mask(const struct sseu_dev_info *sseu)
+{
+	u32 mask;
 
-	if (slice >= sseu->max_slices ||
-	    subslice >= sseu->max_subslices)
-		return false;
+	bitmap_to_arr32(&mask, sseu->slice_mask.b, sizeof(mask) * BITS_PER_BYTE);
 
-	GEM_BUG_ON(ss_idx >= sseu->ss_stride);
+	return mask;
+}
 
-	mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+static inline unsigned int
+intel_sseu_slice_count(const struct sseu_dev_info *sseu)
+{
+	return bitmap_weight(sseu->slice_mask.b,
+			     BITMAP_BITS(sseu->slice_mask.b));
+}
 
-	return mask & BIT(subslice % BITS_PER_BYTE);
+static inline bool
+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+			int subslice)
+{
+	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->ss_mask));
+
+	return test_bit(subslice, sseu->ss_mask[slice].b);
 }
 
-void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
-			 u8 max_subslices, u8 max_eus_per_subslice);
+void intel_sseu_set_info(struct sseu_dev_info *sseu,
+			 unsigned int max_slices,
+			 unsigned int max_subslices,
+			 unsigned int max_eus_per_subslice);
 
 unsigned int
 intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
 
 unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, int slice);
 
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, int slice);
 
 u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
 
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
-			      u8 *subslice_mask, u32 ss_mask);
-
 void intel_sseu_info_init(struct intel_gt *gt);
 
 u32 intel_sseu_make_rpcs(struct intel_gt *gt,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index 2d5d011e01db..ae3e8972121f 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -12,11 +12,10 @@
 #include "intel_sseu_debugfs.h"
 
 static void sseu_copy_subslices(const struct sseu_dev_info *sseu,
-				int slice, u8 *to_mask)
+				int slice, unsigned long *to)
 {
-	int offset = slice * sseu->ss_stride;
-
-	memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
+	bitmap_copy(to, sseu->ss_mask[slice].b,
+		    BITMAP_BITS(sseu->ss_mask[slice].b));
 }
 
 static void cherryview_sseu_device_status(struct intel_gt *gt,
@@ -40,8 +39,8 @@ static void cherryview_sseu_device_status(struct intel_gt *gt,
 			/* skip disabled subslice */
 			continue;
 
-		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask[0] |= BIT(ss);
+		intel_sseu_enable_slice(sseu, 0);
+		set_bit(ss, sseu->ss_mask[0].b);
 		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -91,8 +90,8 @@ static void gen11_sseu_device_status(struct intel_gt *gt,
 			/* skip disabled slice */
 			continue;
 
-		sseu->slice_mask |= BIT(s);
-		sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask);
+		intel_sseu_enable_slice(sseu, s);
+		sseu_copy_subslices(&info->sseu, s, sseu->ss_mask[s].b);
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
@@ -144,24 +143,20 @@ static void gen9_sseu_device_status(struct intel_gt *gt,
 			/* skip disabled slice */
 			continue;
 
-		sseu->slice_mask |= BIT(s);
+		intel_sseu_enable_slice(sseu, s);
 
 		if (IS_GEN9_BC(gt->i915))
-			sseu_copy_subslices(&info->sseu, s,
-					    sseu->subslice_mask);
+			sseu_copy_subslices(&info->sseu, s, sseu->ss_mask[s].b);
 
 		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
 			unsigned int eu_cnt;
-			u8 ss_idx = s * info->sseu.ss_stride +
-				    ss / BITS_PER_BYTE;
 
 			if (IS_GEN9_LP(gt->i915)) {
 				if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask[ss_idx] |=
-					BIT(ss % BITS_PER_BYTE);
+				set_bit(ss, sseu->ss_mask[s].b);
 			}
 
 			eu_cnt = eu_reg[2 * s + ss / 2] & eu_mask[ss % 2];
@@ -183,22 +178,19 @@ static void bdw_sseu_device_status(struct intel_gt *gt,
 	u32 slice_info = intel_uncore_read(gt->uncore, GEN8_GT_SLICE_INFO);
 	int s;
 
-	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
+	intel_sseu_enable_slices(sseu, slice_info & GEN8_LSLICESTAT_MASK);
 
-	if (sseu->slice_mask) {
+	if (!bitmap_empty(sseu->slice_mask.b,
+			  BITMAP_BITS(sseu->slice_mask.b))) {
 		sseu->eu_per_subslice = info->sseu.eu_per_subslice;
-		for (s = 0; s < fls(sseu->slice_mask); s++)
-			sseu_copy_subslices(&info->sseu, s,
-					    sseu->subslice_mask);
+		for (s = 0; s < ARRAY_SIZE(sseu->ss_mask); s++)
+			sseu_copy_subslices(&info->sseu, s, sseu->ss_mask[s].b);
 		sseu->eu_total = sseu->eu_per_subslice *
 				 intel_sseu_subslice_total(sseu);
 
 		/* subtract fused off EU(s) from enabled slice(s) */
-		for (s = 0; s < fls(sseu->slice_mask); s++) {
-			u8 subslice_7eu = info->sseu.subslice_7eu[s];
-
-			sseu->eu_total -= hweight8(subslice_7eu);
-		}
+		for (s = 0; s < ARRAY_SIZE(info->sseu.subslice_7eu); s++)
+			sseu->eu_total -= hweight8(info->sseu.subslice_7eu[s]);
 	}
 }
 
@@ -210,14 +202,15 @@ static void i915_print_sseu_info(struct seq_file *m,
 	const char *type = is_available_info ? "Available" : "Enabled";
 	int s;
 
-	seq_printf(m, "  %s Slice Mask: %04x\n", type,
-		   sseu->slice_mask);
-	seq_printf(m, "  %s Slice Total: %u\n", type,
-		   hweight8(sseu->slice_mask));
-	seq_printf(m, "  %s Subslice Total: %u\n", type,
+	seq_printf(m, "  %s Slices: %*pbl\n", type,
+		   __intel_sseu_slice_mask_bits(sseu),
+		   __intel_sseu_slice_mask_bitmap(sseu));
+	seq_printf(m, "  %s Slices Total: %u\n", type,
+		   intel_sseu_slice_count(sseu));
+	seq_printf(m, "  %s Subslices Total: %u\n", type,
 		   intel_sseu_subslice_total(sseu));
-	for (s = 0; s < fls(sseu->slice_mask); s++) {
-		seq_printf(m, "  %s Slice%i subslices: %u\n", type,
+	for (s = 0; s < sseu->max_slices; s++) {
+		seq_printf(m, "  %s Slice%i count: %u\n", type,
 			   s, intel_sseu_subslices_per_slice(sseu, s));
 	}
 	seq_printf(m, "  %s EU Total: %u\n", type,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index dc0ffff6f655..c6f010f8da11 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -947,8 +947,9 @@ gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
 	 * occasions, such as INSTDONE, where this value is dependent
 	 * on s/ss combo, the read should be done with read_subslice_reg.
 	 */
-	slice = ffs(sseu->slice_mask) - 1;
-	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+	slice = find_first_bit(sseu->slice_mask.b,
+			       BITMAP_BITS(sseu->slice_mask.b));
+	GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->ss_mask));
 	subslice = ffs(intel_sseu_get_subslices(sseu, slice));
 	GEM_BUG_ON(!subslice);
 	subslice--;
@@ -1090,7 +1091,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
 	unsigned int slice, subslice;
 
 	GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
-	GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
+	GEM_BUG_ON(intel_sseu_slice_count(sseu) > 1);
 	slice = 0;
 
 	/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 17004bca4d24..1337ca210a06 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -741,7 +741,7 @@ static void __guc_ads_init(struct intel_guc *guc)
 	fill_engine_enable_masks(gt, &info_map);
 
 	ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED],
-		       hweight8(gt->info.sseu.slice_mask));
+		       intel_sseu_slice_count(&gt->info.sseu));
 	ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK],
 		       gt->info.vdbox_sfc_access);
 
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index c12a0adefda5..563f190d94ad 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -148,14 +148,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
 		value = intel_engines_has_context_isolation(i915);
 		break;
 	case I915_PARAM_SLICE_MASK:
-		value = sseu->slice_mask;
+		value = intel_sseu_slice_mask(sseu);
 		if (!value)
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
-		/* Only copy bits from the first slice */
-		memcpy(&value, sseu->subslice_mask,
-		       min(sseu->ss_stride, (u8)sizeof(value)));
+		value = intel_sseu_get_subslices(sseu, 0);
 		if (!value)
 			return -ENODEV;
 		break;
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index b5ca00cb6cf6..7e3631ee14a3 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -32,8 +32,11 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 			       struct drm_i915_query_item *query_item)
 {
 	const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+	unsigned int slice_length, subslice_length, eu_length, total_length;
 	struct drm_i915_query_topology_info topo;
-	u32 slice_length, subslice_length, eu_length, total_length;
+	unsigned int ss_stride, eu_stride;
+	unsigned int s, ss, i;
+	u8 __user *out;
 	int ret;
 
 	if (query_item->flags != 0)
@@ -42,11 +45,11 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	if (sseu->max_slices == 0)
 		return -ENODEV;
 
-	BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
-	slice_length = sizeof(sseu->slice_mask);
-	subslice_length = sseu->max_slices * sseu->ss_stride;
-	eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
+	slice_length = DIV_ROUND_UP(sseu->max_slices, BITS_PER_BYTE);
+	ss_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE);
+	eu_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
+	subslice_length = sseu->max_slices * ss_stride;
+	eu_length = sseu->max_slices * sseu->max_subslices * eu_stride;
 	total_length = sizeof(topo) + slice_length + subslice_length +
 		       eu_length;
 
@@ -64,28 +67,50 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
 	topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
 
 	topo.subslice_offset = slice_length;
-	topo.subslice_stride = sseu->ss_stride;
+	topo.subslice_stride = ss_stride;
 	topo.eu_offset = slice_length + subslice_length;
-	topo.eu_stride = sseu->eu_stride;
+	topo.eu_stride = eu_stride;
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
-			   &topo, sizeof(topo)))
-		return -EFAULT;
+	out = u64_to_user_ptr(query_item->data_ptr);
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
-			   &sseu->slice_mask, slice_length))
+	if (copy_to_user(out, &topo, sizeof(topo)))
 		return -EFAULT;
+	out += sizeof(topo);
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					   sizeof(topo) + slice_length),
-			   sseu->subslice_mask, subslice_length))
-		return -EFAULT;
+	for (i = 0; i < slice_length; i++) {
+		u8 mask;
 
-	if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
-					   sizeof(topo) +
-					   slice_length + subslice_length),
-			   sseu->eu_mask, eu_length))
-		return -EFAULT;
+		mask = bitmap_get_value8(sseu->slice_mask.b, i * BITS_PER_BYTE);
+		if (copy_to_user(out, &mask, sizeof(mask)))
+			return -EFAULT;
+		out += sizeof(mask);
+	}
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (i = 0; i < ss_stride; i++) {
+			u8 mask;
+
+			mask = bitmap_get_value8(sseu->ss_mask[s].b,
+						 i * BITS_PER_BYTE);
+			if (copy_to_user(out, &mask, sizeof(mask)))
+				return -EFAULT;
+			out += sizeof(mask);
+		}
+	}
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			for (i = 0; i < eu_stride; i++) {
+				u8 mask;
+
+				mask = bitmap_get_value8(sseu->eu_mask[s][ss].b,
+							 i * BITS_PER_BYTE);
+				if (copy_to_user(out, &mask, sizeof(mask)))
+					return -EFAULT;
+				out += sizeof(mask);
+			}
+		}
+	}
 
 	return total_length;
 }
-- 
2.32.0



More information about the Intel-gfx-trybot mailing list