[Intel-gfx] [PATCH v15 1/6] drm/i915: store all subslice masks

Lionel Landwerlin lionel.g.landwerlin at intel.com
Mon Feb 26 17:57:06 UTC 2018


Up to now, subslice mask was assumed to be uniform across slices. But
starting with Cannonlake, slices can be asymmetric (for example slice0
has different number of subslices as slice1+). This change stores all
subslices masks for all slices rather than having a single mask that
applies to all slices.

v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
    Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
    Readability changes (Tvrtko)
    Add EU index helper (Tvrtko)

v3: Turn ALIGN(v, 8) / 8 into DIV_ROUND_UP(v, BITS_PER_BYTE) (Tvrtko)
    Reuse sseu_eu_idx() for setting eu_mask on CHV (Tvrtko)
    Reformat debug prints for subslices (Tvrtko)

v4: Change eu_mask helper into sseu_set_eus() (Tvrtko)

v5: With Haswell reporting masks & counts, bump sseu_*_eus() functions
    to use u16 (Lionel)

v6: Fix sseu_get_eus() for > 8 EUs per subslice (Lionel)

v7: Change debugfs enabels for number of subslices per slice, will
    need a small igt/pm_sseu change (Lionel)
    Drop subslice_total field from sseu_dev_info, rely on
    sseu_subslice_total() to recompute the value instead (Lionel)

Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
Reviewed-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Acked-by: Chris Wilson <chris at chris-wilson.co.uk>
---
 drivers/gpu/drm/i915/i915_debugfs.c      |  28 ++--
 drivers/gpu/drm/i915/i915_drv.c          |   2 +-
 drivers/gpu/drm/i915/intel_device_info.c | 218 +++++++++++++++++++++++--------
 drivers/gpu/drm/i915/intel_device_info.h |  62 ++++++++-
 drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
 drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
 6 files changed, 247 insertions(+), 67 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 33fbf3965309..8db6d74eb5b1 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -4322,7 +4322,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask |= BIT(ss);
+		sseu->subslice_mask[0] |= BIT(ss);
 		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
 			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -4369,7 +4369,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
 			continue;
 
 		sseu->slice_mask |= BIT(s);
-		sseu->subslice_mask = info->sseu.subslice_mask;
+		sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4424,8 +4424,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 		sseu->slice_mask |= BIT(s);
 
 		if (IS_GEN9_BC(dev_priv))
-			sseu->subslice_mask =
-				INTEL_INFO(dev_priv)->sseu.subslice_mask;
+			sseu->subslice_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
 
 		for (ss = 0; ss < ss_max; ss++) {
 			unsigned int eu_cnt;
@@ -4435,7 +4435,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
 					/* skip disabled subslice */
 					continue;
 
-				sseu->subslice_mask |= BIT(ss);
+				sseu->subslice_mask[s] |= BIT(ss);
 			}
 
 			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
@@ -4457,9 +4457,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
 	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
 
 	if (sseu->slice_mask) {
-		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
 		sseu->eu_per_subslice =
 				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
+		for (s = 0; s < fls(sseu->slice_mask); s++) {
+			sseu->subslice_mask[s] =
+				INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
+		}
 		sseu->eu_total = sseu->eu_per_subslice *
 				 sseu_subslice_total(sseu);
 
@@ -4478,6 +4481,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 {
 	struct drm_i915_private *dev_priv = node_to_i915(m->private);
 	const char *type = is_available_info ? "Available" : "Enabled";
+	int s;
 
 	seq_printf(m, "  %s Slice Mask: %04x\n", type,
 		   sseu->slice_mask);
@@ -4485,10 +4489,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
 		   hweight8(sseu->slice_mask));
 	seq_printf(m, "  %s Subslice Total: %u\n", type,
 		   sseu_subslice_total(sseu));
-	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
-		   sseu->subslice_mask);
-	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < fls(sseu->slice_mask); s++) {
+		seq_printf(m, "  %s Slice%i subslices: %u\n", type,
+			   s, hweight8(sseu->subslice_mask[s]));
+	}
 	seq_printf(m, "  %s EU Total: %u\n", type,
 		   sseu->eu_total);
 	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
@@ -4522,6 +4526,10 @@ static int i915_sseu_status(struct seq_file *m, void *unused)
 
 	seq_puts(m, "SSEU Device Status\n");
 	memset(&sseu, 0, sizeof(sseu));
+	sseu.max_slices = INTEL_INFO(dev_priv)->sseu.max_slices;
+	sseu.max_subslices = INTEL_INFO(dev_priv)->sseu.max_subslices;
+	sseu.max_eus_per_subslice =
+		INTEL_INFO(dev_priv)->sseu.max_eus_per_subslice;
 
 	intel_runtime_pm_get(dev_priv);
 
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index aaa861b51024..655a072290e3 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -428,7 +428,7 @@ static int i915_getparam_ioctl(struct drm_device *dev, void *data,
 			return -ENODEV;
 		break;
 	case I915_PARAM_SUBSLICE_MASK:
-		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
+		value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
 		if (!value)
 			return -ENODEV;
 		break;
diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
index 1c780cc4cd48..7a5de8f302fd 100644
--- a/drivers/gpu/drm/i915/intel_device_info.c
+++ b/drivers/gpu/drm/i915/intel_device_info.c
@@ -81,12 +81,16 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
 
 static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
 {
+	int s;
+
 	drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
 	drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
 	drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
-	drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
-	drm_printf(p, "subslice per slice: %u\n",
-		   hweight8(sseu->subslice_mask));
+	for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
+		drm_printf(p, "slice%d %u subslices mask=%04x\n",
+			   s, hweight8(sseu->subslice_mask[s]),
+			   sseu->subslice_mask[s]);
+	}
 	drm_printf(p, "EU total: %u\n", sseu->eu_total);
 	drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
 	drm_printf(p, "has slice power gating: %s\n",
@@ -120,22 +124,86 @@ void intel_device_info_dump(const struct intel_device_info *info,
 	intel_device_info_dump_flags(info, p);
 }
 
+static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+{
+	u16 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
+		total += hweight8(sseu->eu_mask[i]);
+
+	return total;
+}
+
+static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
+{
+	u16 i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
 static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
 	const u32 fuse2 = I915_READ(GEN8_FUSE2);
+	int s, ss;
+	const int eu_mask = 0xff;
+	u32 subslice_mask, eu_en;
 
 	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
 			    GEN10_F2_S_ENA_SHIFT;
-	sseu->subslice_mask = (1 << 4) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
-				 GEN10_F2_SS_DIS_SHIFT);
+	sseu->max_slices = 6;
+	sseu->max_subslices = 4;
+	sseu->max_eus_per_subslice = 8;
+
+	subslice_mask = (1 << 4) - 1;
+	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
+			   GEN10_F2_SS_DIS_SHIFT);
+
+	/*
+	 * Slice0 can have up to 3 subslices, but there are only 2 in
+	 * slice1/2.
+	 */
+	sseu->subslice_mask[0] = subslice_mask;
+	for (s = 1; s < sseu->max_slices; s++)
+		sseu->subslice_mask[s] = subslice_mask & 0x3;
+
+	/* Slice0 */
+	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
+	for (ss = 0; ss < sseu->max_subslices; ss++)
+		sseu_set_eus(sseu, 0, ss, (eu_en >> (8 * ss)) & eu_mask);
+	/* Slice1 */
+	sseu_set_eus(sseu, 1, 0, (eu_en >> 24) & eu_mask);
+	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
+	sseu_set_eus(sseu, 1, 1, eu_en & eu_mask);
+	/* Slice2 */
+	sseu_set_eus(sseu, 2, 0, (eu_en >> 8) & eu_mask);
+	sseu_set_eus(sseu, 2, 1, (eu_en >> 16) & eu_mask);
+	/* Slice3 */
+	sseu_set_eus(sseu, 3, 0, (eu_en >> 24) & eu_mask);
+	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
+	sseu_set_eus(sseu, 3, 1, eu_en & eu_mask);
+	/* Slice4 */
+	sseu_set_eus(sseu, 4, 0, (eu_en >> 8) & eu_mask);
+	sseu_set_eus(sseu, 4, 1, (eu_en >> 16) & eu_mask);
+	/* Slice5 */
+	sseu_set_eus(sseu, 5, 0, (eu_en >> 24) & eu_mask);
+	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
+	sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
+
+	/* Do a second pass where we mark the subslices disabled if all their
+	 * eus are off.
+	 */
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			if (sseu_get_eus(sseu, s, ss) == 0)
+				sseu->subslice_mask[s] &= ~BIT(ss);
+		}
+	}
 
-	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
-	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
-	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
-				     GEN10_EU_DIS_SS_MASK));
+	sseu->eu_total = compute_eu_total(sseu);
 
 	/*
 	 * CNL is expected to always have a uniform distribution
@@ -156,26 +224,39 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
 static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	u32 fuse, eu_dis;
+	u32 fuse;
 
 	fuse = I915_READ(CHV_FUSE_GT);
 
 	sseu->slice_mask = BIT(0);
+	sseu->max_slices = 1;
+	sseu->max_subslices = 2;
+	sseu->max_eus_per_subslice = 8;
 
 	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
-		sseu->subslice_mask |= BIT(0);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
-				 CHV_FGT_EU_DIS_SS0_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		u8 disabled_mask =
+			((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
+			 CHV_FGT_EU_DIS_SS0_R0_SHIFT) |
+			(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
+			  CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
+
+		sseu->subslice_mask[0] |= BIT(0);
+		sseu_set_eus(sseu, 0, 0, ~disabled_mask);
 	}
 
 	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
-		sseu->subslice_mask |= BIT(1);
-		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
-				 CHV_FGT_EU_DIS_SS1_R1_MASK);
-		sseu->eu_total += 8 - hweight32(eu_dis);
+		u8 disabled_mask =
+			((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >>
+			 CHV_FGT_EU_DIS_SS1_R0_SHIFT) |
+			(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
+			  CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
+
+		sseu->subslice_mask[0] |= BIT(1);
+		sseu_set_eus(sseu, 0, 1, ~disabled_mask);
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * CHV expected to always have a uniform distribution of EU
 	 * across subslices.
@@ -197,41 +278,52 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct sseu_dev_info *sseu = &info->sseu;
-	int s_max = 3, ss_max = 4, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable;
-	u8 eu_mask = 0xff;
+	u32 fuse2, eu_disable, subslice_mask;
+	const u8 eu_mask = 0xff;
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
 
+	/* BXT has a single slice and at most 3 subslices. */
+	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
+	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	*/
-	sseu->subslice_mask = (1 << ss_max) - 1;
-	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
-				 GEN9_F2_SS_DIS_SHIFT);
+	subslice_mask = (1 << sseu->max_subslices) - 1;
+	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
+			   GEN9_F2_SS_DIS_SHIFT);
 
 	/*
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	*/
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
+		sseu->subslice_mask[s] = subslice_mask;
+
 		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
-		for (ss = 0; ss < ss_max; ss++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
 			int eu_per_ss;
+			u8 eu_disabled_mask;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[s] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
-						      eu_mask);
+			eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask;
+
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+			eu_per_ss = sseu->max_eus_per_subslice -
+				hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslice(s) has(have) 7 EUs. we
@@ -240,11 +332,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 			 */
 			if (eu_per_ss == 7)
 				sseu->subslice_7eu[s] |= BIT(ss);
-
-			sseu->eu_total += eu_per_ss;
 		}
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * SKL is expected to always have a uniform distribution
 	 * of EU across subslices with the exception that any one
@@ -270,8 +362,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
 
 	if (IS_GEN9_LP(dev_priv)) {
-#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
-		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
+#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask[0] & BIT(ss)))
+		info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
 
 		sseu->min_eu_in_pool = 0;
 		if (info->has_pooled_eu) {
@@ -289,19 +381,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
 static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 {
 	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
-	const int s_max = 3, ss_max = 3, eu_max = 8;
 	int s, ss;
-	u32 fuse2, eu_disable[3]; /* s_max */
+	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
 
 	fuse2 = I915_READ(GEN8_FUSE2);
 	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
+	sseu->max_slices = 3;
+	sseu->max_subslices = 3;
+	sseu->max_eus_per_subslice = 8;
+
 	/*
 	 * The subslice disable field is global, i.e. it applies
 	 * to each of the enabled slices.
 	 */
-	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
-	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
-				 GEN8_F2_SS_DIS_SHIFT);
+	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
+	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
+			   GEN8_F2_SS_DIS_SHIFT);
 
 	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
 	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
@@ -315,30 +410,38 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
 	 * Iterate through enabled slices and subslices to
 	 * count the total enabled EU.
 	 */
-	for (s = 0; s < s_max; s++) {
+	for (s = 0; s < sseu->max_slices; s++) {
 		if (!(sseu->slice_mask & BIT(s)))
 			/* skip disabled slice */
 			continue;
 
-		for (ss = 0; ss < ss_max; ss++) {
+		sseu->subslice_mask[s] = subslice_mask;
+
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			u8 eu_disabled_mask;
 			u32 n_disabled;
 
-			if (!(sseu->subslice_mask & BIT(ss)))
+			if (!(sseu->subslice_mask[ss] & BIT(ss)))
 				/* skip disabled subslice */
 				continue;
 
-			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
+			eu_disabled_mask =
+				eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
+
+			sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
+
+			n_disabled = hweight8(eu_disabled_mask);
 
 			/*
 			 * Record which subslices have 7 EUs.
 			 */
-			if (eu_max - n_disabled == 7)
+			if (sseu->max_eus_per_subslice - n_disabled == 7)
 				sseu->subslice_7eu[s] |= 1 << ss;
-
-			sseu->eu_total += eu_max - n_disabled;
 		}
 	}
 
+	sseu->eu_total = compute_eu_total(sseu);
+
 	/*
 	 * BDW is expected to always have a uniform distribution of EU across
 	 * subslices with the exception that any one EU in any one subslice may
@@ -362,6 +465,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
 	struct intel_device_info *info = mkwrite_device_info(dev_priv);
 	struct sseu_dev_info *sseu = &info->sseu;
 	u32 fuse1;
+	int s, ss;
 
 	/*
 	 * There isn't a register to tell us how many slices/subslices. We
@@ -373,18 +477,22 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
 		/* fall through */
 	case 1:
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask = BIT(0);
+		sseu->subslice_mask[0] = BIT(0);
 		break;
 	case 2:
 		sseu->slice_mask = BIT(0);
-		sseu->subslice_mask = BIT(0) | BIT(1);
+		sseu->subslice_mask[0] = BIT(0) | BIT(1);
 		break;
 	case 3:
 		sseu->slice_mask = BIT(0) | BIT(1);
-		sseu->subslice_mask = BIT(0) | BIT(1);
+		sseu->subslice_mask[0] = BIT(0) | BIT(1);
+		sseu->subslice_mask[1] = BIT(0) | BIT(1);
 		break;
 	}
 
+	sseu->max_slices = hweight8(sseu->slice_mask);
+	sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
+
 	fuse1 = I915_READ(HSW_PAVP_FUSE1);
 	switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
 	default:
@@ -401,8 +509,16 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
 		sseu->eu_per_subslice = 6;
 		break;
 	}
+	sseu->max_eus_per_subslice = sseu->eu_per_subslice;
+
+	for (s = 0; s < sseu->max_slices; s++) {
+		for (ss = 0; ss < sseu->max_subslices; ss++) {
+			sseu_set_eus(sseu, s, ss,
+				     (1UL << sseu->eu_per_subslice) - 1);
+		}
+	}
 
-	sseu->eu_total = sseu_subslice_total(sseu) * sseu->eu_per_subslice;
+	sseu->eu_total = compute_eu_total(sseu);
 
 	/* No powergating for you. */
 	sseu->has_slice_pg = 0;
diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h
index 8904ad87bf37..6f65d8f0ac89 100644
--- a/drivers/gpu/drm/i915/intel_device_info.h
+++ b/drivers/gpu/drm/i915/intel_device_info.h
@@ -112,10 +112,13 @@ enum intel_platform {
 	func(supports_tv); \
 	func(has_ipc);
 
+#define GEN_MAX_SLICES		(6) /* CNL upper bound */
+#define GEN_MAX_SUBSLICES	(7)
+
 struct sseu_dev_info {
 	u8 slice_mask;
-	u8 subslice_mask;
-	u8 eu_total;
+	u8 subslice_mask[GEN_MAX_SUBSLICES];
+	u16 eu_total;
 	u8 eu_per_subslice;
 	u8 min_eu_in_pool;
 	/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
@@ -123,6 +126,17 @@ struct sseu_dev_info {
 	u8 has_slice_pg:1;
 	u8 has_subslice_pg:1;
 	u8 has_eu_pg:1;
+
+	/* Topology fields */
+	u8 max_slices;
+	u8 max_subslices;
+	u8 max_eus_per_subslice;
+
+	/* We don't have more than 8 eus per subslice at the moment and as we
+	 * store eus enabled using bits, no need to multiply by eus per
+	 * subslice.
+	 */
+	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
 };
 
 struct intel_device_info {
@@ -173,7 +187,49 @@ struct intel_driver_caps {
 
 static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
 {
-	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
+	unsigned int i, total = 0;
+
+	for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
+		total += hweight8(sseu->subslice_mask[i]);
+
+	return total;
+}
+
+static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
+			      int slice, int subslice)
+{
+	int subslice_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice,
+					   BITS_PER_BYTE);
+	int slice_stride = sseu->max_subslices * subslice_stride;
+
+	return slice * slice_stride + subslice * subslice_stride;
+}
+
+static inline u16 sseu_get_eus(const struct sseu_dev_info *sseu,
+			       int slice, int subslice)
+{
+	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+	u16 eu_mask = 0;
+
+	for (i = 0;
+	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
+		eu_mask |= ((u16) sseu->eu_mask[offset + i]) <<
+			(i * BITS_PER_BYTE);
+	}
+
+	return eu_mask;
+}
+
+static inline void sseu_set_eus(struct sseu_dev_info *sseu,
+				int slice, int subslice, u16 eu_mask)
+{
+	int i, offset = sseu_eu_idx(sseu, slice, subslice);
+
+	for (i = 0;
+	     i < DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE); i++) {
+		sseu->eu_mask[offset + i] =
+			(eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+	}
 }
 
 const char *intel_platform_name(enum intel_platform platform);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 14288743909f..39909e02a3a9 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -2149,7 +2149,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
 
 	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
 		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
-		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
+		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
 			GEN8_RPCS_SS_CNT_SHIFT;
 		rpcs |= GEN8_RPCS_ENABLE;
 	}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index bbacf4d0f4cb..068c220e6480 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -92,7 +92,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
 
 #define instdone_subslice_mask(dev_priv__) \
 	(INTEL_GEN(dev_priv__) == 7 ? \
-	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
+	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
 
 #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
 	for ((slice__) = 0, (subslice__) = 0; \
-- 
2.16.1



More information about the Intel-gfx mailing list