[Intel-gfx] [PATCH 1/6] drm/i915: store all subslice masks

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jan 11 11:12:46 UTC 2018


On 18/12/2017 15:35, Lionel Landwerlin wrote:
> Up to now, subslice mask was assumed to be uniform across slices. But
> starting with Cannonlake, slices can be asymetric (for example slice0

asymmetric, thanks auto spell checker. :)

> has different number of subslices as slice1+). This change stores all
> subslices masks for all slices rather than having a single mask that
> applies to all slices.
> 
> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> ---
>   drivers/gpu/drm/i915/i915_debugfs.c      |  24 +++--
>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>   drivers/gpu/drm/i915/i915_drv.h          |  23 ++++-
>   drivers/gpu/drm/i915/intel_device_info.c | 169 ++++++++++++++++++++++---------
>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>   6 files changed, 161 insertions(+), 61 deletions(-)
> 
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index 0ddce72552bf..0c7890b695c5 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -4293,7 +4293,7 @@ static void cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask = BIT(0);
> -		sseu->subslice_mask |= BIT(ss);
> +		sseu->subslices_mask[0] |= BIT(ss);
>   		eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>   			 ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
> @@ -4340,7 +4340,7 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   			continue;
>   
>   		sseu->slice_mask |= BIT(s);
> -		sseu->subslice_mask = info->sseu.subslice_mask;
> +		sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4395,8 +4395,8 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   		sseu->slice_mask |= BIT(s);
>   
>   		if (IS_GEN9_BC(dev_priv))
> -			sseu->subslice_mask =
> -				INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +			sseu->subslices_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>   
>   		for (ss = 0; ss < ss_max; ss++) {
>   			unsigned int eu_cnt;
> @@ -4406,7 +4406,7 @@ static void gen9_sseu_device_status(struct drm_i915_private *dev_priv,
>   					/* skip disabled subslice */
>   					continue;
>   
> -				sseu->subslice_mask |= BIT(ss);
> +				sseu->subslices_mask[s] |= BIT(ss);
>   			}
>   
>   			eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
> @@ -4428,9 +4428,12 @@ static void broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>   	sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>   
>   	if (sseu->slice_mask) {
> -		sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>   		sseu->eu_per_subslice =
>   				INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
> +		for (s = 0; s < fls(sseu->slice_mask); s++) {
> +			sseu->subslices_mask[s] =
> +				INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
> +		}
>   		sseu->eu_total = sseu->eu_per_subslice *
>   				 sseu_subslice_total(sseu);
>   
> @@ -4449,6 +4452,7 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   {
>   	struct drm_i915_private *dev_priv = node_to_i915(m->private);
>   	const char *type = is_available_info ? "Available" : "Enabled";
> +	int s;
>   
>   	seq_printf(m, "  %s Slice Mask: %04x\n", type,
>   		   sseu->slice_mask);
> @@ -4456,10 +4460,10 @@ static void i915_print_sseu_info(struct seq_file *m, bool is_available_info,
>   		   hweight8(sseu->slice_mask));
>   	seq_printf(m, "  %s Subslice Total: %u\n", type,
>   		   sseu_subslice_total(sseu));
> -	seq_printf(m, "  %s Subslice Mask: %04x\n", type,
> -		   sseu->subslice_mask);
> -	seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
> -		   hweight8(sseu->subslice_mask));
> +	for (s = 0; s < fls(sseu->slice_mask); s++) {

Slice mask is always contiguous bits?

> +		seq_printf(m, "  %s Slice%i Subslice Mask: %04x\n", type,
> +			   s, sseu->subslices_mask[s]);

Don't want to keep printing the count? Like maybe " %s Slice%u %u 
sublices, mask=%04x\n" ?

> +	}
>   	seq_printf(m, "  %s EU Total: %u\n", type,
>   		   sseu->eu_total);
>   	seq_printf(m, "  %s EU Per Subslice: %u\n", type,
> diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
> index 72bea281edb7..8b99e415c345 100644
> --- a/drivers/gpu/drm/i915/i915_drv.c
> +++ b/drivers/gpu/drm/i915/i915_drv.c
> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, void *data,
>   			return -ENODEV;
>   		break;
>   	case I915_PARAM_SUBSLICE_MASK:
> -		value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
> +		value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
>   		if (!value)
>   			return -ENODEV;
>   		break;
> diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
> index 1aba5657f5f0..82fc59078c6a 100644
> --- a/drivers/gpu/drm/i915/i915_drv.h
> +++ b/drivers/gpu/drm/i915/i915_drv.h
> @@ -802,9 +802,12 @@ struct intel_csr {
>   	func(supports_tv); \
>   	func(has_ipc);
>   
> +#define GEN_MAX_SLICES		(6) /* CNL upper bound */
> +#define GEN_MAX_SUBSLICES	(7)
> +
>   struct sseu_dev_info {
>   	u8 slice_mask;
> -	u8 subslice_mask;
> +	u8 subslices_mask[GEN_MAX_SLICES];

Personally I would probably kept the subslice_mask name and just turned 
it into an array, but that is bike-shed territory.

>   	u8 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
> @@ -813,11 +816,27 @@ struct sseu_dev_info {
>   	u8 has_slice_pg:1;
>   	u8 has_subslice_pg:1;
>   	u8 has_eu_pg:1;
> +
> +	/* Topology fields */
> +	u8 max_slices;
> +	u8 max_subslices;
> +	u8 max_eus_per_subslice;
> +
> +	/* We don't have more than 8 eus per subslice at the moment and as we
> +	 * store eus enabled using bits, no need to multiply by eus per
> +	 * subslice.
> +	 */
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>   };
>   
>   static inline unsigned int sseu_subslice_total(const struct sseu_dev_info *sseu)
>   {
> -	return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
> +	unsigned s, total = 0;
> +
> +	for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
> +		total += hweight8(sseu->subslices_mask[s]);
> +
> +	return total;

The function is bigger now so maybe it needs stops being inline, but 
either moved to the .c file, or even the total stored in struct 
sseu_dev_info, and then this becomes just a getter?

>   }
>   
>   /* Keep in gen based order, and chronological order within a gen */
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index f478be3ae0ba..6a3c40439e83 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -82,22 +82,74 @@ void intel_device_info_dump(struct drm_i915_private *dev_priv)
>   #undef PRINT_FLAG
>   }
>   
> +static u8 compute_eu_total(const struct sseu_dev_info *sseu)
> +{
> +	u8 i, total = 0;

sseu_subslice_total returns an unsinged int, while this is u8, 
suggesting there can be more sublices and eus. ;) Jokes aside, I'd 
probably make them consistent. The index variable is probably best as 
unsigned int, but the total and return can be u8 if big enough for total 
eus? Either way best it matches the types in the struct they get 
assigned to.

> +
> +	for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
> +		total += hweight8(sseu->eu_mask[i]);
> +
> +	return total;
> +}
> +
>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>   	const u32 fuse2 = I915_READ(GEN8_FUSE2);
> +	int s, ss, eu_mask = 0xff;

const eu_mask ?

> +	u32 subslice_mask, eu_en;
>   
>   	sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>   			    GEN10_F2_S_ENA_SHIFT;
> -	sseu->subslice_mask = (1 << 4) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> -				 GEN10_F2_SS_DIS_SHIFT);
> +	sseu->max_slices = 6;
> +	sseu->max_subslices = 4;
> +	sseu->max_eus_per_subslice = 8;
> +
> +	subslice_mask = (1 << 4) - 1;
> +	subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> +			   GEN10_F2_SS_DIS_SHIFT);
> +
> +	/* Slice0 can have up to 3 subslices, but there are only 2 in
> +	 * slice1/2.
> +	 */
> +	sseu->subslices_mask[0] = subslice_mask;
> +	for (s = 1; s < sseu->max_slices; s++)
> +		sseu->subslices_mask[s] = subslice_mask & 0x3;
> +
> +	/* Slice0 */
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE0);
> +	for (ss = 0; ss < sseu->max_subslices; ss++)
> +		sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
> +	/* Slice1 */
> +	sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE1);
> +	sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;

I suggest a helper to index into sse->eu_mask, like 
sseu->eu_mask[_eu_mask_idx(slice, subslice)] or something, so it is more 
readable what is happening here. Or even:

  _eu_mask(sseu, slice, subslice) = mask;

Doable? I am not 100% I picked up exactly on the layout of the eu_mask 
array.. each element is one subslice? Consecutive for slice0, 
subslice0..N, slice1... sliceN ?

> +	/* Slice2 */
> +	sseu->eu_mask[2 * sseu->max_subslices]     = (eu_en >> 8) & eu_mask;
> +	sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
> +	/* Slice3 */
> +	sseu->eu_mask[3 * sseu->max_subslices]     = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN8_EU_DISABLE2);
> +	sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
> +	/* Slice4 */
> +	sseu->eu_mask[4 * sseu->max_subslices]     = (eu_en >> 8) & eu_mask;
> +	sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & eu_mask;
> +	/* Slice5 */
> +	sseu->eu_mask[5 * sseu->max_subslices]     = (eu_en >> 24) & eu_mask;
> +	eu_en = ~I915_READ(GEN10_EU_DISABLE3);
> +	sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
> +
> +	/* Do a second pass where we marked the subslices disabled if all
> +	 * their eus are off.
> +	 */
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
> +			if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
> +				sseu->subslices_mask[s] &= ~BIT(ss);
> +		}
> +	}
>   
> -	sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
> -	sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
> -	sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
> -				     GEN10_EU_DIS_SS_MASK));
> +	sseu->eu_total = compute_eu_total(sseu);
>   
>   	/*
>   	 * CNL is expected to always have a uniform distribution
> @@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void cherryview_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	u32 fuse, eu_dis;
> +	u32 fuse;
>   
>   	fuse = I915_READ(CHV_FUSE_GT);
>   
>   	sseu->slice_mask = BIT(0);
> +	sseu->max_slices = 1;
> +	sseu->max_subslices = 2;
> +	sseu->max_eus_per_subslice = 8;
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS0)) {
> -		sseu->subslice_mask |= BIT(0);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS0_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		sseu->subslices_mask[0] |= BIT(0);
> +		sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
> +		sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;

Do you need to invert the bits here?

> +		sseu->subslices_mask[0] = 1;
>   	}
>   
>   	if (!(fuse & CHV_FGT_DISABLE_SS1)) {
> -		sseu->subslice_mask |= BIT(1);
> -		eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
> -				 CHV_FGT_EU_DIS_SS1_R1_MASK);
> -		sseu->eu_total += 8 - hweight32(eu_dis);
> +		sseu->subslices_mask[0] |= BIT(1);
> +		sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
> +		sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;

Why eu_mask indices 1 and 2 here, while for subslice 0 you merged them 
into index 0?

>   	}
>   
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * CHV expected to always have a uniform distribution of EU
>   	 * across subslices.
> @@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	struct sseu_dev_info *sseu = &info->sseu;
> -	int s_max = 3, ss_max = 4, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable;
> +	u32 fuse2, eu_disable, subslice_mask;
>   	u8 eu_mask = 0xff;
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
>   
> +	/* BXT has a single slice and at most 3 subslices. */
> +	sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
> +	sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	*/
> -	sseu->subslice_mask = (1 << ss_max) - 1;
> -	sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> -				 GEN9_F2_SS_DIS_SHIFT);
> +	subslice_mask = (1 << sseu->max_subslices) - 1;
> +	subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
> +			   GEN9_F2_SS_DIS_SHIFT);
>   
>   	/*
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	*/
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> +		sseu->subslices_mask[s] = subslice_mask;
> +
>   		eu_disable = I915_READ(GEN9_EU_DISABLE(s));
> -		for (ss = 0; ss < ss_max; ss++) {
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			int eu_per_ss;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslices_mask[s] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
> -						      eu_mask);
> +			sseu->eu_mask[ss + s * sseu->max_subslices] =
> +				~((eu_disable >> (ss*8)) & eu_mask);
> +
> +			eu_per_ss = sseu->max_eus_per_subslice -
> +				hweight8((eu_disable >> (ss*8)) & eu_mask);

Store "eu_disable >> (ss*8)) & eu_mask" in a local for readability since 
it is calculated twice?

>   
>   			/*
>   			 * Record which subslice(s) has(have) 7 EUs. we
> @@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   			 */
>   			if (eu_per_ss == 7)
>   				sseu->subslice_7eu[s] |= BIT(ss);
> -
> -			sseu->eu_total += eu_per_ss;

You could theoretically keep this running total and then at the end 
compare against compute_eu_total under a GEM_BUG_ON, just to check 
everyting is working as expected.

>   		}
>   	}
>   
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * SKL is expected to always have a uniform distribution
>   	 * of EU across subslices with the exception that any one
> @@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   	sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>   
>   	if (IS_GEN9_LP(dev_priv)) {
> -#define IS_SS_DISABLED(ss)	(!(sseu->subslice_mask & BIT(ss)))
> -		info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
> +#define IS_SS_DISABLED(ss)	(!(sseu->subslices_mask[0] & BIT(ss)))
> +		info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
>   
>   		sseu->min_eu_in_pool = 0;
>   		if (info->has_pooled_eu) {
> @@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct drm_i915_private *dev_priv)
>   static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
> -	const int s_max = 3, ss_max = 3, eu_max = 8;
>   	int s, ss;
> -	u32 fuse2, eu_disable[3]; /* s_max */
> +	u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>   
>   	fuse2 = I915_READ(GEN8_FUSE2);
>   	sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
> +	sseu->max_slices = 3;
> +	sseu->max_subslices = 3;
> +	sseu->max_eus_per_subslice = 8;
> +
>   	/*
>   	 * The subslice disable field is global, i.e. it applies
>   	 * to each of the enabled slices.
>   	 */
> -	sseu->subslice_mask = GENMASK(ss_max - 1, 0);
> -	sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> -				 GEN8_F2_SS_DIS_SHIFT);
> +	subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
> +	subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
> +			   GEN8_F2_SS_DIS_SHIFT);
>   
>   	eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & GEN8_EU_DIS0_S0_MASK;
>   	eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> GEN8_EU_DIS0_S1_SHIFT) |
> @@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct drm_i915_private *dev_priv)
>   	 * Iterate through enabled slices and subslices to
>   	 * count the total enabled EU.
>   	 */
> -	for (s = 0; s < s_max; s++) {
> +	for (s = 0; s < sseu->max_slices; s++) {
>   		if (!(sseu->slice_mask & BIT(s)))
>   			/* skip disabled slice */
>   			continue;
>   
> -		for (ss = 0; ss < ss_max; ss++) {
> +		sseu->subslices_mask[s] = subslice_mask;
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++) {
>   			u32 n_disabled;
>   
> -			if (!(sseu->subslice_mask & BIT(ss)))
> +			if (!(sseu->subslices_mask[ss] & BIT(ss)))
>   				/* skip disabled subslice */
>   				continue;
>   
> -			n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
> +			sseu->eu_mask[ss + s * sseu->max_subslices] =
> +				~(eu_disable[s] >>
> +				  (ss * sseu->max_eus_per_subslice));
> +			n_disabled = hweight8(eu_disable[s] >>
> +					      (ss * sseu->max_eus_per_subslice));

As above could use a local for "eu_disable[s] >> (ss * 
sseu->max_eus_per_subslice)"

>   
>   			/*
>   			 * Record which subslices have 7 EUs.
>   			 */
> -			if (eu_max - n_disabled == 7)
> +			if (sseu->max_eus_per_subslice - n_disabled == 7)
>   				sseu->subslice_7eu[s] |= 1 << ss;
> -
> -			sseu->eu_total += eu_max - n_disabled;
>   		}
>   	}
>   
> +	sseu->eu_total = compute_eu_total(sseu);
> +
>   	/*
>   	 * BDW is expected to always have a uniform distribution of EU across
>   	 * subslices with the exception that any one EU in any one subslice may
> @@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   {
>   	struct intel_device_info *info = mkwrite_device_info(dev_priv);
>   	enum pipe pipe;
> +	int s;
>   
>   	if (INTEL_GEN(dev_priv) >= 10) {
>   		for_each_pipe(dev_priv, pipe)
> @@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   	DRM_DEBUG_DRIVER("slice total: %u\n", hweight8(info->sseu.slice_mask));
>   	DRM_DEBUG_DRIVER("subslice total: %u\n",
>   			 sseu_subslice_total(&info->sseu));
> -	DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
> -	DRM_DEBUG_DRIVER("subslice per slice: %u\n",
> -			 hweight8(info->sseu.subslice_mask));
> +	for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
> +		DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslices_mask[s]);
> +		DRM_DEBUG_DRIVER("subslice per slice: %u\n",
> +				 hweight8(info->sseu.subslices_mask[s]));

Put a subslice index into messages?

> +	}
>   	DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
>   	DRM_DEBUG_DRIVER("EU per subslice: %u\n", info->sseu.eu_per_subslice);
>   	DRM_DEBUG_DRIVER("has slice power gating: %s\n",
> diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
> index 2e38fbfdf08f..6e55b842c21d 100644
> --- a/drivers/gpu/drm/i915/intel_lrc.c
> +++ b/drivers/gpu/drm/i915/intel_lrc.c
> @@ -2072,7 +2072,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>   
>   	if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>   		rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
> -		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
> +		rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
>   			GEN8_RPCS_SS_CNT_SHIFT;
>   		rpcs |= GEN8_RPCS_ENABLE;
>   	}
> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
> index c5ff203e42d6..a94bc1b5c502 100644
> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum intel_engine_hangcheck_action a)
>   
>   #define instdone_subslice_mask(dev_priv__) \
>   	(INTEL_GEN(dev_priv__) == 7 ? \
> -	 1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
> +	 1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
>   
>   #define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
>   	for ((slice__) = 0, (subslice__) = 0; \
> 

Regards,

Tvrtko


More information about the Intel-gfx mailing list