[Intel-gfx] [PATCH 2/4] drm/i915/tgl: s/ss/eu fuse reading support

Lionel Landwerlin lionel.g.landwerlin at intel.com
Thu Sep 12 13:48:14 UTC 2019


On 12/09/2019 16:38, Mika Kuoppala wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>
> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
> some duplicated resources/paths. Although DSS behave similarly to 2
> subslices, instead of splitting this and presenting userspace with bits
> not directly representative of hardware resources, present userspace
> with a subslice_mask made up of DSS bits instead.
>
> Bspec: 29547
> Bspec: 12247
> Cc: Kelvin Gardiner <kelvin.gardiner at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> CC: Radhakrishna Sripada <radhakrishna.sripada at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com> #v1
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: José Roberto de Souza <jose.souza at intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: James Ausmus <james.ausmus at intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Sudeep Dutt <sudeep.dutt at intel.com>
> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
> ---
>   drivers/gpu/drm/i915/gt/intel_sseu.h     |  9 +--
>   drivers/gpu/drm/i915/i915_debugfs.c      |  3 +-
>   drivers/gpu/drm/i915/i915_reg.h          |  2 +
>   drivers/gpu/drm/i915/intel_device_info.c | 87 ++++++++++++++++++------
>   include/uapi/drm/i915_drm.h              |  6 +-
>   5 files changed, 76 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 4070f6ff1db6..d1d225204f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -18,12 +18,13 @@ struct drm_i915_private;
>   #define GEN_MAX_SUBSLICES	(8) /* ICL upper bound */
>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>   #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> -#define GEN_MAX_EUS		(10) /* HSW upper bound */
> +#define GEN_MAX_EUS		(16) /* TGL upper bound */
>   #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>   
>   struct sseu_dev_info {
>   	u8 slice_mask;
>   	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> +	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>   	u16 eu_total;
>   	u8 eu_per_subslice;
>   	u8 min_eu_in_pool;
> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>   
>   	u8 ss_stride;
>   	u8 eu_stride;
> -
> -	/* We don't have more than 8 eus per subslice at the moment and as we
> -	 * store eus enabled using bits, no need to multiply by eus per
> -	 * subslice.
> -	 */
> -	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>   };
>   
>   /*
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e5835337f022..f2b92be44adf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3820,7 +3820,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>   		for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>   			unsigned int eu_cnt;
>   
> -			if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
> +			if (info->sseu.has_subslice_pg &&
> +			    !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>   				/* skip disabled subslice */
>   				continue;
>   
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bf37ecebc82f..47847135a11f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>   
>   #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>   
> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
> +
>   #define GEN6_BSD_SLEEP_PSMI_CONTROL	_MMIO(0x12050)
>   #define   GEN6_BSD_SLEEP_MSG_DISABLE	(1 << 0)
>   #define   GEN6_BSD_SLEEP_FLUSH_DISABLE	(1 << 2)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index d9b5baaef5d0..792ca3202073 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -182,13 +182,73 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>   	return total;
>   }
>   
> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
> +				    u8 s_en, u32 ss_en, u16 eu_en)
> +{
> +	int s, ss;
> +
> +	/* ss_en represents entire subslice mask across all slices */
> +	if (sseu->max_slices * sseu->max_subslices >
> +	    sizeof(ss_en) * BITS_PER_BYTE) {
> +		DRM_ERROR("Invalid topology, max_slices: %d, max_subslices %d\n",
> +			  sseu->max_slices, sseu->max_subslices);


Don't you want a GEM_BUG_ON() here, to match the rest of the code?

It seems like a driver bug if we ever reach this case.


Otherwise:


Acked-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>


Cheers,


-Lionel


> +		return;
> +	}
> +
> +	for (s = 0; s < sseu->max_slices; s++) {
> +		if ((s_en & BIT(s)) == 0)
> +			continue;
> +
> +		sseu->slice_mask |= BIT(s);
> +
> +		intel_sseu_set_subslices(sseu, s, ss_en);
> +
> +		for (ss = 0; ss < sseu->max_subslices; ss++)
> +			if (intel_sseu_has_subslice(sseu, s, ss))
> +				sseu_set_eus(sseu, s, ss, eu_en);
> +	}
> +	sseu->eu_per_subslice = hweight16(eu_en);
> +	sseu->eu_total = compute_eu_total(sseu);
> +}
> +
> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
> +{
> +	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> +	u8 s_en;
> +	u32 dss_en;
> +	u16 eu_en = 0;
> +	u8 eu_en_fuse;
> +	int eu;
> +
> +	/*
> +	 * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
> +	 * Instead of splitting these, provide userspace with an array
> +	 * of DSS to more closely represent the hardware resource.
> +	 */
> +	intel_sseu_set_info(sseu, 1, 6, 16);
> +
> +	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
> +
> +	dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
> +
> +	/* one bit per pair of EUs */
> +	eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
> +	for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
> +		if (eu_en_fuse & BIT(eu))
> +			eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
> +
> +	gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
> +
> +	/* TGL only supports slice-level power gating */
> +	sseu->has_slice_pg = 1;
> +}
> +
>   static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>   {
>   	struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>   	u8 s_en;
> -	u32 ss_en, ss_en_mask;
> +	u32 ss_en;
>   	u8 eu_en;
> -	int s;
>   
>   	if (IS_ELKHARTLAKE(dev_priv))
>   		intel_sseu_set_info(sseu, 1, 4, 8);
> @@ -197,26 +257,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>   
>   	s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>   	ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> -	ss_en_mask = BIT(sseu->max_subslices) - 1;
>   	eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>   
> -	for (s = 0; s < sseu->max_slices; s++) {
> -		if (s_en & BIT(s)) {
> -			int ss_idx = sseu->max_subslices * s;
> -			int ss;
> -
> -			sseu->slice_mask |= BIT(s);
> -
> -			intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
> -							  ss_en_mask);
> -
> -			for (ss = 0; ss < sseu->max_subslices; ss++)
> -				if (intel_sseu_has_subslice(sseu, s, ss))
> -					sseu_set_eus(sseu, s, ss, eu_en);
> -		}
> -	}
> -	sseu->eu_per_subslice = hweight8(eu_en);
> -	sseu->eu_total = compute_eu_total(sseu);
> +	gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>   
>   	/* ICL has no power gating restrictions. */
>   	sseu->has_slice_pg = 1;
> @@ -959,8 +1002,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>   		gen9_sseu_info_init(dev_priv);
>   	else if (IS_GEN(dev_priv, 10))
>   		gen10_sseu_info_init(dev_priv);
> -	else if (INTEL_GEN(dev_priv) >= 11)
> +	else if (IS_GEN(dev_priv, 11))
>   		gen11_sseu_info_init(dev_priv);
> +	else if (INTEL_GEN(dev_priv) >= 12)
> +		gen12_sseu_info_init(dev_priv);
>   
>   	if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
>   		DRM_INFO("Disabling ppGTT for VT-d support\n");
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..30c542144016 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>    *           (data[X / 8] >> (X % 8)) & 1
>    *
>    * - the subslice mask for each slice with one bit per subslice telling
> - *   whether a subslice is available. The availability of subslice Y in slice
> - *   X can be queried with the following formula :
> + *   whether a subslice is available. Gen12 has dual-subslices, which are
> + *   similar to two gen11 subslices. For gen12, this array represents dual-
> + *   subslices. The availability of subslice Y in slice X can be queried
> + *   with the following formula :
>    *
>    *           (data[subslice_offset +
>    *                 X * subslice_stride +




More information about the Intel-gfx mailing list