[Intel-gfx] [PATCH 2/4] drm/i915/tgl: s/ss/eu fuse reading support
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Sep 12 15:18:08 UTC 2019
On 12/09/2019 14:38, Mika Kuoppala wrote:
> From: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>
> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
> some duplicated resources/paths. Although DSS behave similarly to 2
> subslices, instead of splitting this and presenting userspace with bits
> not directly representative of hardware resources, present userspace
> with a subslice_mask made up of DSS bits instead.
>
> Bspec: 29547
> Bspec: 12247
> Cc: Kelvin Gardiner <kelvin.gardiner at intel.com>
> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> CC: Radhakrishna Sripada <radhakrishna.sripada at intel.com>
> Cc: Michel Thierry <michel.thierry at intel.com> #v1
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: José Roberto de Souza <jose.souza at intel.com>
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Signed-off-by: James Ausmus <james.ausmus at intel.com>
> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
> Signed-off-by: Sudeep Dutt <sudeep.dutt at intel.com>
> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +--
> drivers/gpu/drm/i915/i915_debugfs.c | 3 +-
> drivers/gpu/drm/i915/i915_reg.h | 2 +
> drivers/gpu/drm/i915/intel_device_info.c | 87 ++++++++++++++++++------
> include/uapi/drm/i915_drm.h | 6 +-
> 5 files changed, 76 insertions(+), 31 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
> index 4070f6ff1db6..d1d225204f09 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
> @@ -18,12 +18,13 @@ struct drm_i915_private;
> #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
> #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
> #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
> -#define GEN_MAX_EUS (10) /* HSW upper bound */
> +#define GEN_MAX_EUS (16) /* TGL upper bound */
> #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>
> struct sseu_dev_info {
> u8 slice_mask;
> u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
> + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
> u16 eu_total;
> u8 eu_per_subslice;
> u8 min_eu_in_pool;
> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>
> u8 ss_stride;
> u8 eu_stride;
> -
> - /* We don't have more than 8 eus per subslice at the moment and as we
> - * store eus enabled using bits, no need to multiply by eus per
> - * subslice.
> - */
> - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
Comment indeed looks obsolete even with old GEM_MAX_EUS.
Howewer more importantly, was the code broken before? Now we size
considering the stride, but before GEN_MAX_EU_STRIDE was
GEN_SSEU_STRIDE(GEN_MAX_EUS) = DIV_ROUND_UP(10, BITS_PER_BYTE) = 2, no?
So wasn't the array too small?
P.S. Moving the position of the field is just noise?
> };
>
> /*
> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
> index e5835337f022..f2b92be44adf 100644
> --- a/drivers/gpu/drm/i915/i915_debugfs.c
> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
> @@ -3820,7 +3820,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
> for (ss = 0; ss < info->sseu.max_subslices; ss++) {
> unsigned int eu_cnt;
>
> - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
> + if (info->sseu.has_subslice_pg &&
> + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
> /* skip disabled subslice */
> continue;
>
> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
> index bf37ecebc82f..47847135a11f 100644
> --- a/drivers/gpu/drm/i915/i915_reg.h
> +++ b/drivers/gpu/drm/i915/i915_reg.h
> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>
> #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>
> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
> +
> #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050)
> #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0)
> #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2)
> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
> index d9b5baaef5d0..792ca3202073 100644
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -182,13 +182,73 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
> return total;
> }
>
> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
> + u8 s_en, u32 ss_en, u16 eu_en)
> +{
> + int s, ss;
> +
> + /* ss_en represents entire subslice mask across all slices */
> + if (sseu->max_slices * sseu->max_subslices >
> + sizeof(ss_en) * BITS_PER_BYTE) {
> + DRM_ERROR("Invalid topology, max_slices: %d, max_subslices %d\n",
> + sseu->max_slices, sseu->max_subslices);
> + return;
> + }
> +
> + for (s = 0; s < sseu->max_slices; s++) {
> + if ((s_en & BIT(s)) == 0)
> + continue;
> +
> + sseu->slice_mask |= BIT(s);
> +
> + intel_sseu_set_subslices(sseu, s, ss_en);
> +
> + for (ss = 0; ss < sseu->max_subslices; ss++)
> + if (intel_sseu_has_subslice(sseu, s, ss))
> + sseu_set_eus(sseu, s, ss, eu_en);
> + }
> + sseu->eu_per_subslice = hweight16(eu_en);
> + sseu->eu_total = compute_eu_total(sseu);
> +}
Can we be kind to reviewers and extract this helper in a separate
preceding patch? It even modifies the loop while extracting it so double
reason to separate.
> +
> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
> +{
> + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> + u8 s_en;
> + u32 dss_en;
> + u16 eu_en = 0;
> + u8 eu_en_fuse;
> + int eu;
> +
> + /*
> + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
> + * Instead of splitting these, provide userspace with an array
> + * of DSS to more closely represent the hardware resource.
> + */
My pet peeve about this code shouldnt' have been made to be about
userspace at all... i915_query.c/query_topology_info is where userspace
struct drm_i915_query_topology_info is populated :I Rant over.. Sorry
Mika, not directed to you as the current shepherd of this patch.
> + intel_sseu_set_info(sseu, 1, 6, 16);
> +
> + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
> +
> + dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
> +
> + /* one bit per pair of EUs */
> + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
> + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
> + if (eu_en_fuse & BIT(eu))
> + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
> +
> + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
> +
> + /* TGL only supports slice-level power gating */
> + sseu->has_slice_pg = 1;
> +}
> +
> static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
> {
> struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> u8 s_en;
> - u32 ss_en, ss_en_mask;
> + u32 ss_en;
> u8 eu_en;
> - int s;
>
> if (IS_ELKHARTLAKE(dev_priv))
> intel_sseu_set_info(sseu, 1, 4, 8);
> @@ -197,26 +257,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>
> s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
> ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> - ss_en_mask = BIT(sseu->max_subslices) - 1;
> eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>
> - for (s = 0; s < sseu->max_slices; s++) {
> - if (s_en & BIT(s)) {
> - int ss_idx = sseu->max_subslices * s;
> - int ss;
> -
> - sseu->slice_mask |= BIT(s);
> -
> - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
> - ss_en_mask);
> -
> - for (ss = 0; ss < sseu->max_subslices; ss++)
> - if (intel_sseu_has_subslice(sseu, s, ss))
> - sseu_set_eus(sseu, s, ss, eu_en);
> - }
> - }
> - sseu->eu_per_subslice = hweight8(eu_en);
> - sseu->eu_total = compute_eu_total(sseu);
> + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>
> /* ICL has no power gating restrictions. */
> sseu->has_slice_pg = 1;
> @@ -959,8 +1002,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
> gen9_sseu_info_init(dev_priv);
> else if (IS_GEN(dev_priv, 10))
> gen10_sseu_info_init(dev_priv);
> - else if (INTEL_GEN(dev_priv) >= 11)
> + else if (IS_GEN(dev_priv, 11))
> gen11_sseu_info_init(dev_priv);
> + else if (INTEL_GEN(dev_priv) >= 12)
> + gen12_sseu_info_init(dev_priv);
>
> if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
> DRM_INFO("Disabling ppGTT for VT-d support\n");
> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
> index 469dc512cca3..30c542144016 100644
> --- a/include/uapi/drm/i915_drm.h
> +++ b/include/uapi/drm/i915_drm.h
> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
> * (data[X / 8] >> (X % 8)) & 1
> *
> * - the subslice mask for each slice with one bit per subslice telling
> - * whether a subslice is available. The availability of subslice Y in slice
> - * X can be queried with the following formula :
> + * whether a subslice is available. Gen12 has dual-subslices, which are
> + * similar to two gen11 subslices. For gen12, this array represents dual-
It's ugly in user facing documentation if we cannot decide whether it is
Gen12 or gen12. Gen12 special case also probably warrants to be in its
own paragraph.
Maybe also be clearer by saying each *bit* in this array represents one
dual-subslice in Gen12.
> + * subslices. The availability of subslice Y in slice X can be queried
> + * with the following formula :
> *
> * (data[subslice_offset +
> * X * subslice_stride +
>
Just a casual read since I was copied, I am not assigning myself to be a
reviewer. :)
Regards,
Tvrtko
More information about the Intel-gfx
mailing list