[Intel-gfx] [PATCH 2/2] drm/i915/tgl: s/ss/eu fuse reading support
Lionel Landwerlin
lionel.g.landwerlin at intel.com
Sun Sep 22 16:48:43 UTC 2019
On 21/09/2019 03:39, Lucas De Marchi wrote:
> On Fri, Sep 13, 2019 at 12:51 AM Chris Wilson <chris at chris-wilson.co.uk> wrote:
>> From: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>>
>> Gen12 has dual-subslices (DSS), which compared to gen11 subslices have
>> some duplicated resources/paths. Although DSS behave similarly to 2
>> subslices, instead of splitting this and presenting userspace with bits
>> not directly representative of hardware resources, present userspace
>> with a subslice_mask made up of DSS bits instead.
>>
>> v2: GEM_BUG_ON on mask size (Lionel)
>>
>> Bspec: 29547
>> Bspec: 12247
>> Cc: Kelvin Gardiner <kelvin.gardiner at intel.com>
>> Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
>> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> Cc: Radhakrishna Sripada <radhakrishna.sripada at intel.com>
>> Cc: Michel Thierry <michel.thierry at intel.com> #v1
>> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Cc: José Roberto de Souza <jose.souza at intel.com>
>> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
>> Signed-off-by: James Ausmus <james.ausmus at intel.com>
>> Signed-off-by: Oscar Mateo <oscar.mateo at intel.com>
>> Signed-off-by: Sudeep Dutt <sudeep.dutt at intel.com>
>> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
>> Signed-off-by: Mika Kuoppala <mika.kuoppala at linux.intel.com>
>> Acked-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
> After this I get the correct values for TGL:
> - Available Subslice Total: 2
> - Available Slice0 subslices: 2
> - Available EU Total: 16
> - Available EU Per Subslice: 8
> + Available Subslice Total: 6
> + Available Slice0 subslices: 6
> + Available EU Total: 96
> + Available EU Per Subslice: 16
>
> Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
>
> Lucas De Marchi
Btw, shouldn't we print "Dualsubslice" rather than "Subslice" for TGL?
-Lionel
>> drivers/gpu/drm/i915/gt/intel_sseu.h | 9 +--
>> drivers/gpu/drm/i915/i915_debugfs.c | 3 +-
>> drivers/gpu/drm/i915/i915_reg.h | 2 +
>> drivers/gpu/drm/i915/intel_device_info.c | 83 ++++++++++++++++++------
>> include/uapi/drm/i915_drm.h | 6 +-
>> 5 files changed, 72 insertions(+), 31 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> index 4070f6ff1db6..d1d225204f09 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> @@ -18,12 +18,13 @@ struct drm_i915_private;
>> #define GEN_MAX_SUBSLICES (8) /* ICL upper bound */
>> #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>> #define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>> -#define GEN_MAX_EUS (10) /* HSW upper bound */
>> +#define GEN_MAX_EUS (16) /* TGL upper bound */
>> #define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>>
>> struct sseu_dev_info {
>> u8 slice_mask;
>> u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>> + u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>> u16 eu_total;
>> u8 eu_per_subslice;
>> u8 min_eu_in_pool;
>> @@ -40,12 +41,6 @@ struct sseu_dev_info {
>>
>> u8 ss_stride;
>> u8 eu_stride;
>> -
>> - /* We don't have more than 8 eus per subslice at the moment and as we
>> - * store eus enabled using bits, no need to multiply by eus per
>> - * subslice.
>> - */
>> - u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>> };
>>
>> /*
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 43db50095257..b5b449a88cf1 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3823,7 +3823,8 @@ static void gen10_sseu_device_status(struct drm_i915_private *dev_priv,
>> for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>> unsigned int eu_cnt;
>>
>> - if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>> + if (info->sseu.has_subslice_pg &&
>> + !(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>> /* skip disabled subslice */
>> continue;
>>
>> diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h
>> index bf37ecebc82f..47847135a11f 100644
>> --- a/drivers/gpu/drm/i915/i915_reg.h
>> +++ b/drivers/gpu/drm/i915/i915_reg.h
>> @@ -2956,6 +2956,8 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
>>
>> #define GEN11_GT_SUBSLICE_DISABLE _MMIO(0x913C)
>>
>> +#define GEN12_GT_DSS_ENABLE _MMIO(0x913C)
>> +
>> #define GEN6_BSD_SLEEP_PSMI_CONTROL _MMIO(0x12050)
>> #define GEN6_BSD_SLEEP_MSG_DISABLE (1 << 0)
>> #define GEN6_BSD_SLEEP_FLUSH_DISABLE (1 << 2)
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c
>> index 50b05a5de53b..b91a960b037f 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -182,13 +182,69 @@ static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>> return total;
>> }
>>
>> +static void gen11_compute_sseu_info(struct sseu_dev_info *sseu,
>> + u8 s_en, u32 ss_en, u16 eu_en)
>> +{
>> + int s, ss;
>> +
>> + /* ss_en represents entire subslice mask across all slices */
>> + GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
>> + sizeof(ss_en) * BITS_PER_BYTE);
>> +
>> + for (s = 0; s < sseu->max_slices; s++) {
>> + if ((s_en & BIT(s)) == 0)
>> + continue;
>> +
>> + sseu->slice_mask |= BIT(s);
>> +
>> + intel_sseu_set_subslices(sseu, s, ss_en);
>> +
>> + for (ss = 0; ss < sseu->max_subslices; ss++)
>> + if (intel_sseu_has_subslice(sseu, s, ss))
>> + sseu_set_eus(sseu, s, ss, eu_en);
>> + }
>> + sseu->eu_per_subslice = hweight16(eu_en);
>> + sseu->eu_total = compute_eu_total(sseu);
>> +}
>> +
>> +static void gen12_sseu_info_init(struct drm_i915_private *dev_priv)
>> +{
>> + struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>> + u8 s_en;
>> + u32 dss_en;
>> + u16 eu_en = 0;
>> + u8 eu_en_fuse;
>> + int eu;
>> +
>> + /*
>> + * Gen12 has Dual-Subslices, which behave similarly to 2 gen11 SS.
>> + * Instead of splitting these, provide userspace with an array
>> + * of DSS to more closely represent the hardware resource.
>> + */
>> + intel_sseu_set_info(sseu, 1, 6, 16);
>> +
>> + s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>> +
>> + dss_en = I915_READ(GEN12_GT_DSS_ENABLE);
>> +
>> + /* one bit per pair of EUs */
>> + eu_en_fuse = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>> + for (eu = 0; eu < sseu->max_eus_per_subslice / 2; eu++)
>> + if (eu_en_fuse & BIT(eu))
>> + eu_en |= BIT(eu * 2) | BIT(eu * 2 + 1);
>> +
>> + gen11_compute_sseu_info(sseu, s_en, dss_en, eu_en);
>> +
>> + /* TGL only supports slice-level power gating */
>> + sseu->has_slice_pg = 1;
>> +}
>> +
>> static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>> {
>> struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>> u8 s_en;
>> - u32 ss_en, ss_en_mask;
>> + u32 ss_en;
>> u8 eu_en;
>> - int s;
>>
>> if (IS_ELKHARTLAKE(dev_priv))
>> intel_sseu_set_info(sseu, 1, 4, 8);
>> @@ -197,26 +253,9 @@ static void gen11_sseu_info_init(struct drm_i915_private *dev_priv)
>>
>> s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>> ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
>> - ss_en_mask = BIT(sseu->max_subslices) - 1;
>> eu_en = ~(I915_READ(GEN11_EU_DISABLE) & GEN11_EU_DIS_MASK);
>>
>> - for (s = 0; s < sseu->max_slices; s++) {
>> - if (s_en & BIT(s)) {
>> - int ss_idx = sseu->max_subslices * s;
>> - int ss;
>> -
>> - sseu->slice_mask |= BIT(s);
>> -
>> - intel_sseu_set_subslices(sseu, s, (ss_en >> ss_idx) &
>> - ss_en_mask);
>> -
>> - for (ss = 0; ss < sseu->max_subslices; ss++)
>> - if (intel_sseu_has_subslice(sseu, s, ss))
>> - sseu_set_eus(sseu, s, ss, eu_en);
>> - }
>> - }
>> - sseu->eu_per_subslice = hweight8(eu_en);
>> - sseu->eu_total = compute_eu_total(sseu);
>> + gen11_compute_sseu_info(sseu, s_en, ss_en, eu_en);
>>
>> /* ICL has no power gating restrictions. */
>> sseu->has_slice_pg = 1;
>> @@ -959,8 +998,10 @@ void intel_device_info_runtime_init(struct drm_i915_private *dev_priv)
>> gen9_sseu_info_init(dev_priv);
>> else if (IS_GEN(dev_priv, 10))
>> gen10_sseu_info_init(dev_priv);
>> - else if (INTEL_GEN(dev_priv) >= 11)
>> + else if (IS_GEN(dev_priv, 11))
>> gen11_sseu_info_init(dev_priv);
>> + else if (INTEL_GEN(dev_priv) >= 12)
>> + gen12_sseu_info_init(dev_priv);
>>
>> if (IS_GEN(dev_priv, 6) && intel_vtd_active()) {
>> DRM_INFO("Disabling ppGTT for VT-d support\n");
>> diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
>> index 469dc512cca3..30c542144016 100644
>> --- a/include/uapi/drm/i915_drm.h
>> +++ b/include/uapi/drm/i915_drm.h
>> @@ -2033,8 +2033,10 @@ struct drm_i915_query {
>> * (data[X / 8] >> (X % 8)) & 1
>> *
>> * - the subslice mask for each slice with one bit per subslice telling
>> - * whether a subslice is available. The availability of subslice Y in slice
>> - * X can be queried with the following formula :
>> + * whether a subslice is available. Gen12 has dual-subslices, which are
>> + * similar to two gen11 subslices. For gen12, this array represents dual-
>> + * subslices. The availability of subslice Y in slice X can be queried
>> + * with the following formula :
>> *
>> * (data[subslice_offset +
>> * X * subslice_stride +
>> --
>> 2.23.0
>>
>> _______________________________________________
>> Intel-gfx mailing list
>> Intel-gfx at lists.freedesktop.org
>> https://lists.freedesktop.org/mailman/listinfo/intel-gfx
>
>
More information about the Intel-gfx mailing list