[Intel-gfx] [PATCH 6/6] drm/i915: Expand subslice mask
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed May 1 18:29:18 UTC 2019
On 01/05/2019 19:22, Tvrtko Ursulin wrote:
[snip]
>> +#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2)
>> +
>> +static u8 *
>> +subslice_per_slice_str(u8 *buf, const struct sseu_dev_info *sseu, u8
>> slice)
>> +{
>> + int i;
>> + u8 ss_offset = slice * sseu->ss_stride;
>> +
>> + GEM_BUG_ON(slice >= sseu->max_slices);
>> +
>> + memset(buf, 0, SS_STR_MAX_SIZE);
>
> I suggest a more hardened approach: have the caller pass in the buffer size,
> since it is their buffer.
Having said this...
>> +
>> + /*
>> + * Print subslice information in reverse order to match
>> + * userspace expectations.
>> + */
>> + for (i = 0; i < sseu->ss_stride; i++)
>> + sprintf(&buf[i * 2], "%02x",
>> + sseu->subslice_mask[ss_offset + sseu->ss_stride -
>> + (i + 1)]);
...the sprintf also needs to be checked against overflowing the buffer.
(The relationship between the loop boundary (ss_stride) and the buffer size is
a bit decoupled.)
Also, the buffer should probably be char * rather than u8 *.
Regards,
Tvrtko
>> +
>> + return buf;
>> +}
>> +
>> static void sseu_dump(const struct sseu_dev_info *sseu, struct
>> drm_printer *p)
>> {
>> int s;
>> + u8 buf[SS_STR_MAX_SIZE];
>> drm_printf(p, "slice total: %u, mask=%04x\n",
>> hweight8(sseu->slice_mask), sseu->slice_mask);
>> drm_printf(p, "subslice total: %u\n",
>> intel_sseu_subslice_total(sseu));
>> for (s = 0; s < sseu->max_slices; s++) {
>> - drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
>> + drm_printf(p, "slice%d: %u subslices, mask=%s\n",
>> s, intel_sseu_subslices_per_slice(sseu, s),
>> - sseu->subslice_mask[s]);
>> + subslice_per_slice_str(buf, sseu, s));
>> }
>> drm_printf(p, "EU total: %u\n", sseu->eu_total);
>> drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>> @@ -118,6 +143,7 @@ void intel_device_info_dump_topology(const struct
>> sseu_dev_info *sseu,
>> struct drm_printer *p)
>> {
>> int s, ss;
>> + u8 buf[SS_STR_MAX_SIZE];
>> if (sseu->max_slices == 0) {
>> drm_printf(p, "Unavailable\n");
>> @@ -125,9 +151,9 @@ void intel_device_info_dump_topology(const struct
>> sseu_dev_info *sseu,
>> }
>> for (s = 0; s < sseu->max_slices; s++) {
>> - drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
>> + drm_printf(p, "slice%d: %u subslice(s) (0x%s):\n",
>> s, intel_sseu_subslices_per_slice(sseu, s),
>> - sseu->subslice_mask[s]);
>> + subslice_per_slice_str(buf, sseu, s));
>> for (ss = 0; ss < sseu->max_subslices; ss++) {
>> u16 enabled_eus = intel_sseu_get_eus(sseu, s, ss);
>> @@ -156,15 +182,10 @@ static void gen11_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> u8 eu_en;
>> int s;
>> - if (IS_ELKHARTLAKE(dev_priv)) {
>> - sseu->max_slices = 1;
>> - sseu->max_subslices = 4;
>> - sseu->max_eus_per_subslice = 8;
>> - } else {
>> - sseu->max_slices = 1;
>> - sseu->max_subslices = 8;
>> - sseu->max_eus_per_subslice = 8;
>> - }
>> + if (IS_ELKHARTLAKE(dev_priv))
>> + intel_sseu_set_info(sseu, 1, 4, 8);
>> + else
>> + intel_sseu_set_info(sseu, 1, 8, 8);
>> s_en = I915_READ(GEN11_GT_SLICE_ENABLE) & GEN11_GT_S_ENA_MASK;
>> ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
>> @@ -177,9 +198,11 @@ static void gen11_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> int ss;
>> sseu->slice_mask |= BIT(s);
>> - sseu->subslice_mask[s] = (ss_en >> ss_idx) & ss_en_mask;
>> + sseu->subslice_mask[s * sseu->ss_stride] =
>> + (ss_en >> ss_idx) & ss_en_mask;
>> for (ss = 0; ss < sseu->max_subslices; ss++) {
>> - if (sseu->subslice_mask[s] & BIT(ss))
>> + if (sseu->subslice_mask[s * sseu->ss_stride] &
>> + BIT(ss))
>> intel_sseu_set_eus(sseu, s, ss, eu_en);
>> }
>> }
>> @@ -201,23 +224,10 @@ static void gen10_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> const int eu_mask = 0xff;
>> u32 subslice_mask, eu_en;
>> + intel_sseu_set_info(sseu, 6, 4, 8);
>> +
>> sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>> GEN10_F2_S_ENA_SHIFT;
>> - sseu->max_slices = 6;
>> - sseu->max_subslices = 4;
>> - sseu->max_eus_per_subslice = 8;
>> -
>> - subslice_mask = (1 << 4) - 1;
>> - subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> - GEN10_F2_SS_DIS_SHIFT);
>> -
>> - /*
>> - * Slice0 can have up to 3 subslices, but there are only 2 in
>> - * slice1/2.
>> - */
>> - sseu->subslice_mask[0] = subslice_mask;
>> - for (s = 1; s < sseu->max_slices; s++)
>> - sseu->subslice_mask[s] = subslice_mask & 0x3;
>> /* Slice0 */
>> eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>> @@ -242,14 +252,22 @@ static void gen10_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> eu_en = ~I915_READ(GEN10_EU_DISABLE3);
>> intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
>> - /* Do a second pass where we mark the subslices disabled if all
>> their
>> - * eus are off.
>> - */
>> + subslice_mask = (1 << 4) - 1;
>> + subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> + GEN10_F2_SS_DIS_SHIFT);
>> +
>> for (s = 0; s < sseu->max_slices; s++) {
>> for (ss = 0; ss < sseu->max_subslices; ss++) {
>> if (intel_sseu_get_eus(sseu, s, ss) == 0)
>> - sseu->subslice_mask[s] &= ~BIT(ss);
>> + subslice_mask &= ~BIT(ss);
>> }
>> +
>> + /*
>> + * Slice0 can have up to 3 subslices, but there are only 2 in
>> + * slice1/2.
>> + */
>> + intel_sseu_set_subslices(sseu, s, s == 0 ? subslice_mask :
>> + subslice_mask & 0x3);
>> }
>> sseu->eu_total = compute_eu_total(sseu);
>> @@ -275,13 +293,12 @@ static void cherryview_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> {
>> struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>> u32 fuse;
>> + u8 subslice_mask;
>> fuse = I915_READ(CHV_FUSE_GT);
>> sseu->slice_mask = BIT(0);
>> - sseu->max_slices = 1;
>> - sseu->max_subslices = 2;
>> - sseu->max_eus_per_subslice = 8;
>> + intel_sseu_set_info(sseu, 1, 2, 8);
>> if (!(fuse & CHV_FGT_DISABLE_SS0)) {
>> u8 disabled_mask =
>> @@ -290,7 +307,7 @@ static void cherryview_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>> - sseu->subslice_mask[0] |= BIT(0);
>> + subslice_mask |= BIT(0);
>> intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask);
>> }
>> @@ -301,10 +318,12 @@ static void cherryview_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
>> CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
>> - sseu->subslice_mask[0] |= BIT(1);
>> + subslice_mask |= BIT(1);
>> intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask);
>> }
>> + intel_sseu_set_subslices(sseu, 0, subslice_mask);
>> +
>> sseu->eu_total = compute_eu_total(sseu);
>> /*
>> @@ -312,7 +331,8 @@ static void cherryview_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> * across subslices.
>> */
>> sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
>> - sseu->eu_total / intel_sseu_subslice_total(sseu) :
>> + sseu->eu_total /
>> + intel_sseu_subslice_total(sseu) :
>> 0;
>> /*
>> * CHV supports subslice power gating on devices with more than
>> @@ -336,9 +356,8 @@ static void gen9_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >>
>> GEN8_F2_S_ENA_SHIFT;
>> /* BXT has a single slice and at most 3 subslices. */
>> - sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
>> - sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
>> - sseu->max_eus_per_subslice = 8;
>> + intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3,
>> + IS_GEN9_LP(dev_priv) ? 3 : 4, 8);
>> /*
>> * The subslice disable field is global, i.e. it applies
>> @@ -357,14 +376,16 @@ static void gen9_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> /* skip disabled slice */
>> continue;
>> - sseu->subslice_mask[s] = subslice_mask;
>> + intel_sseu_set_subslices(sseu, s, subslice_mask);
>> eu_disable = I915_READ(GEN9_EU_DISABLE(s));
>> for (ss = 0; ss < sseu->max_subslices; ss++) {
>> int eu_per_ss;
>> u8 eu_disabled_mask;
>> + u8 ss_idx = s * sseu->ss_stride + ss / BITS_PER_BYTE;
>> - if (!(sseu->subslice_mask[s] & BIT(ss)))
>> + if (!(sseu->subslice_mask[ss_idx] &
>> + BIT(ss % BITS_PER_BYTE)))
>> /* skip disabled subslice */
>> continue;
>> @@ -437,9 +458,7 @@ static void broadwell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> fuse2 = I915_READ(GEN8_FUSE2);
>> sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >>
>> GEN8_F2_S_ENA_SHIFT;
>> - sseu->max_slices = 3;
>> - sseu->max_subslices = 3;
>> - sseu->max_eus_per_subslice = 8;
>> + intel_sseu_set_info(sseu, 3, 3, 8);
>> /*
>> * The subslice disable field is global, i.e. it applies
>> @@ -466,18 +485,21 @@ static void broadwell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> /* skip disabled slice */
>> continue;
>> - sseu->subslice_mask[s] = subslice_mask;
>> + intel_sseu_set_subslices(sseu, s, subslice_mask);
>> for (ss = 0; ss < sseu->max_subslices; ss++) {
>> u8 eu_disabled_mask;
>> + u8 ss_idx = s * sseu->ss_stride + ss / BITS_PER_BYTE;
>> u32 n_disabled;
>> - if (!(sseu->subslice_mask[s] & BIT(ss)))
>> + if (!(sseu->subslice_mask[ss_idx] &
>> + BIT(ss % BITS_PER_BYTE)))
>> /* skip disabled subslice */
>> continue;
>> eu_disabled_mask =
>> - eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
>> + eu_disable[s] >>
>> + (ss * sseu->max_eus_per_subslice);
>> intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
>> @@ -517,6 +539,7 @@ static void haswell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
>> u32 fuse1;
>> int s, ss;
>> + u32 subslice_mask;
>> /*
>> * There isn't a register to tell us how many slices/subslices. We
>> @@ -528,22 +551,18 @@ static void haswell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> /* fall through */
>> case 1:
>> sseu->slice_mask = BIT(0);
>> - sseu->subslice_mask[0] = BIT(0);
>> + subslice_mask = BIT(0);
>> break;
>> case 2:
>> sseu->slice_mask = BIT(0);
>> - sseu->subslice_mask[0] = BIT(0) | BIT(1);
>> + subslice_mask = BIT(0) | BIT(1);
>> break;
>> case 3:
>> sseu->slice_mask = BIT(0) | BIT(1);
>> - sseu->subslice_mask[0] = BIT(0) | BIT(1);
>> - sseu->subslice_mask[1] = BIT(0) | BIT(1);
>> + subslice_mask = BIT(0) | BIT(1);
>> break;
>> }
>> - sseu->max_slices = hweight8(sseu->slice_mask);
>> - sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
>> -
>> fuse1 = I915_READ(HSW_PAVP_FUSE1);
>> switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
>> default:
>> @@ -560,9 +579,14 @@ static void haswell_sseu_info_init(struct
>> drm_i915_private *dev_priv)
>> sseu->eu_per_subslice = 6;
>> break;
>> }
>> - sseu->max_eus_per_subslice = sseu->eu_per_subslice;
>> +
>> + intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
>> + hweight8(subslice_mask),
>> + sseu->eu_per_subslice);
>> for (s = 0; s < sseu->max_slices; s++) {
>> + intel_sseu_set_subslices(sseu, s, subslice_mask);
>> +
>> for (ss = 0; ss < sseu->max_subslices; ss++) {
>> intel_sseu_set_eus(sseu, s, ss,
>> (1UL << sseu->eu_per_subslice) - 1);
>>
More information about the Intel-gfx
mailing list