[Intel-gfx] [PATCH 6/6] drm/i915: Expand subslice mask

Summers, Stuart stuart.summers at intel.com
Wed May 1 19:40:37 UTC 2019


On Wed, 2019-05-01 at 19:29 +0100, Tvrtko Ursulin wrote:
> On 01/05/2019 19:22, Tvrtko Ursulin wrote:
> 
> [snip]
> 
> > > +#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2)
> > > +
> > > +static u8 *
> > > +subslice_per_slice_str(u8 *buf, const struct sseu_dev_info
> > > *sseu, u8 
> > > slice)
> > > +{
> > > +    int i;
> > > +    u8 ss_offset = slice * sseu->ss_stride;
> > > +
> > > +    GEM_BUG_ON(slice >= sseu->max_slices);
> > > +
> > > +    memset(buf, 0, SS_STR_MAX_SIZE);
> > 
> > I suggest a more hardened approach of the caller passing in the
> > buffer size, since it is their buffer.

Not a bad idea. I had the define to make this explicit and to handle
future cases, but you're probably right that it's better to isolate
this. I'll make the change in the next series update.

> 
> Having said this..
> 
> > > +
> > > +    /*
> > > +     * Print subslice information in reverse order to match
> > > +     * userspace expectations.
> > > +     */
> > > +    for (i = 0; i < sseu->ss_stride; i++)
> > > +        sprintf(&buf[i * 2], "%02x",
> > > +            sseu->subslice_mask[ss_offset + sseu->ss_stride -
> > > +                        (i + 1)]);
> 
> ...sprintf also needs to check against overflowing the buffer.
> (The relationship between the loop boundary (ss_stride) and the
> buffer size is a bit decoupled.)

I'll add the check, makes sense.

> 
> And buffer should probably be char *.

No problem. I'll make this change. Thanks for the feedback!

- Stuart

> 
> Regards,
> 
> Tvrtko
> 
> > > +
> > > +    return buf;
> > > +}
> > > +
> > >   static void sseu_dump(const struct sseu_dev_info *sseu, struct 
> > > drm_printer *p)
> > >   {
> > >       int s;
> > > +    u8 buf[SS_STR_MAX_SIZE];
> > >       drm_printf(p, "slice total: %u, mask=%04x\n",
> > >              hweight8(sseu->slice_mask), sseu->slice_mask);
> > >       drm_printf(p, "subslice total: %u\n", 
> > > intel_sseu_subslice_total(sseu));
> > >       for (s = 0; s < sseu->max_slices; s++) {
> > > -        drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
> > > +        drm_printf(p, "slice%d: %u subslices, mask=%s\n",
> > >                  s, intel_sseu_subslices_per_slice(sseu, s),
> > > -               sseu->subslice_mask[s]);
> > > +               subslice_per_slice_str(buf, sseu, s));
> > >       }
> > >       drm_printf(p, "EU total: %u\n", sseu->eu_total);
> > >       drm_printf(p, "EU per subslice: %u\n", sseu-
> > > >eu_per_subslice);
> > > @@ -118,6 +143,7 @@ void intel_device_info_dump_topology(const
> > > struct 
> > > sseu_dev_info *sseu,
> > >                        struct drm_printer *p)
> > >   {
> > >       int s, ss;
> > > +    u8 buf[SS_STR_MAX_SIZE];
> > >       if (sseu->max_slices == 0) {
> > >           drm_printf(p, "Unavailable\n");
> > > @@ -125,9 +151,9 @@ void intel_device_info_dump_topology(const
> > > struct 
> > > sseu_dev_info *sseu,
> > >       }
> > >       for (s = 0; s < sseu->max_slices; s++) {
> > > -        drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
> > > +        drm_printf(p, "slice%d: %u subslice(s) (0x%s):\n",
> > >                  s, intel_sseu_subslices_per_slice(sseu, s),
> > > -               sseu->subslice_mask[s]);
> > > +               subslice_per_slice_str(buf, sseu, s));
> > >           for (ss = 0; ss < sseu->max_subslices; ss++) {
> > >               u16 enabled_eus = intel_sseu_get_eus(sseu, s, ss);
> > > @@ -156,15 +182,10 @@ static void gen11_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >       u8 eu_en;
> > >       int s;
> > > -    if (IS_ELKHARTLAKE(dev_priv)) {
> > > -        sseu->max_slices = 1;
> > > -        sseu->max_subslices = 4;
> > > -        sseu->max_eus_per_subslice = 8;
> > > -    } else {
> > > -        sseu->max_slices = 1;
> > > -        sseu->max_subslices = 8;
> > > -        sseu->max_eus_per_subslice = 8;
> > > -    }
> > > +    if (IS_ELKHARTLAKE(dev_priv))
> > > +        intel_sseu_set_info(sseu, 1, 4, 8);
> > > +    else
> > > +        intel_sseu_set_info(sseu, 1, 8, 8);
> > >       s_en = I915_READ(GEN11_GT_SLICE_ENABLE) &
> > > GEN11_GT_S_ENA_MASK;
> > >       ss_en = ~I915_READ(GEN11_GT_SUBSLICE_DISABLE);
> > > @@ -177,9 +198,11 @@ static void gen11_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >               int ss;
> > >               sseu->slice_mask |= BIT(s);
> > > -            sseu->subslice_mask[s] = (ss_en >> ss_idx) &
> > > ss_en_mask;
> > > +            sseu->subslice_mask[s * sseu->ss_stride] =
> > > +                (ss_en >> ss_idx) & ss_en_mask;
> > >               for (ss = 0; ss < sseu->max_subslices; ss++) {
> > > -                if (sseu->subslice_mask[s] & BIT(ss))
> > > +                if (sseu->subslice_mask[s * sseu->ss_stride] &
> > > +                    BIT(ss))
> > >                       intel_sseu_set_eus(sseu, s, ss, eu_en);
> > >               }
> > >           }
> > > @@ -201,23 +224,10 @@ static void gen10_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >       const int eu_mask = 0xff;
> > >       u32 subslice_mask, eu_en;
> > > +    intel_sseu_set_info(sseu, 6, 4, 8);
> > > +
> > >       sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
> > >                   GEN10_F2_S_ENA_SHIFT;
> > > -    sseu->max_slices = 6;
> > > -    sseu->max_subslices = 4;
> > > -    sseu->max_eus_per_subslice = 8;
> > > -
> > > -    subslice_mask = (1 << 4) - 1;
> > > -    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> > > -               GEN10_F2_SS_DIS_SHIFT);
> > > -
> > > -    /*
> > > -     * Slice0 can have up to 3 subslices, but there are only 2
> > > in
> > > -     * slice1/2.
> > > -     */
> > > -    sseu->subslice_mask[0] = subslice_mask;
> > > -    for (s = 1; s < sseu->max_slices; s++)
> > > -        sseu->subslice_mask[s] = subslice_mask & 0x3;
> > >       /* Slice0 */
> > >       eu_en = ~I915_READ(GEN8_EU_DISABLE0);
> > > @@ -242,14 +252,22 @@ static void gen10_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >       eu_en = ~I915_READ(GEN10_EU_DISABLE3);
> > >       intel_sseu_set_eus(sseu, 5, 1, eu_en & eu_mask);
> > > -    /* Do a second pass where we mark the subslices disabled if
> > > all 
> > > their
> > > -     * eus are off.
> > > -     */
> > > +    subslice_mask = (1 << 4) - 1;
> > > +    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
> > > +               GEN10_F2_SS_DIS_SHIFT);
> > > +
> > >       for (s = 0; s < sseu->max_slices; s++) {
> > >           for (ss = 0; ss < sseu->max_subslices; ss++) {
> > >               if (intel_sseu_get_eus(sseu, s, ss) == 0)
> > > -                sseu->subslice_mask[s] &= ~BIT(ss);
> > > +                subslice_mask &= ~BIT(ss);
> > >           }
> > > +
> > > +        /*
> > > +         * Slice0 can have up to 3 subslices, but there are only
> > > 2 in
> > > +         * slice1/2.
> > > +         */
> > > +        intel_sseu_set_subslices(sseu, s, s == 0 ? subslice_mask
> > > :
> > > +                               subslice_mask & 0x3);
> > >       }
> > >       sseu->eu_total = compute_eu_total(sseu);
> > > @@ -275,13 +293,12 @@ static void
> > > cherryview_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >   {
> > >       struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> > >       u32 fuse;
> > > +    u8 subslice_mask;
> > >       fuse = I915_READ(CHV_FUSE_GT);
> > >       sseu->slice_mask = BIT(0);
> > > -    sseu->max_slices = 1;
> > > -    sseu->max_subslices = 2;
> > > -    sseu->max_eus_per_subslice = 8;
> > > +    intel_sseu_set_info(sseu, 1, 2, 8);
> > >       if (!(fuse & CHV_FGT_DISABLE_SS0)) {
> > >           u8 disabled_mask =
> > > @@ -290,7 +307,7 @@ static void cherryview_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >               (((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >>
> > >                 CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
> > > -        sseu->subslice_mask[0] |= BIT(0);
> > > +        subslice_mask |= BIT(0);
> > >           intel_sseu_set_eus(sseu, 0, 0, ~disabled_mask);
> > >       }
> > > @@ -301,10 +318,12 @@ static void
> > > cherryview_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >               (((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >>
> > >                 CHV_FGT_EU_DIS_SS1_R1_SHIFT) << 4);
> > > -        sseu->subslice_mask[0] |= BIT(1);
> > > +        subslice_mask |= BIT(1);
> > >           intel_sseu_set_eus(sseu, 0, 1, ~disabled_mask);
> > >       }
> > > +    intel_sseu_set_subslices(sseu, 0, subslice_mask);
> > > +
> > >       sseu->eu_total = compute_eu_total(sseu);
> > >       /*
> > > @@ -312,7 +331,8 @@ static void cherryview_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >        * across subslices.
> > >       */
> > >       sseu->eu_per_subslice = intel_sseu_subslice_total(sseu) ?
> > > -                sseu->eu_total / intel_sseu_subslice_total(sseu)
> > > :
> > > +                sseu->eu_total /
> > > +                    intel_sseu_subslice_total(sseu) :
> > >                   0;
> > >       /*
> > >        * CHV supports subslice power gating on devices with more
> > > than
> > > @@ -336,9 +356,8 @@ static void gen9_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
> > > GEN8_F2_S_ENA_SHIFT;
> > >       /* BXT has a single slice and at most 3 subslices. */
> > > -    sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
> > > -    sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
> > > -    sseu->max_eus_per_subslice = 8;
> > > +    intel_sseu_set_info(sseu, IS_GEN9_LP(dev_priv) ? 1 : 3,
> > > +                IS_GEN9_LP(dev_priv) ? 3 : 4, 8);
> > >       /*
> > >        * The subslice disable field is global, i.e. it applies
> > > @@ -357,14 +376,16 @@ static void gen9_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >               /* skip disabled slice */
> > >               continue;
> > > -        sseu->subslice_mask[s] = subslice_mask;
> > > +        intel_sseu_set_subslices(sseu, s, subslice_mask);
> > >           eu_disable = I915_READ(GEN9_EU_DISABLE(s));
> > >           for (ss = 0; ss < sseu->max_subslices; ss++) {
> > >               int eu_per_ss;
> > >               u8 eu_disabled_mask;
> > > +            u8 ss_idx = s * sseu->ss_stride + ss /
> > > BITS_PER_BYTE;
> > > -            if (!(sseu->subslice_mask[s] & BIT(ss)))
> > > +            if (!(sseu->subslice_mask[ss_idx] &
> > > +                  BIT(ss % BITS_PER_BYTE)))
> > >                   /* skip disabled subslice */
> > >                   continue;
> > > @@ -437,9 +458,7 @@ static void broadwell_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >       fuse2 = I915_READ(GEN8_FUSE2);
> > >       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
> > > GEN8_F2_S_ENA_SHIFT;
> > > -    sseu->max_slices = 3;
> > > -    sseu->max_subslices = 3;
> > > -    sseu->max_eus_per_subslice = 8;
> > > +    intel_sseu_set_info(sseu, 3, 3, 8);
> > >       /*
> > >        * The subslice disable field is global, i.e. it applies
> > > @@ -466,18 +485,21 @@ static void
> > > broadwell_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >               /* skip disabled slice */
> > >               continue;
> > > -        sseu->subslice_mask[s] = subslice_mask;
> > > +        intel_sseu_set_subslices(sseu, s, subslice_mask);
> > >           for (ss = 0; ss < sseu->max_subslices; ss++) {
> > >               u8 eu_disabled_mask;
> > > +            u8 ss_idx = s * sseu->ss_stride + ss /
> > > BITS_PER_BYTE;
> > >               u32 n_disabled;
> > > -            if (!(sseu->subslice_mask[s] & BIT(ss)))
> > > +            if (!(sseu->subslice_mask[ss_idx] &
> > > +                  BIT(ss % BITS_PER_BYTE)))
> > >                   /* skip disabled subslice */
> > >                   continue;
> > >               eu_disabled_mask =
> > > -                eu_disable[s] >> (ss * sseu-
> > > >max_eus_per_subslice);
> > > +                eu_disable[s] >>
> > > +                    (ss * sseu->max_eus_per_subslice);
> > >               intel_sseu_set_eus(sseu, s, ss, ~eu_disabled_mask);
> > > @@ -517,6 +539,7 @@ static void haswell_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >       struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> > >       u32 fuse1;
> > >       int s, ss;
> > > +    u32 subslice_mask;
> > >       /*
> > >        * There isn't a register to tell us how many
> > > slices/subslices. We
> > > @@ -528,22 +551,18 @@ static void haswell_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >           /* fall through */
> > >       case 1:
> > >           sseu->slice_mask = BIT(0);
> > > -        sseu->subslice_mask[0] = BIT(0);
> > > +        subslice_mask = BIT(0);
> > >           break;
> > >       case 2:
> > >           sseu->slice_mask = BIT(0);
> > > -        sseu->subslice_mask[0] = BIT(0) | BIT(1);
> > > +        subslice_mask = BIT(0) | BIT(1);
> > >           break;
> > >       case 3:
> > >           sseu->slice_mask = BIT(0) | BIT(1);
> > > -        sseu->subslice_mask[0] = BIT(0) | BIT(1);
> > > -        sseu->subslice_mask[1] = BIT(0) | BIT(1);
> > > +        subslice_mask = BIT(0) | BIT(1);
> > >           break;
> > >       }
> > > -    sseu->max_slices = hweight8(sseu->slice_mask);
> > > -    sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
> > > -
> > >       fuse1 = I915_READ(HSW_PAVP_FUSE1);
> > >       switch ((fuse1 & HSW_F1_EU_DIS_MASK) >>
> > > HSW_F1_EU_DIS_SHIFT) {
> > >       default:
> > > @@ -560,9 +579,14 @@ static void haswell_sseu_info_init(struct 
> > > drm_i915_private *dev_priv)
> > >           sseu->eu_per_subslice = 6;
> > >           break;
> > >       }
> > > -    sseu->max_eus_per_subslice = sseu->eu_per_subslice;
> > > +
> > > +    intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
> > > +                hweight8(subslice_mask),
> > > +                sseu->eu_per_subslice);
> > >       for (s = 0; s < sseu->max_slices; s++) {
> > > +        intel_sseu_set_subslices(sseu, s, subslice_mask);
> > > +
> > >           for (ss = 0; ss < sseu->max_subslices; ss++) {
> > >               intel_sseu_set_eus(sseu, s, ss,
> > >                          (1UL << sseu->eu_per_subslice) - 1);
> > > 
-------------- next part --------------
A non-text attachment was scrubbed...
Name: smime.p7s
Type: application/x-pkcs7-signature
Size: 3270 bytes
Desc: not available
URL: <https://lists.freedesktop.org/archives/intel-gfx/attachments/20190501/c506f793/attachment.bin>


More information about the Intel-gfx mailing list