[Intel-gfx] [PATCH v2 1/6] drm/i915: store all subslice masks

Lionel Landwerlin lionel.g.landwerlin at intel.com
Fri Jan 12 10:58:28 UTC 2018


On 12/01/18 10:15, Tvrtko Ursulin wrote:
>
> On 11/01/2018 19:53, Lionel Landwerlin wrote:
>> Up to now, subslice mask was assumed to be uniform across slices. But
>> starting with Cannonlake, slices can be asymmetric (for example slice0
>> has different number of subslices as slice1+). This change stores all
>> subslices masks for all slices rather than having a single mask that
>> applies to all slices.
>>
>> v2: Rework how we store total numbers in sseu_dev_info (Tvrtko)
>>      Fix CHV eu masks, was reading disabled as enabled (Tvrtko)
>>      Readability changes (Tvrtko)
>>      Add EU index helper (Tvrtko)
>>
>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>> ---
>>   drivers/gpu/drm/i915/i915_debugfs.c      |  25 ++--
>>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>>   drivers/gpu/drm/i915/intel_device_info.c | 196 
>> +++++++++++++++++++++++--------
>>   drivers/gpu/drm/i915/intel_device_info.h |  36 +++++-
>>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>>   6 files changed, 200 insertions(+), 63 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 2bb63073d73f..463029f72a0b 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -4285,7 +4285,7 @@ static void 
>> cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>>               continue;
>>             sseu->slice_mask = BIT(0);
>> -        sseu->subslice_mask |= BIT(ss);
>> +        sseu->subslice_mask[0] |= BIT(ss);
>>           eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>>                ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>>                ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
>> @@ -4332,7 +4332,7 @@ static void gen10_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>               continue;
>>             sseu->slice_mask |= BIT(s);
>> -        sseu->subslice_mask = info->sseu.subslice_mask;
>> +        sseu->subslice_mask[s] = info->sseu.subslice_mask[s];
>>             for (ss = 0; ss < ss_max; ss++) {
>>               unsigned int eu_cnt;
>> @@ -4387,8 +4387,8 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>           sseu->slice_mask |= BIT(s);
>>             if (IS_GEN9_BC(dev_priv))
>> -            sseu->subslice_mask =
>> -                INTEL_INFO(dev_priv)->sseu.subslice_mask;
>> +            sseu->subslice_mask[s] =
>> +                INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
>>             for (ss = 0; ss < ss_max; ss++) {
>>               unsigned int eu_cnt;
>> @@ -4398,7 +4398,7 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>                       /* skip disabled subslice */
>>                       continue;
>>   -                sseu->subslice_mask |= BIT(ss);
>> +                sseu->subslice_mask[s] |= BIT(ss);
>>               }
>>                 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>> @@ -4420,9 +4420,12 @@ static void 
>> broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>>       sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>>         if (sseu->slice_mask) {
>> -        sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>           sseu->eu_per_subslice =
>>                   INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
>> +        for (s = 0; s < fls(sseu->slice_mask); s++) {
>> +            sseu->subslice_mask[s] =
>> +                INTEL_INFO(dev_priv)->sseu.subslice_mask[s];
>> +        }
>>           sseu->eu_total = sseu->eu_per_subslice *
>>                    sseu_subslice_total(sseu);
>>   @@ -4441,6 +4444,7 @@ static void i915_print_sseu_info(struct 
>> seq_file *m, bool is_available_info,
>>   {
>>       struct drm_i915_private *dev_priv = node_to_i915(m->private);
>>       const char *type = is_available_info ? "Available" : "Enabled";
>> +    int s;
>>         seq_printf(m, "  %s Slice Mask: %04x\n", type,
>>              sseu->slice_mask);
>> @@ -4448,10 +4452,11 @@ static void i915_print_sseu_info(struct 
>> seq_file *m, bool is_available_info,
>>              hweight8(sseu->slice_mask));
>>       seq_printf(m, "  %s Subslice Total: %u\n", type,
>>              sseu_subslice_total(sseu));
>> -    seq_printf(m, "  %s Subslice Mask: %04x\n", type,
>> -           sseu->subslice_mask);
>> -    seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
>> -           hweight8(sseu->subslice_mask));
>> +    for (s = 0; s < fls(sseu->slice_mask); s++) {
>> +        seq_printf(m, "  %s Slice%i %u subslices, mask=%04x\n", type,
>> +               s, hweight8(sseu->subslice_mask[s]),
>> +               sseu->subslice_mask[s]);
>> +    }
>>       seq_printf(m, "  %s EU Total: %u\n", type,
>>              sseu->eu_total);
>>       seq_printf(m, "  %s EU Per Subslice: %u\n", type,
>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>> b/drivers/gpu/drm/i915/i915_drv.c
>> index 6c8da9d20c33..969835d3cbcd 100644
>> --- a/drivers/gpu/drm/i915/i915_drv.c
>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, 
>> void *data,
>>               return -ENODEV;
>>           break;
>>       case I915_PARAM_SUBSLICE_MASK:
>> -        value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>> +        value = INTEL_INFO(dev_priv)->sseu.subslice_mask[0];
>>           if (!value)
>>               return -ENODEV;
>>           break;
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index d28592e43512..0bf6ef51cdb7 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -80,12 +80,17 @@ void intel_device_info_dump_flags(const struct 
>> intel_device_info *info,
>>     static void sseu_dump(const struct sseu_dev_info *sseu, struct 
>> drm_printer *p)
>>   {
>> +    int s;
>> +
>>       drm_printf(p, "slice mask: %04x\n", sseu->slice_mask);
>>       drm_printf(p, "slice total: %u\n", hweight8(sseu->slice_mask));
>>       drm_printf(p, "subslice total: %u\n", sseu_subslice_total(sseu));
>> -    drm_printf(p, "subslice mask %04x\n", sseu->subslice_mask);
>> -    drm_printf(p, "subslice per slice: %u\n",
>> -           hweight8(sseu->subslice_mask));
>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslice_mask); s++) {
>> +        drm_printf(p, "slice%d subslice mask %04x\n",
>> +               s, sseu->subslice_mask[s]);
>> +        drm_printf(p, "slice%d subslice per slice: %u\n",
>> +               s, hweight8(sseu->subslice_mask[s]));
>
> Cosmetic only but consider condensing this into one line if you don't 
> have a preference to either.

Sure, just conscious about the 80characters.

>
>> +    }
>>       drm_printf(p, "EU total: %u\n", sseu->eu_total);
>>       drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>>       drm_printf(p, "has slice power gating: %s\n",
>> @@ -119,22 +124,88 @@ void intel_device_info_dump(const struct 
>> intel_device_info *info,
>>       intel_device_info_dump_flags(info, p);
>>   }
>>   +static u16 compute_eu_total(const struct sseu_dev_info *sseu)
>> +{
>> +    u16 i, total = 0;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
>> +        total += hweight8(sseu->eu_mask[i]);
>> +
>> +    return total;
>> +}
>> +
>> +static u16 compute_subslice_total(const struct sseu_dev_info *sseu)
>> +{
>> +    u16 i, total = 0;
>> +
>> +    for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
>> +        total += hweight8(sseu->subslice_mask[i]);
>> +
>> +    return total;
>> +}
>> +
>>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>       const u32 fuse2 = I915_READ(GEN8_FUSE2);
>> +    int s, ss;
>> +    const int eu_mask = 0xff;
>> +    u32 subslice_mask, eu_en;
>>         sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>>                   GEN10_F2_S_ENA_SHIFT;
>> -    sseu->subslice_mask = (1 << 4) - 1;
>> -    sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> -                 GEN10_F2_SS_DIS_SHIFT);
>> +    sseu->max_slices = 6;
>> +    sseu->max_subslices = 4;
>> +    sseu->max_eus_per_subslice = 8;
>>   -    sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
>> -    sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
>> -                     GEN10_EU_DIS_SS_MASK));
>> +    subslice_mask = (1 << 4) - 1;
>> +    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>> +               GEN10_F2_SS_DIS_SHIFT);
>> +
>> +    /* Slice0 can have up to 3 subslices, but there are only 2 in
>> +     * slice1/2.
>> +     */
>
> Sorry for not spotting this earlier - this comment style is frowned 
> upon. Instead for multi-line please use:
>
> /*
>  * Blah blah...
>  */

Done.

>
>> +    sseu->subslice_mask[0] = subslice_mask;
>> +    for (s = 1; s < sseu->max_slices; s++)
>> +        sseu->subslice_mask[s] = subslice_mask & 0x3;
>> +
>> +    /* Slice0 */
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>> +    for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +        sseu->eu_mask[sseu_eu_idx(sseu, 0, ss, 0)] =
>> +            (eu_en >> (8 * ss)) & eu_mask;
>> +    }
>> +    /* Slice1 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 1, 0, 0)] = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE1);
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 1, 1, 0)] = eu_en & eu_mask;
>> +    /* Slice2 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 2, 0, 0)] = (eu_en >> 8) & eu_mask;
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 2, 1, 0)] = (eu_en >> 16) & 
>> eu_mask;
>> +    /* Slice3 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 3, 0, 0)] = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE2);
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 3, 1, 0)] = eu_en & eu_mask;
>> +    /* Slice4 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 4, 0, 0)] = (eu_en >> 8) & eu_mask;
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 4, 1, 0)] = (eu_en >> 16) & 
>> eu_mask;
>> +    /* Slice5 */
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 5, 0, 0)] = (eu_en >> 24) & 
>> eu_mask;
>> +    eu_en = ~I915_READ(GEN10_EU_DISABLE3);
>> +    sseu->eu_mask[sseu_eu_idx(sseu, 5, 1, 0)] = eu_en & eu_mask;
>
> Indices are guaranteed to be unique in these operations? No need to RMW?

For a given max_slices/max_subslices/DIV_ROUND_UP(max_eus_per_subslice, 
BITS_PER_BYTE) indices are unique.

>
>> +
>> +    /* Do a second pass where we marked the subslices disabled if all
>> +     * their eus are off.
>> +     */
>> +    for (s = 0; s < sseu->max_slices; s++) {
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +            if (sseu_eu_mask(sseu, s, ss, 0) == 0)
>> +                sseu->subslice_mask[s] &= ~BIT(ss);
>> +        }
>> +    }
>> +
>> +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>>         /*
>>        * CNL is expected to always have a uniform distribution
>> @@ -155,26 +226,31 @@ static void gen10_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   static void cherryview_sseu_info_init(struct drm_i915_private 
>> *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>> -    u32 fuse, eu_dis;
>> +    u32 fuse;
>>         fuse = I915_READ(CHV_FUSE_GT);
>>         sseu->slice_mask = BIT(0);
>> +    sseu->max_slices = 1;
>> +    sseu->max_subslices = 2;
>> +    sseu->max_eus_per_subslice = 8;
>>         if (!(fuse & CHV_FGT_DISABLE_SS0)) {
>> -        sseu->subslice_mask |= BIT(0);
>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
>> -                 CHV_FGT_EU_DIS_SS0_R1_MASK);
>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>> +        sseu->subslice_mask[0] |= BIT(0);
>> +        sseu->eu_mask[0] = ~((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
>> +        sseu->eu_mask[0] |= ~(((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) 
>> >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>
> Use sseu_eu_idx?

Done.

>
>> +        sseu->subslice_mask[0] = 1;
>>       }
>>         if (!(fuse & CHV_FGT_DISABLE_SS1)) {
>> -        sseu->subslice_mask |= BIT(1);
>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
>> -                 CHV_FGT_EU_DIS_SS1_R1_MASK);
>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>> +        sseu->subslice_mask[0] |= BIT(1);
>> +        sseu->eu_mask[1] = ~((fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> 
>> CHV_FGT_EU_DIS_SS0_R0_SHIFT);
>> +        sseu->eu_mask[1] |= ~(((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) 
>> >> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4);
>>       }
>>   +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * CHV expected to always have a uniform distribution of EU
>>        * across subslices.
>> @@ -196,41 +272,53 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   {
>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>       struct sseu_dev_info *sseu = &info->sseu;
>> -    int s_max = 3, ss_max = 4, eu_max = 8;
>>       int s, ss;
>> -    u32 fuse2, eu_disable;
>> -    u8 eu_mask = 0xff;
>> +    u32 fuse2, eu_disable, subslice_mask;
>> +    const u8 eu_mask = 0xff;
>>         fuse2 = I915_READ(GEN8_FUSE2);
>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>> GEN8_F2_S_ENA_SHIFT;
>>   +    /* BXT has a single slice and at most 3 subslices. */
>> +    sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
>> +    sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>>       /*
>>        * The subslice disable field is global, i.e. it applies
>>        * to each of the enabled slices.
>>       */
>> -    sseu->subslice_mask = (1 << ss_max) - 1;
>> -    sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>> -                 GEN9_F2_SS_DIS_SHIFT);
>> +    subslice_mask = (1 << sseu->max_subslices) - 1;
>> +    subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>> +               GEN9_F2_SS_DIS_SHIFT);
>>         /*
>>        * Iterate through enabled slices and subslices to
>>        * count the total enabled EU.
>>       */
>> -    for (s = 0; s < s_max; s++) {
>> +    for (s = 0; s < sseu->max_slices; s++) {
>>           if (!(sseu->slice_mask & BIT(s)))
>>               /* skip disabled slice */
>>               continue;
>>   +        sseu->subslice_mask[s] = subslice_mask;
>> +
>>           eu_disable = I915_READ(GEN9_EU_DISABLE(s));
>> -        for (ss = 0; ss < ss_max; ss++) {
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>               int eu_per_ss;
>> +            u8 eu_disabled_mask;
>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>> +            if (!(sseu->subslice_mask[s] & BIT(ss)))
>>                   /* skip disabled subslice */
>>                   continue;
>>   -            eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
>> -                              eu_mask);
>> +            eu_disabled_mask = (eu_disable >> (ss*8)) & eu_mask; > +
>> +            sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
>> +                ~eu_disabled_mask;
>> +
>> +            eu_per_ss = sseu->max_eus_per_subslice -
>> +                hweight8(eu_disabled_mask);
>>                 /*
>>                * Record which subslice(s) has(have) 7 EUs. we
>> @@ -239,11 +327,12 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>                */
>>               if (eu_per_ss == 7)
>>                   sseu->subslice_7eu[s] |= BIT(ss);
>> -
>> -            sseu->eu_total += eu_per_ss;
>>           }
>>       }
>>   +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * SKL is expected to always have a uniform distribution
>>        * of EU across subslices with the exception that any one
>> @@ -269,8 +358,8 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>       sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>>         if (IS_GEN9_LP(dev_priv)) {
>> -#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask & BIT(ss)))
>> -        info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
>> +#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask[0] & BIT(ss)))
>> +        info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
>>             sseu->min_eu_in_pool = 0;
>>           if (info->has_pooled_eu) {
>> @@ -288,19 +377,22 @@ static void gen9_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>   static void broadwell_sseu_info_init(struct drm_i915_private 
>> *dev_priv)
>>   {
>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>> -    const int s_max = 3, ss_max = 3, eu_max = 8;
>>       int s, ss;
>> -    u32 fuse2, eu_disable[3]; /* s_max */
>> +    u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>>         fuse2 = I915_READ(GEN8_FUSE2);
>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>> GEN8_F2_S_ENA_SHIFT;
>> +    sseu->max_slices = 3;
>> +    sseu->max_subslices = 3;
>> +    sseu->max_eus_per_subslice = 8;
>> +
>>       /*
>>        * The subslice disable field is global, i.e. it applies
>>        * to each of the enabled slices.
>>        */
>> -    sseu->subslice_mask = GENMASK(ss_max - 1, 0);
>> -    sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>> -                 GEN8_F2_SS_DIS_SHIFT);
>> +    subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
>> +    subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>> +               GEN8_F2_SS_DIS_SHIFT);
>>         eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & 
>> GEN8_EU_DIS0_S0_MASK;
>>       eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> 
>> GEN8_EU_DIS0_S1_SHIFT) |
>> @@ -314,30 +406,40 @@ static void broadwell_sseu_info_init(struct 
>> drm_i915_private *dev_priv)
>>        * Iterate through enabled slices and subslices to
>>        * count the total enabled EU.
>>        */
>> -    for (s = 0; s < s_max; s++) {
>> +    for (s = 0; s < sseu->max_slices; s++) {
>>           if (!(sseu->slice_mask & BIT(s)))
>>               /* skip disabled slice */
>>               continue;
>>   -        for (ss = 0; ss < ss_max; ss++) {
>> +        sseu->subslice_mask[s] = subslice_mask;
>> +
>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>> +            u8 eu_disabled_mask;
>>               u32 n_disabled;
>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>> +            if (!(sseu->subslice_mask[ss] & BIT(ss)))
>>                   /* skip disabled subslice */
>>                   continue;
>>   -            n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
>> +            eu_disabled_mask =
>> +                eu_disable[s] >> (ss * sseu->max_eus_per_subslice);
>> +
>> +            sseu->eu_mask[sseu_eu_idx(sseu, s, ss, 0)] =
>> +                ~eu_disabled_mask;
>> +
>> +            n_disabled = hweight8(eu_disabled_mask);
>>                 /*
>>                * Record which subslices have 7 EUs.
>>                */
>> -            if (eu_max - n_disabled == 7)
>> +            if (sseu->max_eus_per_subslice - n_disabled == 7)
>>                   sseu->subslice_7eu[s] |= 1 << ss;
>> -
>> -            sseu->eu_total += eu_max - n_disabled;
>>           }
>>       }
>>   +    sseu->subslice_total = compute_subslice_total(sseu);
>> +    sseu->eu_total = compute_eu_total(sseu);
>> +
>>       /*
>>        * BDW is expected to always have a uniform distribution of EU 
>> across
>>        * subslices with the exception that any one EU in any one 
>> subslice may
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.h 
>> b/drivers/gpu/drm/i915/intel_device_info.h
>> index 49cb27bd04c1..6eb8bcf5a213 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.h
>> +++ b/drivers/gpu/drm/i915/intel_device_info.h
>> @@ -110,10 +110,14 @@ enum intel_platform {
>>       func(supports_tv); \
>>       func(has_ipc);
>>   +#define GEN_MAX_SLICES        (6) /* CNL upper bound */
>> +#define GEN_MAX_SUBSLICES    (7)
>> +
>>   struct sseu_dev_info {
>>       u8 slice_mask;
>> -    u8 subslice_mask;
>> -    u8 eu_total;
>> +    u8 subslice_mask[GEN_MAX_SUBSLICES];
>> +    u16 subslice_total;
>> +    u16 eu_total;
>>       u8 eu_per_subslice;
>>       u8 min_eu_in_pool;
>>       /* For each slice, which subslice(s) has(have) 7 EUs 
>> (bitfield)? */
>> @@ -121,6 +125,17 @@ struct sseu_dev_info {
>>       u8 has_slice_pg:1;
>>       u8 has_subslice_pg:1;
>>       u8 has_eu_pg:1;
>> +
>> +    /* Topology fields */
>> +    u8 max_slices;
>> +    u8 max_subslices;
>> +    u8 max_eus_per_subslice;
>> +
>> +    /* We don't have more than 8 eus per subslice at the moment and 
>> as we
>> +     * store eus enabled using bits, no need to multiply by eus per
>> +     * subslice.
>> +     */
>> +    u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>>   };
>>     struct intel_device_info {
>> @@ -167,7 +182,22 @@ struct intel_device_info {
>>     static inline unsigned int sseu_subslice_total(const struct 
>> sseu_dev_info *sseu)
>>   {
>> -    return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
>> +    return sseu->subslice_total;
>> +}
>> +
>> +static inline int sseu_eu_idx(const struct sseu_dev_info *sseu,
>> +                  int slice, int subslice, int eu_group)
>
> What is eu_group for? Will it be used at some point?

In case we ever have more than 8 EUs per subslice.

>
>> +{
>> +    int subslice_stride = ALIGN(sseu->max_eus_per_subslice, 8) / 8;
>
> What are these eights? sizeof(eu_mask[0]) ? Is this whole statement 
> DIV_ROUND_UP also?

Sorry, forgot to replace some of them. Done.

>
>> +    int slice_stride = sseu->max_subslices * subslice_stride;
>> +
>> +    return slice * slice_stride + subslice * subslice_stride + 
>> eu_group;
>> +}
>> +
>> +static inline int sseu_eu_mask(const struct sseu_dev_info *sseu,
>> +                   int slice, int subslice, int eu_group)
>> +{
>> +    return sseu->eu_mask[sseu_eu_idx(sseu, slice, subslice, eu_group)];
>>   }
>>     const char *intel_platform_name(enum intel_platform platform);
>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>> b/drivers/gpu/drm/i915/intel_lrc.c
>> index ff25f209d0a5..ac7896031b8d 100644
>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>> @@ -2098,7 +2098,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>>         if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>>           rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
>> -        rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
>> +        rpcs |= 
>> hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask[0]) <<
>>               GEN8_RPCS_SS_CNT_SHIFT;
>>           rpcs |= GEN8_RPCS_ENABLE;
>>       }
>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> index c5ff203e42d6..23ae9a957fab 100644
>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum 
>> intel_engine_hangcheck_action a)
>>     #define instdone_subslice_mask(dev_priv__) \
>>       (INTEL_GEN(dev_priv__) == 7 ? \
>> -     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
>> +     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask[0])
>>     #define for_each_instdone_slice_subslice(dev_priv__, slice__, 
>> subslice__) \
>>       for ((slice__) = 0, (subslice__) = 0; \
>>
>
> Regards,
>
> Tvrtko
>



More information about the Intel-gfx mailing list