[Intel-gfx] [PATCH 1/6] drm/i915: store all subslice masks

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Jan 11 15:57:37 UTC 2018


On 11/01/2018 15:48, Lionel Landwerlin wrote:
> On 11/01/18 11:12, Tvrtko Ursulin wrote:
>>
>> On 18/12/2017 15:35, Lionel Landwerlin wrote:
>>> Up to now, subslice mask was assumed to be uniform across slices. But
>>> starting with Cannonlake, slices can be asymetric (for example slice0
>>
>> asymmetric, thanks auto spell checker. :)
> 
> Done, thanks.
> 
>>
>>> has different number of subslices as slice1+). This change stores all
>>> subslices masks for all slices rather than having a single mask that
>>> applies to all slices.
>>>
>>> Signed-off-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
>>> ---
>>>   drivers/gpu/drm/i915/i915_debugfs.c      |  24 +++--
>>>   drivers/gpu/drm/i915/i915_drv.c          |   2 +-
>>>   drivers/gpu/drm/i915/i915_drv.h          |  23 ++++-
>>>   drivers/gpu/drm/i915/intel_device_info.c | 169 
>>> ++++++++++++++++++++++---------
>>>   drivers/gpu/drm/i915/intel_lrc.c         |   2 +-
>>>   drivers/gpu/drm/i915/intel_ringbuffer.h  |   2 +-
>>>   6 files changed, 161 insertions(+), 61 deletions(-)
>>>
>>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>>> b/drivers/gpu/drm/i915/i915_debugfs.c
>>> index 0ddce72552bf..0c7890b695c5 100644
>>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>>> @@ -4293,7 +4293,7 @@ static void 
>>> cherryview_sseu_device_status(struct drm_i915_private *dev_priv,
>>>               continue;
>>>             sseu->slice_mask = BIT(0);
>>> -        sseu->subslice_mask |= BIT(ss);
>>> +        sseu->subslices_mask[0] |= BIT(ss);
>>>           eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
>>>                ((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
>>>                ((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
>>> @@ -4340,7 +4340,7 @@ static void gen10_sseu_device_status(struct 
>>> drm_i915_private *dev_priv,
>>>               continue;
>>>             sseu->slice_mask |= BIT(s);
>>> -        sseu->subslice_mask = info->sseu.subslice_mask;
>>> +        sseu->subslices_mask[s] = info->sseu.subslices_mask[s];
>>>             for (ss = 0; ss < ss_max; ss++) {
>>>               unsigned int eu_cnt;
>>> @@ -4395,8 +4395,8 @@ static void gen9_sseu_device_status(struct 
>>> drm_i915_private *dev_priv,
>>>           sseu->slice_mask |= BIT(s);
>>>             if (IS_GEN9_BC(dev_priv))
>>> -            sseu->subslice_mask =
>>> -                INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>> +            sseu->subslices_mask[s] =
>>> + INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>>>             for (ss = 0; ss < ss_max; ss++) {
>>>               unsigned int eu_cnt;
>>> @@ -4406,7 +4406,7 @@ static void gen9_sseu_device_status(struct 
>>> drm_i915_private *dev_priv,
>>>                       /* skip disabled subslice */
>>>                       continue;
>>>   -                sseu->subslice_mask |= BIT(ss);
>>> +                sseu->subslices_mask[s] |= BIT(ss);
>>>               }
>>>                 eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>>> @@ -4428,9 +4428,12 @@ static void 
>>> broadwell_sseu_device_status(struct drm_i915_private *dev_priv,
>>>       sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
>>>         if (sseu->slice_mask) {
>>> -        sseu->subslice_mask = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>>           sseu->eu_per_subslice =
>>>                   INTEL_INFO(dev_priv)->sseu.eu_per_subslice;
>>> +        for (s = 0; s < fls(sseu->slice_mask); s++) {
>>> +            sseu->subslices_mask[s] =
>>> + INTEL_INFO(dev_priv)->sseu.subslices_mask[s];
>>> +        }
>>>           sseu->eu_total = sseu->eu_per_subslice *
>>>                    sseu_subslice_total(sseu);
>>>   @@ -4449,6 +4452,7 @@ static void i915_print_sseu_info(struct 
>>> seq_file *m, bool is_available_info,
>>>   {
>>>       struct drm_i915_private *dev_priv = node_to_i915(m->private);
>>>       const char *type = is_available_info ? "Available" : "Enabled";
>>> +    int s;
>>>         seq_printf(m, "  %s Slice Mask: %04x\n", type,
>>>              sseu->slice_mask);
>>> @@ -4456,10 +4460,10 @@ static void i915_print_sseu_info(struct 
>>> seq_file *m, bool is_available_info,
>>>              hweight8(sseu->slice_mask));
>>>       seq_printf(m, "  %s Subslice Total: %u\n", type,
>>>              sseu_subslice_total(sseu));
>>> -    seq_printf(m, "  %s Subslice Mask: %04x\n", type,
>>> -           sseu->subslice_mask);
>>> -    seq_printf(m, "  %s Subslice Per Slice: %u\n", type,
>>> -           hweight8(sseu->subslice_mask));
>>> +    for (s = 0; s < fls(sseu->slice_mask); s++) {
>>
>> Slice mask is always contiguous bits?
> 
> I have a 2x6 BXT on my desk where the subslice0 appears to be fused-off.
> I assumed same could be true for slices.
> fls() should make us iterate through all the slices (even the fused off 
> ones).

Do you then want to skip printing the fused off ones? Just asking, don't 
think it matters hugely.

>>
>>> +        seq_printf(m, "  %s Slice%i Subslice Mask: %04x\n", type,
>>> +               s, sseu->subslices_mask[s]);
>>
>> Don't want to keep printing the count? Like maybe " %s Slice%u %u 
>> sublices, mask=%04x\n" ?
> 
> Sure, done.
> 
>>
>>> +    }
>>>       seq_printf(m, "  %s EU Total: %u\n", type,
>>>              sseu->eu_total);
>>>       seq_printf(m, "  %s EU Per Subslice: %u\n", type,
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.c 
>>> b/drivers/gpu/drm/i915/i915_drv.c
>>> index 72bea281edb7..8b99e415c345 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.c
>>> +++ b/drivers/gpu/drm/i915/i915_drv.c
>>> @@ -414,7 +414,7 @@ static int i915_getparam(struct drm_device *dev, 
>>> void *data,
>>>               return -ENODEV;
>>>           break;
>>>       case I915_PARAM_SUBSLICE_MASK:
>>> -        value = INTEL_INFO(dev_priv)->sseu.subslice_mask;
>>> +        value = INTEL_INFO(dev_priv)->sseu.subslices_mask[0];
>>>           if (!value)
>>>               return -ENODEV;
>>>           break;
>>> diff --git a/drivers/gpu/drm/i915/i915_drv.h 
>>> b/drivers/gpu/drm/i915/i915_drv.h
>>> index 1aba5657f5f0..82fc59078c6a 100644
>>> --- a/drivers/gpu/drm/i915/i915_drv.h
>>> +++ b/drivers/gpu/drm/i915/i915_drv.h
>>> @@ -802,9 +802,12 @@ struct intel_csr {
>>>       func(supports_tv); \
>>>       func(has_ipc);
>>>   +#define GEN_MAX_SLICES        (6) /* CNL upper bound */
>>> +#define GEN_MAX_SUBSLICES    (7)
>>> +
>>>   struct sseu_dev_info {
>>>       u8 slice_mask;
>>> -    u8 subslice_mask;
>>> +    u8 subslices_mask[GEN_MAX_SLICES];
>>
>> Personally I would probably kept the subslice_mask name and just 
>> turned it into an array, but that is bike-shed territory.
> 
> It was easier to spot all the places to modify. Will go back.
> 
>>
>>>       u8 eu_total;
>>>       u8 eu_per_subslice;
>>>       u8 min_eu_in_pool;
>>> @@ -813,11 +816,27 @@ struct sseu_dev_info {
>>>       u8 has_slice_pg:1;
>>>       u8 has_subslice_pg:1;
>>>       u8 has_eu_pg:1;
>>> +
>>> +    /* Topology fields */
>>> +    u8 max_slices;
>>> +    u8 max_subslices;
>>> +    u8 max_eus_per_subslice;
>>> +
>>> +    /* We don't have more than 8 eus per subslice at the moment and 
>>> as we
>>> +     * store eus enabled using bits, no need to multiply by eus per
>>> +     * subslice.
>>> +     */
>>> +    u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES];
>>>   };
>>>     static inline unsigned int sseu_subslice_total(const struct 
>>> sseu_dev_info *sseu)
>>>   {
>>> -    return hweight8(sseu->slice_mask) * hweight8(sseu->subslice_mask);
>>> +    unsigned s, total = 0;
>>> +
>>> +    for (s = 0; s < ARRAY_SIZE(sseu->subslices_mask); s++)
>>> +        total += hweight8(sseu->subslices_mask[s]);
>>> +
>>> +    return total;
>>
>> The function is bigger now so maybe it needs stops being inline, but 
>> either moved to the .c file, or even the total stored in struct 
>> sseu_dev_info, and then this becomes just a getter?
> 
> Let's go with a subslice_total field.
> 
>>
>>>   }
>>>     /* Keep in gen based order, and chronological order within a gen */
>>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>>> b/drivers/gpu/drm/i915/intel_device_info.c
>>> index f478be3ae0ba..6a3c40439e83 100644
>>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>>> @@ -82,22 +82,74 @@ void intel_device_info_dump(struct 
>>> drm_i915_private *dev_priv)
>>>   #undef PRINT_FLAG
>>>   }
>>>   +static u8 compute_eu_total(const struct sseu_dev_info *sseu)
>>> +{
>>> +    u8 i, total = 0;
>>
>> sseu_subslice_total returns an unsinged int, while this is u8, 
>> suggesting there can be more sublices and eus. ;) Jokes aside, I'd 
>> probably make them consistent. The index variable is probably best as 
>> unsigned int, but the total and return can be u8 if big enough for 
>> total eus? Either way best it matches the types in the struct they get 
>> assigned to.
> 
> You're right!
> u16 should be fine though.
> 
>>
>>> +
>>> +    for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
>>> +        total += hweight8(sseu->eu_mask[i]);
>>> +
>>> +    return total;
>>> +}
>>> +
>>>   static void gen10_sseu_info_init(struct drm_i915_private *dev_priv)
>>>   {
>>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>>       const u32 fuse2 = I915_READ(GEN8_FUSE2);
>>> +    int s, ss, eu_mask = 0xff;
>>
>> const eu_mask ?
> 
> Done.
> 
>>
>>> +    u32 subslice_mask, eu_en;
>>>         sseu->slice_mask = (fuse2 & GEN10_F2_S_ENA_MASK) >>
>>>                   GEN10_F2_S_ENA_SHIFT;
>>> -    sseu->subslice_mask = (1 << 4) - 1;
>>> -    sseu->subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>>> -                 GEN10_F2_SS_DIS_SHIFT);
>>> +    sseu->max_slices = 6;
>>> +    sseu->max_subslices = 4;
>>> +    sseu->max_eus_per_subslice = 8;
>>> +
>>> +    subslice_mask = (1 << 4) - 1;
>>> +    subslice_mask &= ~((fuse2 & GEN10_F2_SS_DIS_MASK) >>
>>> +               GEN10_F2_SS_DIS_SHIFT);
>>> +
>>> +    /* Slice0 can have up to 3 subslices, but there are only 2 in
>>> +     * slice1/2.
>>> +     */
>>> +    sseu->subslices_mask[0] = subslice_mask;
>>> +    for (s = 1; s < sseu->max_slices; s++)
>>> +        sseu->subslices_mask[s] = subslice_mask & 0x3;
>>> +
>>> +    /* Slice0 */
>>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE0);
>>> +    for (ss = 0; ss < sseu->max_subslices; ss++)
>>> +        sseu->eu_mask[ss]     = (eu_en >> (8 * ss)) & eu_mask;
>>> +    /* Slice1 */
>>> +    sseu->eu_mask[sseu->max_subslices]         = (eu_en >> 24) & 
>>> eu_mask;
>>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE1);
>>> +    sseu->eu_mask[sseu->max_subslices + 1]     = eu_en & eu_mask;
>>
>> I suggest a helper to index into sse->eu_mask, like 
>> sseu->eu_mask[_eu_mask_idx(slice, subslice)] or something, so it is 
>> more readable what is happening here. Or even:
>>
>>  _eu_mask(sseu, slice, subslice) = mask;
>>
>> Doable? I am not 100% I picked up exactly on the layout of the eu_mask 
>> array.. each element is one subslice? Consecutive for slice0, 
>> subslice0..N, slice1... sliceN ?
> 
> Unfortunately that's not possible because of the asymmetry thing...

Hm so on a given SKU you are saying the eu_mask you are generating is 
also asymmetrical? Meaning different number of total allocated bits 
per-slice? Could we have it symmetrical in the internal representation 
and uAPI for simpler code?

Regards,

Tvrtko

>>
>>> +    /* Slice2 */
>>> +    sseu->eu_mask[2 * sseu->max_subslices]     = (eu_en >> 8) & 
>>> eu_mask;
>>> +    sseu->eu_mask[2 * sseu->max_subslices + 1] = (eu_en >> 16) & 
>>> eu_mask;
>>> +    /* Slice3 */
>>> +    sseu->eu_mask[3 * sseu->max_subslices]     = (eu_en >> 24) & 
>>> eu_mask;
>>> +    eu_en = ~I915_READ(GEN8_EU_DISABLE2);
>>> +    sseu->eu_mask[3 * sseu->max_subslices + 1] = eu_en & eu_mask;
>>> +    /* Slice4 */
>>> +    sseu->eu_mask[4 * sseu->max_subslices]     = (eu_en >> 8) & 
>>> eu_mask;
>>> +    sseu->eu_mask[4 * sseu->max_subslices + 1] = (eu_en >> 16) & 
>>> eu_mask;
>>> +    /* Slice5 */
>>> +    sseu->eu_mask[5 * sseu->max_subslices]     = (eu_en >> 24) & 
>>> eu_mask;
>>> +    eu_en = ~I915_READ(GEN10_EU_DISABLE3);
>>> +    sseu->eu_mask[5 * sseu->max_subslices + 1] = eu_en & eu_mask;
>>> +
>>> +    /* Do a second pass where we marked the subslices disabled if all
>>> +     * their eus are off.
>>> +     */
>>> +    for (s = 0; s < sseu->max_slices; s++) {
>>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>> +            if (sseu->eu_mask[s * sseu->max_subslices + ss] == 0)
>>> +                sseu->subslices_mask[s] &= ~BIT(ss);
>>> +        }
>>> +    }
>>>   -    sseu->eu_total = hweight32(~I915_READ(GEN8_EU_DISABLE0));
>>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE1));
>>> -    sseu->eu_total += hweight32(~I915_READ(GEN8_EU_DISABLE2));
>>> -    sseu->eu_total += hweight8(~(I915_READ(GEN10_EU_DISABLE3) &
>>> -                     GEN10_EU_DIS_SS_MASK));
>>> +    sseu->eu_total = compute_eu_total(sseu);
>>>         /*
>>>        * CNL is expected to always have a uniform distribution
>>> @@ -118,26 +170,30 @@ static void gen10_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>   static void cherryview_sseu_info_init(struct drm_i915_private 
>>> *dev_priv)
>>>   {
>>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>> -    u32 fuse, eu_dis;
>>> +    u32 fuse;
>>>         fuse = I915_READ(CHV_FUSE_GT);
>>>         sseu->slice_mask = BIT(0);
>>> +    sseu->max_slices = 1;
>>> +    sseu->max_subslices = 2;
>>> +    sseu->max_eus_per_subslice = 8;
>>>         if (!(fuse & CHV_FGT_DISABLE_SS0)) {
>>> -        sseu->subslice_mask |= BIT(0);
>>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS0_R0_MASK |
>>> -                 CHV_FGT_EU_DIS_SS0_R1_MASK);
>>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>>> +        sseu->subslices_mask[0] |= BIT(0);
>>> +        sseu->eu_mask[0] = (fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
>>> +        sseu->eu_mask[0] |= ((fuse & CHV_FGT_EU_DIS_SS0_R1_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
>>
>> Do you need to invert the bits here?
> 
> Oh dear... Indeed!
> 
>>
>>> +        sseu->subslices_mask[0] = 1;
>>>       }
>>>         if (!(fuse & CHV_FGT_DISABLE_SS1)) {
>>> -        sseu->subslice_mask |= BIT(1);
>>> -        eu_dis = fuse & (CHV_FGT_EU_DIS_SS1_R0_MASK |
>>> -                 CHV_FGT_EU_DIS_SS1_R1_MASK);
>>> -        sseu->eu_total += 8 - hweight32(eu_dis);
>>> +        sseu->subslices_mask[0] |= BIT(1);
>>> +        sseu->eu_mask[1] = (fuse & CHV_FGT_EU_DIS_SS1_R0_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R0_SHIFT;
>>> +        sseu->eu_mask[2] |= ((fuse & CHV_FGT_EU_DIS_SS1_R1_MASK) >> 
>>> CHV_FGT_EU_DIS_SS0_R1_SHIFT) << 4;
>>
>> Why eu_mask indices 1 and 2 here, while for subslice 0 you merged them 
>> into index 0?
> 
> Again, you're right.
> Thanks!
> 
>>
>>>       }
>>>   +    sseu->eu_total = compute_eu_total(sseu);
>>> +
>>>       /*
>>>        * CHV expected to always have a uniform distribution of EU
>>>        * across subslices.
>>> @@ -159,41 +215,50 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>   {
>>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>>       struct sseu_dev_info *sseu = &info->sseu;
>>> -    int s_max = 3, ss_max = 4, eu_max = 8;
>>>       int s, ss;
>>> -    u32 fuse2, eu_disable;
>>> +    u32 fuse2, eu_disable, subslice_mask;
>>>       u8 eu_mask = 0xff;
>>>         fuse2 = I915_READ(GEN8_FUSE2);
>>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>>> GEN8_F2_S_ENA_SHIFT;
>>>   +    /* BXT has a single slice and at most 3 subslices. */
>>> +    sseu->max_slices = IS_GEN9_LP(dev_priv) ? 1 : 3;
>>> +    sseu->max_subslices = IS_GEN9_LP(dev_priv) ? 3 : 4;
>>> +    sseu->max_eus_per_subslice = 8;
>>> +
>>>       /*
>>>        * The subslice disable field is global, i.e. it applies
>>>        * to each of the enabled slices.
>>>       */
>>> -    sseu->subslice_mask = (1 << ss_max) - 1;
>>> -    sseu->subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>>> -                 GEN9_F2_SS_DIS_SHIFT);
>>> +    subslice_mask = (1 << sseu->max_subslices) - 1;
>>> +    subslice_mask &= ~((fuse2 & GEN9_F2_SS_DIS_MASK) >>
>>> +               GEN9_F2_SS_DIS_SHIFT);
>>>         /*
>>>        * Iterate through enabled slices and subslices to
>>>        * count the total enabled EU.
>>>       */
>>> -    for (s = 0; s < s_max; s++) {
>>> +    for (s = 0; s < sseu->max_slices; s++) {
>>>           if (!(sseu->slice_mask & BIT(s)))
>>>               /* skip disabled slice */
>>>               continue;
>>>   +        sseu->subslices_mask[s] = subslice_mask;
>>> +
>>>           eu_disable = I915_READ(GEN9_EU_DISABLE(s));
>>> -        for (ss = 0; ss < ss_max; ss++) {
>>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>>               int eu_per_ss;
>>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>>> +            if (!(sseu->subslices_mask[s] & BIT(ss)))
>>>                   /* skip disabled subslice */
>>>                   continue;
>>>   -            eu_per_ss = eu_max - hweight8((eu_disable >> (ss*8)) &
>>> -                              eu_mask);
>>> +            sseu->eu_mask[ss + s * sseu->max_subslices] =
>>> +                ~((eu_disable >> (ss*8)) & eu_mask);
>>> +
>>> +            eu_per_ss = sseu->max_eus_per_subslice -
>>> +                hweight8((eu_disable >> (ss*8)) & eu_mask);
>>
>> Store "eu_disable >> (ss*8)) & eu_mask" in a local for readability 
>> since it is calculated twice?
> 
> Done.
> 
>>
>>>                 /*
>>>                * Record which subslice(s) has(have) 7 EUs. we
>>> @@ -202,11 +267,11 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>                */
>>>               if (eu_per_ss == 7)
>>>                   sseu->subslice_7eu[s] |= BIT(ss);
>>> -
>>> -            sseu->eu_total += eu_per_ss;
>>
>> You could theoretically keep this running total and then at the end 
>> compare against compute_eu_total under a GEM_BUG_ON, just to check 
>> everyting is working as expected.
>>
>>>           }
>>>       }
>>>   +    sseu->eu_total = compute_eu_total(sseu);
>>> +
>>>       /*
>>>        * SKL is expected to always have a uniform distribution
>>>        * of EU across subslices with the exception that any one
>>> @@ -232,8 +297,8 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>       sseu->has_eu_pg = sseu->eu_per_subslice > 2;
>>>         if (IS_GEN9_LP(dev_priv)) {
>>> -#define IS_SS_DISABLED(ss)    (!(sseu->subslice_mask & BIT(ss)))
>>> -        info->has_pooled_eu = hweight8(sseu->subslice_mask) == 3;
>>> +#define IS_SS_DISABLED(ss)    (!(sseu->subslices_mask[0] & BIT(ss)))
>>> +        info->has_pooled_eu = hweight8(sseu->subslices_mask[0]) == 3;
>>>             sseu->min_eu_in_pool = 0;
>>>           if (info->has_pooled_eu) {
>>> @@ -251,19 +316,22 @@ static void gen9_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>   static void broadwell_sseu_info_init(struct drm_i915_private 
>>> *dev_priv)
>>>   {
>>>       struct sseu_dev_info *sseu = &mkwrite_device_info(dev_priv)->sseu;
>>> -    const int s_max = 3, ss_max = 3, eu_max = 8;
>>>       int s, ss;
>>> -    u32 fuse2, eu_disable[3]; /* s_max */
>>> +    u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
>>>         fuse2 = I915_READ(GEN8_FUSE2);
>>>       sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> 
>>> GEN8_F2_S_ENA_SHIFT;
>>> +    sseu->max_slices = 3;
>>> +    sseu->max_subslices = 3;
>>> +    sseu->max_eus_per_subslice = 8;
>>> +
>>>       /*
>>>        * The subslice disable field is global, i.e. it applies
>>>        * to each of the enabled slices.
>>>        */
>>> -    sseu->subslice_mask = GENMASK(ss_max - 1, 0);
>>> -    sseu->subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>>> -                 GEN8_F2_SS_DIS_SHIFT);
>>> +    subslice_mask = GENMASK(sseu->max_subslices - 1, 0);
>>> +    subslice_mask &= ~((fuse2 & GEN8_F2_SS_DIS_MASK) >>
>>> +               GEN8_F2_SS_DIS_SHIFT);
>>>         eu_disable[0] = I915_READ(GEN8_EU_DISABLE0) & 
>>> GEN8_EU_DIS0_S0_MASK;
>>>       eu_disable[1] = (I915_READ(GEN8_EU_DISABLE0) >> 
>>> GEN8_EU_DIS0_S1_SHIFT) |
>>> @@ -277,30 +345,36 @@ static void broadwell_sseu_info_init(struct 
>>> drm_i915_private *dev_priv)
>>>        * Iterate through enabled slices and subslices to
>>>        * count the total enabled EU.
>>>        */
>>> -    for (s = 0; s < s_max; s++) {
>>> +    for (s = 0; s < sseu->max_slices; s++) {
>>>           if (!(sseu->slice_mask & BIT(s)))
>>>               /* skip disabled slice */
>>>               continue;
>>>   -        for (ss = 0; ss < ss_max; ss++) {
>>> +        sseu->subslices_mask[s] = subslice_mask;
>>> +
>>> +        for (ss = 0; ss < sseu->max_subslices; ss++) {
>>>               u32 n_disabled;
>>>   -            if (!(sseu->subslice_mask & BIT(ss)))
>>> +            if (!(sseu->subslices_mask[ss] & BIT(ss)))
>>>                   /* skip disabled subslice */
>>>                   continue;
>>>   -            n_disabled = hweight8(eu_disable[s] >> (ss * eu_max));
>>> +            sseu->eu_mask[ss + s * sseu->max_subslices] =
>>> +                ~(eu_disable[s] >>
>>> +                  (ss * sseu->max_eus_per_subslice));
>>> +            n_disabled = hweight8(eu_disable[s] >>
>>> +                          (ss * sseu->max_eus_per_subslice));
>>
>> As above could use a local for "eu_disable[s] >> (ss * 
>> sseu->max_eus_per_subslice)"
> 
> Done.
> 
>>
>>>                 /*
>>>                * Record which subslices have 7 EUs.
>>>                */
>>> -            if (eu_max - n_disabled == 7)
>>> +            if (sseu->max_eus_per_subslice - n_disabled == 7)
>>>                   sseu->subslice_7eu[s] |= 1 << ss;
>>> -
>>> -            sseu->eu_total += eu_max - n_disabled;
>>>           }
>>>       }
>>>   +    sseu->eu_total = compute_eu_total(sseu);
>>> +
>>>       /*
>>>        * BDW is expected to always have a uniform distribution of EU 
>>> across
>>>        * subslices with the exception that any one EU in any one 
>>> subslice may
>>> @@ -437,6 +511,7 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>   {
>>>       struct intel_device_info *info = mkwrite_device_info(dev_priv);
>>>       enum pipe pipe;
>>> +    int s;
>>>         if (INTEL_GEN(dev_priv) >= 10) {
>>>           for_each_pipe(dev_priv, pipe)
>>> @@ -548,9 +623,11 @@ void intel_device_info_runtime_init(struct 
>>> drm_i915_private *dev_priv)
>>>       DRM_DEBUG_DRIVER("slice total: %u\n", 
>>> hweight8(info->sseu.slice_mask));
>>>       DRM_DEBUG_DRIVER("subslice total: %u\n",
>>>                sseu_subslice_total(&info->sseu));
>>> -    DRM_DEBUG_DRIVER("subslice mask %04x\n", info->sseu.subslice_mask);
>>> -    DRM_DEBUG_DRIVER("subslice per slice: %u\n",
>>> -             hweight8(info->sseu.subslice_mask));
>>> +    for (s = 0; s < ARRAY_SIZE(info->sseu.subslices_mask); s++) {
>>> +        DRM_DEBUG_DRIVER("subslice mask %04x\n", 
>>> info->sseu.subslices_mask[s]);
>>> +        DRM_DEBUG_DRIVER("subslice per slice: %u\n",
>>> +                 hweight8(info->sseu.subslices_mask[s]));
>>
>> Put a subslice index into messages?
> 
> Done.
> 
>>
>>> +    }
>>>       DRM_DEBUG_DRIVER("EU total: %u\n", info->sseu.eu_total);
>>>       DRM_DEBUG_DRIVER("EU per subslice: %u\n", 
>>> info->sseu.eu_per_subslice);
>>>       DRM_DEBUG_DRIVER("has slice power gating: %s\n",
>>> diff --git a/drivers/gpu/drm/i915/intel_lrc.c 
>>> b/drivers/gpu/drm/i915/intel_lrc.c
>>> index 2e38fbfdf08f..6e55b842c21d 100644
>>> --- a/drivers/gpu/drm/i915/intel_lrc.c
>>> +++ b/drivers/gpu/drm/i915/intel_lrc.c
>>> @@ -2072,7 +2072,7 @@ make_rpcs(struct drm_i915_private *dev_priv)
>>>         if (INTEL_INFO(dev_priv)->sseu.has_subslice_pg) {
>>>           rpcs |= GEN8_RPCS_SS_CNT_ENABLE;
>>> -        rpcs |= hweight8(INTEL_INFO(dev_priv)->sseu.subslice_mask) <<
>>> +        rpcs |= 
>>> hweight8(INTEL_INFO(dev_priv)->sseu.subslices_mask[0]) <<
>>>               GEN8_RPCS_SS_CNT_SHIFT;
>>>           rpcs |= GEN8_RPCS_ENABLE;
>>>       }
>>> diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h 
>>> b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> index c5ff203e42d6..a94bc1b5c502 100644
>>> --- a/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> +++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
>>> @@ -90,7 +90,7 @@ hangcheck_action_to_str(const enum 
>>> intel_engine_hangcheck_action a)
>>>     #define instdone_subslice_mask(dev_priv__) \
>>>       (INTEL_GEN(dev_priv__) == 7 ? \
>>> -     1 : INTEL_INFO(dev_priv__)->sseu.subslice_mask)
>>> +     1 : INTEL_INFO(dev_priv__)->sseu.subslices_mask[0])
>>>     #define for_each_instdone_slice_subslice(dev_priv__, slice__, 
>>> subslice__) \
>>>       for ((slice__) = 0, (subslice__) = 0; \
>>>
>>
>> Regards,
>>
>> Tvrtko
>>
> 
> 


More information about the Intel-gfx mailing list