[Intel-gfx] [PATCH 9/9] drm/i915: Expand subslice mask

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Mon Sep 2 13:42:44 UTC 2019


On 24/07/2019 14:05, Tvrtko Ursulin wrote:
> 
> On 23/07/2019 16:49, Stuart Summers wrote:
>> Currently, the subslice_mask runtime parameter is stored as an
>> array of subslices per slice. Expand the subslice mask array to
>> better match what is presented to userspace through the
>> I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
>> then calculated:
>>    slice * subslice stride + subslice index / 8
>>
>> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
>> ---
>>   drivers/gpu/drm/i915/gt/intel_sseu.c        | 26 ++++++++++++++++++++-
>>   drivers/gpu/drm/i915/gt/intel_sseu.h        |  5 +++-
>>   drivers/gpu/drm/i915/gt/intel_workarounds.c |  3 +--
>>   drivers/gpu/drm/i915/i915_debugfs.c         |  5 +++-
>>   drivers/gpu/drm/i915/intel_device_info.c    |  8 +++----
>>   5 files changed, 38 insertions(+), 9 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c 
>> b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> index 607c1447287c..7abc2487b994 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
>> @@ -30,6 +30,30 @@ intel_sseu_subslice_total(const struct 
>> sseu_dev_info *sseu)
>>       return total;
>>   }
>> +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
>> +{
>> +    int i, offset = slice * sseu->ss_stride;
>> +    u32 mask = 0;
>> +
>> +    if (slice >= sseu->max_slices) {
>> +        DRM_ERROR("%s: invalid slice %d, max: %d\n",
>> +              __func__, slice, sseu->max_slices);
>> +        return 0;
>> +    }
>> +
>> +    if (sseu->ss_stride > sizeof(mask)) {
>> +        DRM_ERROR("%s: invalid subslice stride %d, max: %lu\n",
>> +              __func__, sseu->ss_stride, sizeof(mask));
>> +        return 0;
>> +    }
>> +
>> +    for (i = 0; i < sseu->ss_stride; i++)
>> +        mask |= (u32)sseu->subslice_mask[offset + i] <<
>> +            i * BITS_PER_BYTE;
>> +
>> +    return mask;
>> +}
> 
> Why do you actually need these complications when the plan from the 
> start was that the driver and user sseu representation structures can be 
> different?
> 
> I only gave it a quick look so I might be wrong, but why not just expand 
> the driver representations of subslice mask up from u8? Userspace API 
> should be able to cope with strides already.

I never got an answer to this and the series was merged in the meantime.

Maybe not much harm, but I still don't understand why all the 
complications were needed, seemingly just to avoid bumping the *internal* 
ss mask up from u8. As long as the internal and ABI sseu info structs are 
well separated and the access points few and well controlled (I think they 
are), then I don't see why the internal side had to be converted to u8 
and strides. But maybe I am missing something.

Regards,

Tvrtko

> 
> Regards,
> 
> Tvrtko
> 
>> +
>>   void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>>                     u32 ss_mask)
>>   {
>> @@ -43,7 +67,7 @@ void intel_sseu_set_subslices(struct sseu_dev_info 
>> *sseu, int slice,
>>   unsigned int
>>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 
>> slice)
>>   {
>> -    return hweight8(sseu->subslice_mask[slice]);
>> +    return hweight32(intel_sseu_get_subslices(sseu, slice));
>>   }
>>   u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
>> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h 
>> b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> index 0ecc1c35a7a1..2291764b7db5 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>> @@ -15,10 +15,11 @@ struct drm_i915_private;
>>   #define GEN_MAX_SLICES        (6) /* CNL upper bound */
>>   #define GEN_MAX_SUBSLICES    (8) /* ICL upper bound */
>>   #define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, 
>> BITS_PER_BYTE)
>> +#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>>   struct sseu_dev_info {
>>       u8 slice_mask;
>> -    u8 subslice_mask[GEN_MAX_SLICES];
>> +    u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>>       u16 eu_total;
>>       u8 eu_per_subslice;
>>       u8 min_eu_in_pool;
>> @@ -85,6 +86,8 @@ intel_sseu_subslice_total(const struct sseu_dev_info 
>> *sseu);
>>   unsigned int
>>   intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 
>> slice);
>> +u32  intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 
>> slice);
>> +
>>   void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
>>                     u32 ss_mask);
>> diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c 
>> b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> index 704ace01e7f5..7ec60435d871 100644
>> --- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> +++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
>> @@ -794,8 +794,7 @@ wa_init_mcr(struct drm_i915_private *i915, struct 
>> i915_wa_list *wal)
>>       }
>>       slice = fls(sseu->slice_mask) - 1;
>> -    GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
>> -    subslice = fls(l3_en & sseu->subslice_mask[slice]);
>> +    subslice = fls(l3_en & intel_sseu_get_subslices(sseu, slice));
>>       if (!subslice) {
>>           DRM_WARN("No common index found between subslice mask %x and 
>> L3 bank mask %x!\n",
>>                sseu->subslice_mask[slice], l3_en);
>> diff --git a/drivers/gpu/drm/i915/i915_debugfs.c 
>> b/drivers/gpu/drm/i915/i915_debugfs.c
>> index 7f842506b9ea..96a25a770ade 100644
>> --- a/drivers/gpu/drm/i915/i915_debugfs.c
>> +++ b/drivers/gpu/drm/i915/i915_debugfs.c
>> @@ -3944,13 +3944,16 @@ static void gen9_sseu_device_status(struct 
>> drm_i915_private *dev_priv,
>>           for (ss = 0; ss < info->sseu.max_subslices; ss++) {
>>               unsigned int eu_cnt;
>> +            u8 ss_idx = s * info->sseu.ss_stride +
>> +                    ss / BITS_PER_BYTE;
>>               if (IS_GEN9_LP(dev_priv)) {
>>                   if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
>>                       /* skip disabled subslice */
>>                       continue;
>> -                sseu->subslice_mask[s] |= BIT(ss);
>> +                sseu->subslice_mask[ss_idx] |=
>> +                    BIT(ss % BITS_PER_BYTE);
>>               }
>>               eu_cnt = 2 * hweight32(eu_reg[2*s + ss/2] &
>> diff --git a/drivers/gpu/drm/i915/intel_device_info.c 
>> b/drivers/gpu/drm/i915/intel_device_info.c
>> index 723b1fde5fc4..04dde4f204c3 100644
>> --- a/drivers/gpu/drm/i915/intel_device_info.c
>> +++ b/drivers/gpu/drm/i915/intel_device_info.c
>> @@ -93,9 +93,9 @@ static void sseu_dump(const struct sseu_dev_info 
>> *sseu, struct drm_printer *p)
>>              hweight8(sseu->slice_mask), sseu->slice_mask);
>>       drm_printf(p, "subslice total: %u\n", 
>> intel_sseu_subslice_total(sseu));
>>       for (s = 0; s < sseu->max_slices; s++) {
>> -        drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
>> +        drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
>>                  s, intel_sseu_subslices_per_slice(sseu, s),
>> -               sseu->subslice_mask[s]);
>> +               intel_sseu_get_subslices(sseu, s));
>>       }
>>       drm_printf(p, "EU total: %u\n", sseu->eu_total);
>>       drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
>> @@ -159,9 +159,9 @@ void intel_device_info_dump_topology(const struct 
>> sseu_dev_info *sseu,
>>       }
>>       for (s = 0; s < sseu->max_slices; s++) {
>> -        drm_printf(p, "slice%d: %u subslice(s) (0x%hhx):\n",
>> +        drm_printf(p, "slice%d: %u subslice(s) (0x%08x):\n",
>>                  s, intel_sseu_subslices_per_slice(sseu, s),
>> -               sseu->subslice_mask[s]);
>> +               intel_sseu_get_subslices(sseu, s));
>>           for (ss = 0; ss < sseu->max_subslices; ss++) {
>>               u16 enabled_eus = sseu_get_eus(sseu, s, ss);
>>


More information about the Intel-gfx mailing list