[Intel-gfx] [PATCH 5/5] drm/i915: Expand subslice mask
Daniele Ceraolo Spurio
daniele.ceraolospurio at intel.com
Tue May 7 19:00:55 UTC 2019
On 5/3/19 2:30 PM, Stuart Summers wrote:
> Currently, the subslice_mask runtime parameter is stored as an
> array of subslices per slice. Expand the subslice mask array to
> better match what is presented to userspace through the
> I915_QUERY_TOPOLOGY_INFO ioctl. The index into this array is
> then calculated:
> slice * subslice stride + subslice index / 8
>
> v2: fix spacing in set_sseu_info args
> use set_sseu_info to initialize sseu data when building
> device status in debugfs
> rename variables in intel_engine_types.h to avoid checkpatch
> warnings
> v3: update headers in intel_sseu.h
> v4: add const to some sseu_dev_info variables
> use sseu->eu_stride for EU stride calculations
> v5: address review comments from Tvrtko and Daniele
>
> Cc: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Acked-by: Lionel Landwerlin <lionel.g.landwerlin at intel.com>
> Signed-off-by: Stuart Summers <stuart.summers at intel.com>
> ---
> drivers/gpu/drm/i915/gt/intel_engine_cs.c | 24 ++-
> drivers/gpu/drm/i915/gt/intel_engine_types.h | 30 ++--
> drivers/gpu/drm/i915/gt/intel_hangcheck.c | 3 +-
> drivers/gpu/drm/i915/gt/intel_sseu.c | 43 ++++-
> drivers/gpu/drm/i915/gt/intel_sseu.h | 28 +++-
> drivers/gpu/drm/i915/gt/intel_workarounds.c | 2 +-
> drivers/gpu/drm/i915/i915_debugfs.c | 40 ++---
> drivers/gpu/drm/i915/i915_drv.c | 6 +-
> drivers/gpu/drm/i915/i915_gpu_error.c | 5 +-
> drivers/gpu/drm/i915/i915_query.c | 10 +-
> drivers/gpu/drm/i915/intel_device_info.c | 155 ++++++++++---------
> 11 files changed, 227 insertions(+), 119 deletions(-)
>
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> index 5907a9613641..290bda5cc82b 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
> @@ -909,12 +909,30 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type)
> }
> }
>
> +static inline u32
> +intel_sseu_fls_subslice(const struct sseu_dev_info *sseu, u32 slice)
> +{
> + u32 subslice;
> + int i;
> +
> + for (i = sseu->ss_stride - 1; i >= 0; i--) {
> + subslice = fls(sseu->subslice_mask[slice * sseu->ss_stride +
> + i]);
> + if (subslice) {
> + subslice += i * BITS_PER_BYTE;
> + break;
> + }
> + }
> +
> + return subslice;
> +}
> +
> u32 intel_calculate_mcr_s_ss_select(struct drm_i915_private *dev_priv)
> {
> const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> u32 mcr_s_ss_select;
> u32 slice = fls(sseu->slice_mask);
> - u32 subslice = fls(sseu->subslice_mask[slice]);
> + u32 subslice = intel_sseu_fls_subslice(sseu, slice);
>
> if (IS_GEN(dev_priv, 10))
> mcr_s_ss_select = GEN8_MCR_SLICE(slice) |
> @@ -990,6 +1008,7 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
> struct intel_instdone *instdone)
> {
> struct drm_i915_private *dev_priv = engine->i915;
> + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> struct intel_uncore *uncore = engine->uncore;
> u32 mmio_base = engine->mmio_base;
> int slice;
> @@ -1007,7 +1026,8 @@ void intel_engine_get_instdone(struct intel_engine_cs *engine,
>
> instdone->slice_common =
> intel_uncore_read(uncore, GEN7_SC_INSTDONE);
> - for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
> + for_each_instdone_slice_subslice(dev_priv, sseu, slice,
> + subslice) {
> instdone->sampler[slice][subslice] =
> read_subslice_reg(dev_priv, slice, subslice,
> GEN7_SAMPLER_INSTDONE);
> diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> index c0ab11b12e14..582340b55144 100644
> --- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
> +++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
> @@ -535,20 +535,20 @@ intel_engine_needs_breadcrumb_tasklet(const struct intel_engine_cs *engine)
> return engine->flags & I915_ENGINE_NEEDS_BREADCRUMB_TASKLET;
> }
>
> -#define instdone_slice_mask(dev_priv__) \
> - (IS_GEN(dev_priv__, 7) ? \
> - 1 : RUNTIME_INFO(dev_priv__)->sseu.slice_mask)
> -
> -#define instdone_subslice_mask(dev_priv__) \
> - (IS_GEN(dev_priv__, 7) ? \
> - 1 : RUNTIME_INFO(dev_priv__)->sseu.subslice_mask[0])
> -
> -#define for_each_instdone_slice_subslice(dev_priv__, slice__, subslice__) \
> - for ((slice__) = 0, (subslice__) = 0; \
> - (slice__) < I915_MAX_SLICES; \
> - (subslice__) = ((subslice__) + 1) < I915_MAX_SUBSLICES ? (subslice__) + 1 : 0, \
> - (slice__) += ((subslice__) == 0)) \
> - for_each_if((BIT(slice__) & instdone_slice_mask(dev_priv__)) && \
> - (BIT(subslice__) & instdone_subslice_mask(dev_priv__)))
> +#define instdone_has_slice(dev_priv___, sseu___, slice___) \
> + ((IS_GEN(dev_priv___, 7) ? 1 : ((sseu___)->slice_mask)) & \
> + BIT(slice___))
> +
> +#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
> + (IS_GEN(dev_priv__, 7) ? 1 : \
This will return true for all ss on gen7, while the original code
returned 1 as the subslice mask (i.e. only has ss == 0).
> + intel_sseu_has_subslice(sseu__, slice__, subslice__))
> +
> +#define for_each_instdone_slice_subslice(dev_priv_, sseu_, slice_, subslice_) \
> + for ((slice_) = 0, (subslice_) = 0; (slice_) < I915_MAX_SLICES; \
> + (subslice_) = ((subslice_) + 1) % I915_MAX_SUBSLICES, \
> + (slice_) += ((subslice_) == 0)) \
> + for_each_if((instdone_has_slice(dev_priv_, sseu_, slice_)) && \
> + (instdone_has_subslice(dev_priv_, sseu_, slice_, \
> + subslice_)))
>
> #endif /* __INTEL_ENGINE_TYPES_H__ */
> diff --git a/drivers/gpu/drm/i915/gt/intel_hangcheck.c b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> index 721ab74a382f..10e032c9ab10 100644
> --- a/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> +++ b/drivers/gpu/drm/i915/gt/intel_hangcheck.c
> @@ -51,6 +51,7 @@ static bool instdone_unchanged(u32 current_instdone, u32 *old_instdone)
> static bool subunits_stuck(struct intel_engine_cs *engine)
> {
> struct drm_i915_private *dev_priv = engine->i915;
> + const struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> struct intel_instdone instdone;
> struct intel_instdone *accu_instdone = &engine->hangcheck.instdone;
> bool stuck;
> @@ -72,7 +73,7 @@ static bool subunits_stuck(struct intel_engine_cs *engine)
> stuck &= instdone_unchanged(instdone.slice_common,
> &accu_instdone->slice_common);
>
> - for_each_instdone_slice_subslice(dev_priv, slice, subslice) {
> + for_each_instdone_slice_subslice(dev_priv, sseu, slice, subslice) {
> stuck &= instdone_unchanged(instdone.sampler[slice][subslice],
> &accu_instdone->sampler[slice][subslice]);
> stuck &= instdone_unchanged(instdone.row[slice][subslice],
> diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
> index a0756f006f5f..a8b98b0266b7 100644
> --- a/drivers/gpu/drm/i915/gt/intel_sseu.c
> +++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
> @@ -8,6 +8,17 @@
> #include "intel_lrc_reg.h"
> #include "intel_sseu.h"
>
> +void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
> + u8 max_subslices, u8 max_eus_per_subslice)
> +{
> + sseu->max_slices = max_slices;
> + sseu->max_subslices = max_subslices;
> + sseu->max_eus_per_subslice = max_eus_per_subslice;
> +
> + sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
> + sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
> +}
> +
> unsigned int
> intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
> {
> @@ -19,10 +30,40 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
> return total;
> }
>
> +void intel_sseu_copy_subslices(const struct sseu_dev_info *sseu, int slice,
> + u8 *to_mask)
> +{
> + int offset = slice * sseu->ss_stride;
> +
> + memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
> +}
> +
> +u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
nitpick: extra space
> +{
> + int i, offset = slice * sseu->ss_stride;
> + u32 mask;
> +
GEM_BUG_ON(sseu->ss_stride > size(u32)) to catch the unlikely case we go
over and need to update the mask size?
> + for (i = 0; i < sseu->ss_stride; i++)
> + mask |= (u32)sseu->subslice_mask[offset + i] <<
> + i * BITS_PER_BYTE;
> +
> + return mask;
> +}
> +
> +void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
> + u32 ss_mask)
> +{
> + int i, offset = slice * sseu->ss_stride;
> +
> + for (i = 0; i < sseu->ss_stride; i++)
> + sseu->subslice_mask[offset + i] =
> + (ss_mask >> (BITS_PER_BYTE * i)) & 0xff;
> +}
> +
> unsigned int
> intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
> {
> - return hweight8(sseu->subslice_mask[slice]);
> + return hweight32(intel_sseu_get_subslices(sseu, slice));
> }
>
> u32 intel_sseu_make_rpcs(struct drm_i915_private *i915,
<snip>
> --- a/drivers/gpu/drm/i915/intel_device_info.c
> +++ b/drivers/gpu/drm/i915/intel_device_info.c
> @@ -84,17 +84,46 @@ void intel_device_info_dump_flags(const struct intel_device_info *info,
> #undef PRINT_FLAG
> }
>
> +#define SS_STR_MAX_SIZE (GEN_MAX_SUBSLICE_STRIDE * 2 + 1)
> +
> +static char *
> +subslice_per_slice_str(char *buf, u8 size, const struct sseu_dev_info *sseu,
> + u8 slice)
> +{
> + int i;
> + u8 ss_offset = slice * sseu->ss_stride;
> +
> + GEM_BUG_ON(slice >= sseu->max_slices);
> +
> + /* Two ASCII character hex plus null terminator */
> + GEM_BUG_ON(size < sseu->ss_stride * 2 + 1);
> +
> + memset(buf, 0, size);
> +
> + /*
> + * Print subslice information in reverse order to match
> + * userspace expectations.
> + */
> + for (i = 0; i < sseu->ss_stride; i++)
> + sprintf(&buf[i * 2], "%02x",
> + sseu->subslice_mask[ss_offset + sseu->ss_stride -
> + (i + 1)]);
> +
> + return buf;
> +}
> +
> static void sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
> {
> int s;
> + char buf[SS_STR_MAX_SIZE];
>
> drm_printf(p, "slice total: %u, mask=%04x\n",
> hweight8(sseu->slice_mask), sseu->slice_mask);
> drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
> for (s = 0; s < sseu->max_slices; s++) {
> - drm_printf(p, "slice%d: %u subslices, mask=%04x\n",
> + drm_printf(p, "slice%d: %u subslices, mask=%s\n",
> s, intel_sseu_subslices_per_slice(sseu, s),
> - sseu->subslice_mask[s]);
> + subslice_per_slice_str(buf, ARRAY_SIZE(buf), sseu, s));
Now that we have intel_sseu_get_subslices() can't we just print the
return from that instead of using the buffer?
> }
> drm_printf(p, "EU total: %u\n", sseu->eu_total);
> drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
<snip>
> @@ -555,6 +570,7 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
> struct sseu_dev_info *sseu = &RUNTIME_INFO(dev_priv)->sseu;
> u32 fuse1;
> int s, ss;
> + u32 subslice_mask;
>
> /*
> * There isn't a register to tell us how many slices/subslices. We
> @@ -566,22 +582,18 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
> /* fall through */
> case 1:
> sseu->slice_mask = BIT(0);
> - sseu->subslice_mask[0] = BIT(0);
> + subslice_mask = BIT(0);
> break;
> case 2:
> sseu->slice_mask = BIT(0);
> - sseu->subslice_mask[0] = BIT(0) | BIT(1);
> + subslice_mask = BIT(0) | BIT(1);
> break;
> case 3:
> sseu->slice_mask = BIT(0) | BIT(1);
> - sseu->subslice_mask[0] = BIT(0) | BIT(1);
> - sseu->subslice_mask[1] = BIT(0) | BIT(1);
> + subslice_mask = BIT(0) | BIT(1);
> break;
> }
>
> - sseu->max_slices = hweight8(sseu->slice_mask);
> - sseu->max_subslices = hweight8(sseu->subslice_mask[0]);
> -
> fuse1 = I915_READ(HSW_PAVP_FUSE1);
> switch ((fuse1 & HSW_F1_EU_DIS_MASK) >> HSW_F1_EU_DIS_SHIFT) {
> default:
> @@ -598,9 +610,14 @@ static void haswell_sseu_info_init(struct drm_i915_private *dev_priv)
> sseu->eu_per_subslice = 6;
> break;
> }
> - sseu->max_eus_per_subslice = sseu->eu_per_subslice;
> +
> + intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
> + hweight8(subslice_mask),
> + sseu->eu_per_subslice);
I'd still prefer this to use a local variable so that we always only set
sseu->eu_per_subslice from within intel_sseu_set_info.
Daniele
>
> for (s = 0; s < sseu->max_slices; s++) {
> + intel_sseu_set_subslices(sseu, s, subslice_mask);
> +
> for (ss = 0; ss < sseu->max_subslices; ss++) {
> intel_sseu_set_eus(sseu, s, ss,
> (1UL << sseu->eu_per_subslice) - 1);
>
More information about the Intel-gfx
mailing list