[Intel-gfx] [PATCH 1/2] drm/i915/sseu: Don't overallocate subslice storage

Lucas De Marchi lucas.demarchi at intel.com
Fri Mar 11 19:00:09 UTC 2022


On Thu, Mar 10, 2022 at 10:15:42PM -0800, Matt Roper wrote:
>Xe_HP removed "slice" as a first-class unit in the hardware design.
>Instead we now have a single pool of subslices (which are now referred
>to as "DSS") that different hardware units have different ways of
>grouping ("compute slices," "geometry slices," etc.).  For the purposes
>of topology representation, we treat Xe_HP-based platforms as having a
>single slice that contains all of the platform's DSS.  There's no need
>to allocate storage space for (max legacy slices * max dss); let's
>update some of our macros to minimize the storage requirement for sseu
>topology.  We'll also document some of the constants to make it a little
>bit more clear what they represent.
>
>Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
>---
> drivers/gpu/drm/i915/gt/intel_engine_types.h |  2 +-
> drivers/gpu/drm/i915/gt/intel_sseu.h         | 47 +++++++++++++++-----
> 2 files changed, 36 insertions(+), 13 deletions(-)
>
>diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>index 4fbf45a74ec0..f9e246004bc0 100644
>--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
>+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
>@@ -645,7 +645,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
>
> #define for_each_instdone_gslice_dss_xehp(dev_priv_, sseu_, iter_, gslice_, dss_) \
> 	for ((iter_) = 0, (gslice_) = 0, (dss_) = 0; \
>-	     (iter_) < GEN_MAX_SUBSLICES; \
>+	     (iter_) < GEN_SS_MASK_SIZE; \
> 	     (iter_)++, (gslice_) = (iter_) / GEN_DSS_PER_GSLICE, \
> 	     (dss_) = (iter_) % GEN_DSS_PER_GSLICE) \
> 		for_each_if(intel_sseu_has_subslice((sseu_), 0, (iter_)))
>diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
>index 8a79cd8eaab4..4f59eadbb61a 100644
>--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
>+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
>@@ -15,26 +15,49 @@ struct drm_i915_private;
> struct intel_gt;
> struct drm_printer;
>
>-#define GEN_MAX_SLICES		(3) /* SKL upper bound */
>-#define GEN_MAX_SUBSLICES	(32) /* XEHPSDV upper bound */
>-#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>-#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_MAX_SUBSLICES)
>-#define GEN_MAX_EUS		(16) /* TGL upper bound */
>-#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS)
>+/*
>+ * Maximum number of legacy slices.  Legacy slices no longer exist starting on
>+ * Xe_HP ("gslices," "cslices," etc. on Xe_HP and beyond are a different
>+ * concept and are not expressed through fusing).
>+ */
>+#define GEN_MAX_LEGACY_SLICES		3
>+
>+/*
>+ * Maximum number of subslices that can exist within a legacy slice.  This is
>+ * only relevant to pre-Xe_HP platforms (Xe_HP and beyond use the GEN_MAX_DSS
>+ * value below).
>+ */
>+#define GEN_MAX_LEGACY_SUBSLICES	6

instead of calling the old legacy, maybe just add the XEHP_ prefix to
the new ones?

>+
>+/* Maximum number of DSS on newer platforms (Xe_HP and beyond). */
>+#define GEN_MAX_DSS			32
>+
>+/* Maximum number of EUs that can exist within a subslice or DSS. */
>+#define GEN_MAX_EUS_PER_SS		16
>+
>+#define MAX(a, b)			((a) > (b) ? (a) : (b))

what's worse, include kernel.h in another header file or redefine MAX
everywhere? Re-defining it in headers we risk situations in which the
include order may create warnings about defining it in multiple places.


>+
>+/* The maximum number of bits needed to express each subslice/DSS independently */
>+#define GEN_SS_MASK_SIZE		MAX(GEN_MAX_DSS, \
>+					    GEN_MAX_LEGACY_SLICES * GEN_MAX_LEGACY_SUBSLICES)
>+
>+#define GEN_SSEU_STRIDE(max_entries)	DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
>+#define GEN_MAX_SUBSLICE_STRIDE		GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
>+#define GEN_MAX_EU_STRIDE		GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
>
> #define GEN_DSS_PER_GSLICE	4
> #define GEN_DSS_PER_CSLICE	8
> #define GEN_DSS_PER_MSLICE	8
>
>-#define GEN_MAX_GSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_GSLICE)
>-#define GEN_MAX_CSLICES		(GEN_MAX_SUBSLICES / GEN_DSS_PER_CSLICE)
>+#define GEN_MAX_GSLICES		(GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
>+#define GEN_MAX_CSLICES		(GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
>
> struct sseu_dev_info {
> 	u8 slice_mask;
>-	u8 subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>-	u8 geometry_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>-	u8 compute_subslice_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICE_STRIDE];
>-	u8 eu_mask[GEN_MAX_SLICES * GEN_MAX_SUBSLICES * GEN_MAX_EU_STRIDE];
>+	u8 subslice_mask[GEN_SS_MASK_SIZE];
>+	u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
>+	u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
>+	u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];


Aside the minor things above, everything look correct.

Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>

thanks
Lucas De Marchi

> 	u16 eu_total;
> 	u8 eu_per_subslice;
> 	u8 min_eu_in_pool;
>-- 
>2.34.1
>


More information about the Intel-gfx mailing list