[PATCH] drm/i915/sseu: Disassociate internal EU mask representation from uapi
Matt Roper
matthew.d.roper at intel.com
Thu Apr 14 21:55:21 UTC 2022
The sseu_dev_info structure currently stores slice, subslice, and EU
masks in u8[] arrays that match the format returned by the query uapi.
While this makes the query handler's copy_to_user simple, it makes the
masks harder to work with inside the driver. On gen11 platforms and
beyond it also wastes a lot of space because the EU mask is
architecturally guaranteed to be identical for each subslice of the
platform. On Xe_HP, for example, we currently allocate 64 bytes of
storage for the EU mask even though we only truly need 2 bytes, and the
wasted space will only continue to grow as future platforms increase
their DSS counts.
Let's switch to a more natural internal representation of the EU mask,
in a format that's compatible with linux/bitmap.h operations. On
pre-gen11 platforms we'll store the full-device EU mask (which can be up
to 96 bits on gen8/gen9) and on gen11+ we'll store a single-SS EU mask
(which only needs 16 bits).
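As a rough sketch of the idea (illustration only; eu_mask_t, EU_FUSE_BITS and
eus_in_subslice() are made-up names, not code from this patch), the new type
is just a fixed-size wrapper around an unsigned long array, so the standard
linux/bitmap.h helpers can operate on it directly:

  #include <linux/bitmap.h>
  #include <linux/bitops.h>

  #define EU_FUSE_BITS 96	/* up to 3 x 32-bit EU fuse registers */

  typedef struct {
  	unsigned long b[BITS_TO_LONGS(EU_FUSE_BITS)];
  } eu_mask_t;

  /* Count the enabled EUs of one subslice within a full-device mask. */
  static unsigned int eus_in_subslice(const eu_mask_t *mask, int first_eu,
  				      int eus_per_ss)
  {
  	eu_mask_t tmp = {};

  	/* Move the subslice's window down to bit 0, then mask off the rest */
  	bitmap_shift_right(tmp.b, mask->b, first_eu, EU_FUSE_BITS);
  	bitmap_clear(tmp.b, eus_per_ss, EU_FUSE_BITS - eus_per_ss);

  	return bitmap_weight(tmp.b, EU_FUSE_BITS);
  }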
Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
---
drivers/gpu/drm/i915/gt/intel_sseu.c | 115 +++++++++++++++++++--------
drivers/gpu/drm/i915/gt/intel_sseu.h | 27 ++++++-
drivers/gpu/drm/i915/i915_query.c | 28 +++++--
3 files changed, 133 insertions(+), 37 deletions(-)
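Note that the uapi blob produced for DRM_I915_QUERY_TOPOLOGY_INFO is unchanged
by this patch; only the driver-internal storage format differs. For reference,
a userspace consumer keeps decoding the EU mask exactly as documented in
i915_drm.h (sketch only; eu_enabled() is a made-up helper name):

  #include <stdbool.h>
  #include <drm/i915_drm.h>

  /* Test whether EU 'eu' of (slice, subslice) is present in the data blob
   * returned for DRM_I915_QUERY_TOPOLOGY_INFO. */
  static bool eu_enabled(const struct drm_i915_query_topology_info *topo,
  		       int slice, int subslice, int eu)
  {
  	unsigned int offset = topo->eu_offset +
  		(slice * topo->max_subslices + subslice) * topo->eu_stride;

  	return (topo->data[offset + eu / 8] >> (eu % 8)) & 1;
  }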
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 9881a6790574..296c505aa598 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -80,45 +80,59 @@ intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
return hweight32(intel_sseu_get_subslices(sseu, slice));
}
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
- int subslice)
+static intel_sseu_eu_mask_t
+sseu_get_eus(const struct sseu_dev_info *sseu, int slice, int subslice)
{
- int slice_stride = sseu->max_subslices * sseu->eu_stride;
-
- return slice * slice_stride + subslice * sseu->eu_stride;
-}
-
-static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
- int subslice)
-{
- int i, offset = sseu_eu_idx(sseu, slice, subslice);
- u16 eu_mask = 0;
+ intel_sseu_eu_mask_t ret = {};
+ int first_eu =
+ slice * sseu->max_subslices * sseu->max_eus_per_subslice +
+ subslice * sseu->max_eus_per_subslice;
+
+ if (sseu->has_common_ss_eumask) {
+ if (intel_sseu_get_subslices(sseu, 0) & BIT(subslice))
+ return sseu->eu_mask;
+ else
+ /* Empty bitmap */
+ return ret;
+ }
- for (i = 0; i < sseu->eu_stride; i++)
- eu_mask |=
- ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+ bitmap_shift_right(ret.b, sseu->eu_mask.b, first_eu, I915_MAX_EU_FUSE_BITS);
+ bitmap_clear(ret.b, sseu->max_eus_per_subslice, I915_MAX_EU_FUSE_BITS - sseu->max_eus_per_subslice);
- return eu_mask;
+ return ret;
}
static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
- u16 eu_mask)
+ u32 eu_mask)
{
- int i, offset = sseu_eu_idx(sseu, slice, subslice);
+ intel_sseu_eu_mask_t eu_bitmap = {};
+ int numbits = sizeof(eu_mask) * BITS_PER_BYTE;
+ int first_eu =
+ slice * sseu->max_subslices * sseu->max_eus_per_subslice +
+ subslice * sseu->max_eus_per_subslice;
- for (i = 0; i < sseu->eu_stride; i++)
- sseu->eu_mask[offset + i] =
- (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+ bitmap_from_arr32(eu_bitmap.b, &eu_mask, numbits);
+
+ if (sseu->has_common_ss_eumask) {
+ WARN_ON(slice > 0 || subslice > 0);
+ bitmap_copy(sseu->eu_mask.b, eu_bitmap.b, numbits);
+ } else {
+ bitmap_shift_left(eu_bitmap.b, eu_bitmap.b, first_eu, I915_MAX_EU_FUSE_BITS);
+ bitmap_clear(sseu->eu_mask.b, first_eu, sseu->max_eus_per_subslice);
+ bitmap_or(sseu->eu_mask.b, sseu->eu_mask.b, eu_bitmap.b, I915_MAX_EU_FUSE_BITS);
+ }
}
static u16 compute_eu_total(const struct sseu_dev_info *sseu)
{
- u16 i, total = 0;
+ int mult;
- for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
- total += hweight8(sseu->eu_mask[i]);
+ if (sseu->has_common_ss_eumask)
+ mult = hweight32(intel_sseu_get_subslices(sseu, 0));
+ else
+ mult = 1;
- return total;
+ return mult * bitmap_weight(sseu->eu_mask.b, I915_MAX_EU_FUSE_BITS);
}
static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
@@ -167,6 +181,7 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
if (intel_sseu_has_subslice(sseu, s, ss))
sseu_set_eus(sseu, s, ss, eu_en);
}
+ sseu->has_common_ss_eumask = 1;
sseu->eu_per_subslice = hweight16(eu_en);
sseu->eu_total = compute_eu_total(sseu);
}
@@ -738,10 +753,11 @@ static void sseu_print_hsw_topology(const struct sseu_dev_info *sseu,
intel_sseu_get_subslices(sseu, s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
- u16 enabled_eus = sseu_get_eus(sseu, s, ss);
+ intel_sseu_eu_mask_t enabled_eus = sseu_get_eus(sseu, s, ss);
- drm_printf(p, "\tsubslice%d: %u EUs (0x%hx)\n",
- ss, hweight16(enabled_eus), enabled_eus);
+ drm_printf(p, "\tsubslice%d: %u EUs (0x%*pb)\n",
+ ss, bitmap_weight(enabled_eus.b, I915_MAX_EU_FUSE_BITS),
+ I915_MAX_EU_FUSE_BITS, enabled_eus.b);
}
}
}
@@ -754,12 +770,13 @@ static void sseu_print_xehp_topology(const struct sseu_dev_info *sseu,
int dss;
for (dss = 0; dss < sseu->max_subslices; dss++) {
- u16 enabled_eus = sseu_get_eus(sseu, 0, dss);
+ intel_sseu_eu_mask_t enabled_eus = sseu_get_eus(sseu, 0, dss);
- drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%04hx)\n", dss,
+ drm_printf(p, "DSS_%02d: G:%3s C:%3s, %2u EUs (0x%*pb)\n", dss,
str_yes_no(g_dss_mask & BIT(dss)),
str_yes_no(c_dss_mask & BIT(dss)),
- hweight16(enabled_eus), enabled_eus);
+ bitmap_weight(enabled_eus.b, I915_MAX_EU_FUSE_BITS),
+ I915_MAX_EU_FUSE_BITS, enabled_eus.b);
}
}
@@ -793,3 +810,39 @@ u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice)
return slice_mask;
}
+/**
+ * intel_sseu_get_uapi_eumask - Allocate and fill a uapi-compatible EU mask
+ * @sseu: device SSEU info
+ *
+ * On gen11 and later platforms each subslice has the same set of EUs
+ * enabled/disabled, so we only store the data for a single subslice in our
+ * internal mask; when queried by userspace, we need to expand this into a
+ * full-device EU mask.
+ *
+ * Return: A newly allocated buffer containing the full-device EU mask in the
+ * format expected by userspace (the caller must kfree() it), or NULL on failure.
+ */
+u8 *intel_sseu_get_uapi_eumask(const struct sseu_dev_info *sseu)
+{
+ u8 *buf;
+ u32 ssmask;
+ int ss;
+
+ if (WARN_ON(!sseu->has_common_ss_eumask))
+ return NULL;
+
+ WARN_ON(sseu->max_slices > 1);
+ ssmask = intel_sseu_get_subslices(sseu, 0);
+
+ buf = kzalloc(sseu->max_slices * sseu->max_subslices * sseu->eu_stride,
+ GFP_KERNEL);
+ if (!buf)
+ return NULL;
+
+ for (ss = 0; ss < sseu->max_subslices; ss++)
+ if (ssmask & BIT(ss))
+ memcpy(&buf[ss * sseu->eu_stride], sseu->eu_mask.b,
+ sseu->eu_stride);
+
+ return buf;
+}
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 5c078df4729c..b6e49a2a6973 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -52,12 +52,34 @@ struct drm_printer;
#define GEN_MAX_GSLICES (GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
#define GEN_MAX_CSLICES (GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
+/*
+ * Maximum number of 32-bit registers used by hardware to express the
+ * enabled/disabled EUs. Note that starting with gen11 each SS has an
+ * identical set of EUs enabled/disabled, so newer platforms may require less
+ * storage than the older gen8/gen9 platforms.
+ */
+#define I915_MAX_EU_FUSE_REGS 3
+#define I915_MAX_EU_FUSE_BITS (I915_MAX_EU_FUSE_REGS * 32)
+
+/* Compatible with helpers in linux/bitmap.h */
+typedef struct {
+ unsigned long b[BITS_TO_LONGS(I915_MAX_EU_FUSE_BITS)];
+} intel_sseu_eu_mask_t;
+
struct sseu_dev_info {
u8 slice_mask;
u8 subslice_mask[GEN_SS_MASK_SIZE];
u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
- u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
+
+ /*
+ * On gen11 and beyond, eu_mask represents the EUs enabled/disabled
+ * for a single subslice (each SS has identical EUs). On earlier
+ * platforms it holds the individual enable/disable state for each
+ * EU of the platform.
+ */
+ intel_sseu_eu_mask_t eu_mask;
+
u16 eu_total;
u8 eu_per_subslice;
u8 min_eu_in_pool;
@@ -66,6 +88,7 @@ struct sseu_dev_info {
u8 has_slice_pg:1;
u8 has_subslice_pg:1;
u8 has_eu_pg:1;
+ u8 has_common_ss_eumask:1;
/* Topology fields */
u8 max_slices;
@@ -145,4 +168,6 @@ void intel_sseu_print_topology(struct drm_i915_private *i915,
u16 intel_slicemask_from_dssmask(u64 dss_mask, int dss_per_slice);
+u8 *intel_sseu_get_uapi_eumask(const struct sseu_dev_info *sseu);
+
#endif /* __INTEL_SSEU_H__ */
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index b5ca00cb6cf6..563de06add30 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -81,11 +81,29 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
sseu->subslice_mask, subslice_length))
return -EFAULT;
- if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
- sizeof(topo) +
- slice_length + subslice_length),
- sseu->eu_mask, eu_length))
- return -EFAULT;
+ if (sseu->has_common_ss_eumask) {
+ u8 *uapi_eumask;
+ int ret;
+
+ /* Expand internal representation to full-device eumask */
+ uapi_eumask = intel_sseu_get_uapi_eumask(sseu);
+ if (!uapi_eumask)
+ return -ENOMEM;
+
+ ret = copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+ sizeof(topo) +
+ slice_length + subslice_length),
+ uapi_eumask, eu_length);
+ kfree(uapi_eumask);
+ if (ret)
+ return -EFAULT;
+ } else {
+ if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
+ sizeof(topo) +
+ slice_length + subslice_length),
+ sseu->eu_mask.b, eu_length))
+ return -EFAULT;
+ }
return total_length;
}
--
2.34.1