[PATCH] Decouple internal sseu from uapi
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Wed Mar 30 12:16:32 UTC 2022
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
- Uses multi-dimensional arrays and linux/bitmap.h.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Matt Roper <matthew.d.roper at intel.com>
---
drivers/gpu/drm/i915/gem/i915_gem_context.c | 9 +-
drivers/gpu/drm/i915/gt/intel_engine_types.h | 2 +-
drivers/gpu/drm/i915/gt/intel_sseu.c | 218 ++++++++++---------
drivers/gpu/drm/i915/gt/intel_sseu.h | 123 +++++++----
drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c | 57 +++--
drivers/gpu/drm/i915/gt/intel_workarounds.c | 7 +-
drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c | 2 +-
drivers/gpu/drm/i915/i915_getparam.c | 6 +-
drivers/gpu/drm/i915/i915_query.c | 69 ++++--
9 files changed, 273 insertions(+), 220 deletions(-)
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 54a901673e94..5394fbed8bf0 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1859,10 +1859,10 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
return -EINVAL;
/* Check validity against hardware. */
- if (user->slice_mask & ~device->slice_mask)
+ if (user->slice_mask & ~intel_sseu_slice_mask(device))
return -EINVAL;
- if (user->subslice_mask & ~device->subslice_mask[0])
+ if (user->subslice_mask & ~intel_sseu_get_subslices(device, 0))
return -EINVAL;
if (user->max_eus_per_subslice > device->max_eus_per_subslice)
@@ -1875,8 +1875,9 @@ i915_gem_user_to_context_sseu(struct intel_gt *gt,
/* Part specific restrictions. */
if (GRAPHICS_VER(i915) == 11) {
- unsigned int hw_s = hweight8(device->slice_mask);
- unsigned int hw_ss_per_s = hweight8(device->subslice_mask[0]);
+ unsigned int hw_s = intel_sseu_slice_count(device);
+ unsigned int hw_ss_per_s =
+ intel_sseu_subslices_per_slice(device, 0);
unsigned int req_s = hweight8(context->slice_mask);
unsigned int req_ss = hweight8(context->subslice_mask);
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index eac20112709c..36eefebe3add 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -630,7 +630,7 @@ intel_engine_has_relative_mmio(const struct intel_engine_cs * const engine)
}
#define instdone_has_slice(dev_priv___, sseu___, slice___) \
- ((GRAPHICS_VER(dev_priv___) == 7 ? 1 : ((sseu___)->slice_mask)) & BIT(slice___))
+ (GRAPHICS_VER(dev_priv___) == 7 ? 1 : intel_sseu_has_slice((sseu___), (slice___)))
#define instdone_has_subslice(dev_priv__, sseu__, slice__, subslice__) \
(GRAPHICS_VER(dev_priv__) == 7 ? (1 & BIT(subslice__)) : \
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.c b/drivers/gpu/drm/i915/gt/intel_sseu.c
index 9881a6790574..8cd081e3e767 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.c
@@ -12,17 +12,35 @@
#include "linux/string_helpers.h"
-void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
- u8 max_subslices, u8 max_eus_per_subslice)
+struct intel_sseu intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
{
+ struct intel_sseu value = {
+ .slice_mask = (u8)bitmap_get_value8(sseu->slice_mask.b, 0),
+ .subslice_mask = (u8)bitmap_get_value8(sseu->ss_mask[0].b, 0),
+ .min_eus_per_subslice = sseu->max_eus_per_subslice,
+ .max_eus_per_subslice = sseu->max_eus_per_subslice,
+ };
+
+ return value;
+}
+
+void intel_sseu_set_info(struct sseu_dev_info *sseu,
+ unsigned int max_slices,
+ unsigned int max_subslices,
+ unsigned int max_eus_per_subslice)
+{
+ GEM_BUG_ON(max_slices > BITMAP_BITS(sseu->slice_mask.b));
+ GEM_BUG_ON(max_slices > ARRAY_SIZE(sseu->ss_mask));
+ GEM_BUG_ON(max_slices > ARRAY_SIZE(sseu->eu_mask));
+
+ GEM_BUG_ON(max_subslices > BITMAP_BITS(sseu->ss_mask[0].b));
+ GEM_BUG_ON(max_subslices > ARRAY_SIZE(sseu->eu_mask[0]));
+
+ GEM_BUG_ON(max_eus_per_subslice > BITMAP_BITS(sseu->eu_mask[0][0].b));
+
sseu->max_slices = max_slices;
sseu->max_subslices = max_subslices;
sseu->max_eus_per_subslice = max_eus_per_subslice;
-
- sseu->ss_stride = GEN_SSEU_STRIDE(sseu->max_subslices);
- GEM_BUG_ON(sseu->ss_stride > GEN_MAX_SUBSLICE_STRIDE);
- sseu->eu_stride = GEN_SSEU_STRIDE(sseu->max_eus_per_subslice);
- GEM_BUG_ON(sseu->eu_stride > GEN_MAX_EU_STRIDE);
}
unsigned int
@@ -30,121 +48,106 @@ intel_sseu_subslice_total(const struct sseu_dev_info *sseu)
{
unsigned int i, total = 0;
- for (i = 0; i < ARRAY_SIZE(sseu->subslice_mask); i++)
- total += hweight8(sseu->subslice_mask[i]);
+ for (i = 0; i < ARRAY_SIZE(sseu->ss_mask); i++)
+ total += bitmap_weight(sseu->ss_mask[i].b,
+ BITMAP_BITS(sseu->ss_mask[i].b));
return total;
}
-static u32
-sseu_get_subslices(const struct sseu_dev_info *sseu,
- const u8 *subslice_mask, u8 slice)
+static u32 sseu_get_subslices(const unsigned long *ss_mask, int nrbits)
{
- int i, offset = slice * sseu->ss_stride;
- u32 mask = 0;
+ u32 mask;
- GEM_BUG_ON(slice >= sseu->max_slices);
-
- for (i = 0; i < sseu->ss_stride; i++)
- mask |= (u32)subslice_mask[offset + i] << i * BITS_PER_BYTE;
+ bitmap_to_arr32(&mask, ss_mask, sizeof(mask) * BITS_PER_BYTE);
return mask;
}
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice)
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, int slice)
{
- return sseu_get_subslices(sseu, sseu->subslice_mask, slice);
+
+ GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->ss_mask));
+ return sseu_get_subslices(sseu->ss_mask[slice].b,
+ BITMAP_BITS(sseu->ss_mask[slice].b));
}
static u32 sseu_get_geometry_subslices(const struct sseu_dev_info *sseu)
{
- return sseu_get_subslices(sseu, sseu->geometry_subslice_mask, 0);
+ return sseu_get_subslices(sseu->geometry_ss_mask[0].b,
+ BITMAP_BITS(sseu->geometry_ss_mask[0].b));
}
u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu)
{
- return sseu_get_subslices(sseu, sseu->compute_subslice_mask, 0);
+ return sseu_get_subslices(sseu->compute_ss_mask[0].b,
+ BITMAP_BITS(sseu->compute_ss_mask[0].b));
}
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
- u8 *subslice_mask, u32 ss_mask)
+static void __sseu_set_subslices(unsigned long *bitmap, int nrbits, u32 ss)
{
- int offset = slice * sseu->ss_stride;
-
- memcpy(&subslice_mask[offset], &ss_mask, sseu->ss_stride);
+ GEM_BUG_ON(ss && (__fls(ss) > nrbits));
+ bitmap_from_arr32(bitmap, &ss, sizeof(ss) * BITS_PER_BYTE);
}
+#define sseu_set_subslices(b, ss) \
+ __sseu_set_subslices((b), BITMAP_BITS(b), (ss))
+
unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice)
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, int slice)
{
- return hweight32(intel_sseu_get_subslices(sseu, slice));
-}
-
-static int sseu_eu_idx(const struct sseu_dev_info *sseu, int slice,
- int subslice)
-{
- int slice_stride = sseu->max_subslices * sseu->eu_stride;
-
- return slice * slice_stride + subslice * sseu->eu_stride;
+ return bitmap_weight(sseu->ss_mask[slice].b,
+ BITMAP_BITS(sseu->ss_mask[slice].b));
}
static u16 sseu_get_eus(const struct sseu_dev_info *sseu, int slice,
int subslice)
{
- int i, offset = sseu_eu_idx(sseu, slice, subslice);
- u16 eu_mask = 0;
+ const unsigned int nrbits =
+ BITMAP_BITS(sseu->eu_mask[slice][subslice].b);
+ u32 mask;
- for (i = 0; i < sseu->eu_stride; i++)
- eu_mask |=
- ((u16)sseu->eu_mask[offset + i]) << (i * BITS_PER_BYTE);
+ GEM_BUG_ON(nrbits > sizeof(mask) * BITS_PER_BYTE);
+ bitmap_to_arr32(&mask, sseu->eu_mask[slice][subslice].b, nrbits);
- return eu_mask;
+ return (u16)mask;
}
static void sseu_set_eus(struct sseu_dev_info *sseu, int slice, int subslice,
- u16 eu_mask)
+ u32 mask)
{
- int i, offset = sseu_eu_idx(sseu, slice, subslice);
+ const unsigned int nrbits =
+ BITMAP_BITS(sseu->eu_mask[slice][subslice].b);
- for (i = 0; i < sseu->eu_stride; i++)
- sseu->eu_mask[offset + i] =
- (eu_mask >> (BITS_PER_BYTE * i)) & 0xff;
+ GEM_BUG_ON(mask && (__fls(mask) > nrbits));
+ bitmap_from_arr32(sseu->eu_mask[slice][subslice].b, &mask,
+ sizeof(mask) * BITS_PER_BYTE);
}
-static u16 compute_eu_total(const struct sseu_dev_info *sseu)
+static unsigned int compute_eu_total(const struct sseu_dev_info *sseu)
{
- u16 i, total = 0;
+ unsigned int s, ss, total = 0;
- for (i = 0; i < ARRAY_SIZE(sseu->eu_mask); i++)
- total += hweight8(sseu->eu_mask[i]);
+ for (s = 0; s < ARRAY_SIZE(sseu->ss_mask); s++) {
+ for (ss = 0; ss < ARRAY_SIZE(sseu->eu_mask[s]); ss++) {
+ total += bitmap_weight(sseu->eu_mask[s][ss].b,
+ BITMAP_BITS(sseu->eu_mask[s][ss].b));
+ }
+ }
return total;
}
-static u32 get_ss_stride_mask(struct sseu_dev_info *sseu, u8 s, u32 ss_en)
-{
- u32 ss_mask;
-
- ss_mask = ss_en >> (s * sseu->max_subslices);
- ss_mask &= GENMASK(sseu->max_subslices - 1, 0);
-
- return ss_mask;
-}
-
static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
u32 g_ss_en, u32 c_ss_en, u16 eu_en)
{
int s, ss;
- /* g_ss_en/c_ss_en represent entire subslice mask across all slices */
- GEM_BUG_ON(sseu->max_slices * sseu->max_subslices >
- sizeof(g_ss_en) * BITS_PER_BYTE);
-
for (s = 0; s < sseu->max_slices; s++) {
if ((s_en & BIT(s)) == 0)
continue;
- sseu->slice_mask |= BIT(s);
+ intel_sseu_enable_slice(sseu, s);
/*
* XeHP introduces the concept of compute vs geometry DSS. To
@@ -155,13 +158,9 @@ static void gen11_compute_sseu_info(struct sseu_dev_info *sseu, u8 s_en,
* for the purposes of selecting subslices to use in a
* particular GEM context.
*/
- intel_sseu_set_subslices(sseu, s, sseu->compute_subslice_mask,
- get_ss_stride_mask(sseu, s, c_ss_en));
- intel_sseu_set_subslices(sseu, s, sseu->geometry_subslice_mask,
- get_ss_stride_mask(sseu, s, g_ss_en));
- intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
- get_ss_stride_mask(sseu, s,
- g_ss_en | c_ss_en));
+ sseu_set_subslices(sseu->compute_ss_mask[s].b, c_ss_en);
+ sseu_set_subslices(sseu->geometry_ss_mask[s].b, g_ss_en);
+ sseu_set_subslices(sseu->ss_mask[s].b, g_ss_en | c_ss_en);
for (ss = 0; ss < sseu->max_subslices; ss++)
if (intel_sseu_has_subslice(sseu, s, ss))
@@ -261,11 +260,11 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
u32 fuse;
u8 subslice_mask = 0;
- fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
-
- sseu->slice_mask = BIT(0);
intel_sseu_set_info(sseu, 1, 2, 8);
+ intel_sseu_enable_slice(sseu, 0);
+
+ fuse = intel_uncore_read(gt->uncore, CHV_FUSE_GT);
if (!(fuse & CHV_FGT_DISABLE_SS0)) {
u8 disabled_mask =
((fuse & CHV_FGT_EU_DIS_SS0_R0_MASK) >>
@@ -288,7 +287,7 @@ static void cherryview_sseu_info_init(struct intel_gt *gt)
sseu_set_eus(sseu, 0, 1, ~disabled_mask);
}
- intel_sseu_set_subslices(sseu, 0, sseu->subslice_mask, subslice_mask);
+ sseu_set_subslices(sseu->ss_mask[0].b, subslice_mask);
sseu->eu_total = compute_eu_total(sseu);
@@ -320,13 +319,15 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
const u8 eu_mask = 0xff;
int s, ss;
- fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
- sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
-
/* BXT has a single slice and at most 3 subslices. */
intel_sseu_set_info(sseu, IS_GEN9_LP(i915) ? 1 : 3,
IS_GEN9_LP(i915) ? 3 : 4, 8);
+ fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+ intel_sseu_enable_slices(sseu,
+ (fuse2 & GEN8_F2_S_ENA_MASK) >>
+ GEN8_F2_S_ENA_SHIFT);
+
/*
* The subslice disable field is global, i.e. it applies
* to each of the enabled slices.
@@ -340,12 +341,11 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
* count the total enabled EU.
*/
for (s = 0; s < sseu->max_slices; s++) {
- if (!(sseu->slice_mask & BIT(s)))
+ if (!intel_sseu_has_slice(sseu, s))
/* skip disabled slice */
continue;
- intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
- subslice_mask);
+ sseu_set_subslices(sseu->ss_mask[s].b, subslice_mask);
eu_disable = intel_uncore_read(uncore, GEN9_EU_DISABLE(s));
for (ss = 0; ss < sseu->max_subslices; ss++) {
@@ -396,14 +396,15 @@ static void gen9_sseu_info_init(struct intel_gt *gt)
* pair per subslice.
*/
sseu->has_slice_pg =
- !IS_GEN9_LP(i915) && hweight8(sseu->slice_mask) > 1;
+ !IS_GEN9_LP(i915) && intel_sseu_slice_count(sseu) > 1;
sseu->has_subslice_pg =
IS_GEN9_LP(i915) && intel_sseu_subslice_total(sseu) > 1;
sseu->has_eu_pg = sseu->eu_per_subslice > 2;
if (IS_GEN9_LP(i915)) {
-#define IS_SS_DISABLED(ss) (!(sseu->subslice_mask[0] & BIT(ss)))
- info->has_pooled_eu = hweight8(sseu->subslice_mask[0]) == 3;
+#define IS_SS_DISABLED(ss) (!test_bit(ss, sseu->ss_mask[0].b))
+ info->has_pooled_eu =
+ intel_sseu_subslices_per_slice(sseu, 0) == 3;
sseu->min_eu_in_pool = 0;
if (info->has_pooled_eu) {
@@ -426,10 +427,13 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
u32 fuse2, subslice_mask, eu_disable[3]; /* s_max */
u32 eu_disable0, eu_disable1, eu_disable2;
- fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
- sseu->slice_mask = (fuse2 & GEN8_F2_S_ENA_MASK) >> GEN8_F2_S_ENA_SHIFT;
intel_sseu_set_info(sseu, 3, 3, 8);
+ fuse2 = intel_uncore_read(uncore, GEN8_FUSE2);
+ intel_sseu_enable_slices(sseu,
+ (fuse2 & GEN8_F2_S_ENA_MASK) >>
+ GEN8_F2_S_ENA_SHIFT);
+
/*
* The subslice disable field is global, i.e. it applies
* to each of the enabled slices.
@@ -453,12 +457,11 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
* count the total enabled EU.
*/
for (s = 0; s < sseu->max_slices; s++) {
- if (!(sseu->slice_mask & BIT(s)))
+ if (!intel_sseu_has_slice(sseu, s))
/* skip disabled slice */
continue;
- intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
- subslice_mask);
+ sseu_set_subslices(sseu->ss_mask[s].b, subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
u8 eu_disabled_mask;
@@ -499,7 +502,7 @@ static void bdw_sseu_info_init(struct intel_gt *gt)
* BDW supports slice power gating on devices with more than
* one slice.
*/
- sseu->has_slice_pg = hweight8(sseu->slice_mask) > 1;
+ sseu->has_slice_pg = intel_sseu_slice_count(sseu) > 1;
sseu->has_subslice_pg = 0;
sseu->has_eu_pg = 0;
}
@@ -521,15 +524,15 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
MISSING_CASE(INTEL_INFO(i915)->gt);
fallthrough;
case 1:
- sseu->slice_mask = BIT(0);
+ intel_sseu_enable_slice(sseu, 0);
subslice_mask = BIT(0);
break;
case 2:
- sseu->slice_mask = BIT(0);
+ intel_sseu_enable_slice(sseu, 0);
subslice_mask = BIT(0) | BIT(1);
break;
case 3:
- sseu->slice_mask = BIT(0) | BIT(1);
+ intel_sseu_enable_slices(sseu, 0x3);
subslice_mask = BIT(0) | BIT(1);
break;
}
@@ -550,13 +553,11 @@ static void hsw_sseu_info_init(struct intel_gt *gt)
break;
}
- intel_sseu_set_info(sseu, hweight8(sseu->slice_mask),
- hweight8(subslice_mask),
- sseu->eu_per_subslice);
+ intel_sseu_set_info(sseu, intel_sseu_slice_count(sseu),
+ hweight8(subslice_mask), sseu->eu_per_subslice);
for (s = 0; s < sseu->max_slices; s++) {
- intel_sseu_set_subslices(sseu, s, sseu->subslice_mask,
- subslice_mask);
+ sseu_set_subslices(sseu->ss_mask[s].b, subslice_mask);
for (ss = 0; ss < sseu->max_subslices; ss++) {
sseu_set_eus(sseu, s, ss,
@@ -643,7 +644,7 @@ u32 intel_sseu_make_rpcs(struct intel_gt *gt,
*/
if (GRAPHICS_VER(i915) == 11 &&
slices == 1 &&
- subslices > min_t(u8, 4, hweight8(sseu->subslice_mask[0]) / 2)) {
+ subslices > min_t(u8, 4, intel_sseu_subslices_per_slice(sseu, 0) / 2)) {
GEM_BUG_ON(subslices & 1);
subslice_pg = false;
@@ -709,13 +710,16 @@ void intel_sseu_dump(const struct sseu_dev_info *sseu, struct drm_printer *p)
{
int s;
- drm_printf(p, "slice total: %u, mask=%04x\n",
- hweight8(sseu->slice_mask), sseu->slice_mask);
- drm_printf(p, "subslice total: %u\n", intel_sseu_subslice_total(sseu));
+ drm_printf(p, "slices total: %u, present=%*pbl\n",
+ intel_sseu_slice_count(sseu),
+ __intel_sseu_slice_mask_bits(sseu),
+ __intel_sseu_slice_mask_bitmap(sseu));
+ drm_printf(p, "subslices total: %u\n", intel_sseu_subslice_total(sseu));
for (s = 0; s < sseu->max_slices; s++) {
- drm_printf(p, "slice%d: %u subslices, mask=%08x\n",
+ drm_printf(p, "slice%d: %u subslices, present=%*pbl\n",
s, intel_sseu_subslices_per_slice(sseu, s),
- intel_sseu_get_subslices(sseu, s));
+ BITMAP_BITS(sseu->ss_mask[s].b),
+ sseu->ss_mask[s].b);
}
drm_printf(p, "EU total: %u\n", sseu->eu_total);
drm_printf(p, "EU per subslice: %u\n", sseu->eu_per_subslice);
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu.h b/drivers/gpu/drm/i915/gt/intel_sseu.h
index 5c078df4729c..c8e3833ff286 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu.h
+++ b/drivers/gpu/drm/i915/gt/intel_sseu.h
@@ -6,8 +6,9 @@
#ifndef __INTEL_SSEU_H__
#define __INTEL_SSEU_H__
-#include <linux/types.h>
+#include <linux/bitmap.h>
#include <linux/kernel.h>
+#include <linux/types.h>
#include "i915_gem.h"
@@ -21,6 +22,7 @@ struct drm_printer;
* are not expressed through fusing).
*/
#define GEN_MAX_HSW_SLICES 3
+#define SSEU_MAX_SLICES GEN_MAX_HSW_SLICES
/*
* Maximum number of subslices that can exist within a HSW-style slice. This
@@ -40,10 +42,7 @@ struct drm_printer;
/* The maximum number of bits needed to express each subslice/DSS independently */
#define GEN_SS_MASK_SIZE SSEU_MAX(GEN_MAX_DSS, \
GEN_MAX_HSW_SLICES * GEN_MAX_SS_PER_HSW_SLICE)
-
-#define GEN_SSEU_STRIDE(max_entries) DIV_ROUND_UP(max_entries, BITS_PER_BYTE)
-#define GEN_MAX_SUBSLICE_STRIDE GEN_SSEU_STRIDE(GEN_SS_MASK_SIZE)
-#define GEN_MAX_EU_STRIDE GEN_SSEU_STRIDE(GEN_MAX_EUS_PER_SS)
+#define SSEU_MAX_SUBSLICES SSEU_MAX(GEN_MAX_DSS, GEN_MAX_SS_PER_HSW_SLICE)
#define GEN_DSS_PER_GSLICE 4
#define GEN_DSS_PER_CSLICE 8
@@ -52,15 +51,25 @@ struct drm_printer;
#define GEN_MAX_GSLICES (GEN_MAX_DSS / GEN_DSS_PER_GSLICE)
#define GEN_MAX_CSLICES (GEN_MAX_DSS / GEN_DSS_PER_CSLICE)
+#define BITMAP_BITS(b) ((unsigned int)(sizeof(b) * BITS_PER_BYTE))
+
+typedef struct { unsigned long b[BITS_TO_LONGS(SSEU_MAX_SLICES)]; } intel_sseu_slice_mask_t;
+typedef struct { unsigned long b[BITS_TO_LONGS(SSEU_MAX_SUBSLICES)]; } intel_sseu_subslice_mask_t;
+typedef struct { unsigned long b[BITS_TO_LONGS(GEN_MAX_EUS_PER_SS)]; } intel_sseu_eu_mask_t;
+
struct sseu_dev_info {
- u8 slice_mask;
- u8 subslice_mask[GEN_SS_MASK_SIZE];
- u8 geometry_subslice_mask[GEN_SS_MASK_SIZE];
- u8 compute_subslice_mask[GEN_SS_MASK_SIZE];
- u8 eu_mask[GEN_SS_MASK_SIZE * GEN_MAX_EU_STRIDE];
- u16 eu_total;
- u8 eu_per_subslice;
- u8 min_eu_in_pool;
+ intel_sseu_slice_mask_t slice_mask;
+
+ intel_sseu_subslice_mask_t ss_mask[SSEU_MAX_SLICES];
+ intel_sseu_subslice_mask_t geometry_ss_mask[SSEU_MAX_SLICES];
+ intel_sseu_subslice_mask_t compute_ss_mask[SSEU_MAX_SLICES];
+
+ intel_sseu_eu_mask_t eu_mask[SSEU_MAX_SLICES][SSEU_MAX_SUBSLICES];
+
+ unsigned int eu_total;
+ unsigned int eu_per_subslice;
+ unsigned int min_eu_in_pool;
+
/* For each slice, which subslice(s) has(have) 7 EUs (bitfield)? */
u8 subslice_7eu[3];
u8 has_slice_pg:1;
@@ -68,12 +77,9 @@ struct sseu_dev_info {
u8 has_eu_pg:1;
/* Topology fields */
- u8 max_slices;
- u8 max_subslices;
- u8 max_eus_per_subslice;
-
- u8 ss_stride;
- u8 eu_stride;
+ unsigned int max_slices;
+ unsigned int max_subslices;
+ unsigned int max_eus_per_subslice;
};
/*
@@ -86,53 +92,78 @@ struct intel_sseu {
u8 max_eus_per_subslice;
};
-static inline struct intel_sseu
-intel_sseu_from_device_info(const struct sseu_dev_info *sseu)
+struct intel_sseu intel_sseu_from_device_info(const struct sseu_dev_info *sseu);
+
+static inline void
+intel_sseu_enable_slice(struct sseu_dev_info *sseu, int slice)
{
- struct intel_sseu value = {
- .slice_mask = sseu->slice_mask,
- .subslice_mask = sseu->subslice_mask[0],
- .min_eus_per_subslice = sseu->max_eus_per_subslice,
- .max_eus_per_subslice = sseu->max_eus_per_subslice,
- };
-
- return value;
+ set_bit(slice, sseu->slice_mask.b);
+}
+
+static inline void
+intel_sseu_enable_slices(struct sseu_dev_info *sseu, u8 mask)
+{
+ bitmap_set_value8(sseu->slice_mask.b, mask, 0);
}
static inline bool
-intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
- int subslice)
+intel_sseu_has_slice(const struct sseu_dev_info *sseu, int slice)
{
- u8 mask;
- int ss_idx = subslice / BITS_PER_BYTE;
+ return test_bit(slice, sseu->slice_mask.b);
+}
+
+static inline const unsigned long *
+__intel_sseu_slice_mask_bitmap(const struct sseu_dev_info *sseu)
+{
+ return sseu->slice_mask.b;
+}
+
+static inline unsigned int
+__intel_sseu_slice_mask_bits(const struct sseu_dev_info *sseu)
+{
+ return BITMAP_BITS(sseu->slice_mask.b);
+}
+
+static inline u32 intel_sseu_slice_mask(const struct sseu_dev_info *sseu)
+{
+ u32 mask;
- if (slice >= sseu->max_slices ||
- subslice >= sseu->max_subslices)
- return false;
+ bitmap_to_arr32(&mask, sseu->slice_mask.b, sizeof(mask) * BITS_PER_BYTE);
- GEM_BUG_ON(ss_idx >= sseu->ss_stride);
+ return mask;
+}
- mask = sseu->subslice_mask[slice * sseu->ss_stride + ss_idx];
+static inline unsigned int
+intel_sseu_slice_count(const struct sseu_dev_info *sseu)
+{
+ return bitmap_weight(sseu->slice_mask.b,
+ BITMAP_BITS(sseu->slice_mask.b));
+}
- return mask & BIT(subslice % BITS_PER_BYTE);
+static inline bool
+intel_sseu_has_subslice(const struct sseu_dev_info *sseu, int slice,
+ int subslice)
+{
+ GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->ss_mask));
+
+ return test_bit(subslice, sseu->ss_mask[slice].b);
}
-void intel_sseu_set_info(struct sseu_dev_info *sseu, u8 max_slices,
- u8 max_subslices, u8 max_eus_per_subslice);
+void intel_sseu_set_info(struct sseu_dev_info *sseu,
+ unsigned int max_slices,
+ unsigned int max_subslices,
+ unsigned int max_eus_per_subslice);
unsigned int
intel_sseu_subslice_total(const struct sseu_dev_info *sseu);
unsigned int
-intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, u8 slice);
+intel_sseu_subslices_per_slice(const struct sseu_dev_info *sseu, int slice);
-u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, u8 slice);
+u32 intel_sseu_get_subslices(const struct sseu_dev_info *sseu, int slice);
u32 intel_sseu_get_compute_subslices(const struct sseu_dev_info *sseu);
-void intel_sseu_set_subslices(struct sseu_dev_info *sseu, int slice,
- u8 *subslice_mask, u32 ss_mask);
-
void intel_sseu_info_init(struct intel_gt *gt);
u32 intel_sseu_make_rpcs(struct intel_gt *gt,
diff --git a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
index 2d5d011e01db..ae3e8972121f 100644
--- a/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_sseu_debugfs.c
@@ -12,11 +12,10 @@
#include "intel_sseu_debugfs.h"
static void sseu_copy_subslices(const struct sseu_dev_info *sseu,
- int slice, u8 *to_mask)
+ int slice, unsigned long *to)
{
- int offset = slice * sseu->ss_stride;
-
- memcpy(&to_mask[offset], &sseu->subslice_mask[offset], sseu->ss_stride);
+ bitmap_copy(to, sseu->ss_mask[slice].b,
+ BITMAP_BITS(sseu->ss_mask[slice].b));
}
static void cherryview_sseu_device_status(struct intel_gt *gt,
@@ -40,8 +39,8 @@ static void cherryview_sseu_device_status(struct intel_gt *gt,
/* skip disabled subslice */
continue;
- sseu->slice_mask = BIT(0);
- sseu->subslice_mask[0] |= BIT(ss);
+ intel_sseu_enable_slice(sseu, 0);
+ set_bit(ss, sseu->ss_mask[0].b);
eu_cnt = ((sig1[ss] & CHV_EU08_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU19_PG_ENABLE) ? 0 : 2) +
((sig1[ss] & CHV_EU210_PG_ENABLE) ? 0 : 2) +
@@ -91,8 +90,8 @@ static void gen11_sseu_device_status(struct intel_gt *gt,
/* skip disabled slice */
continue;
- sseu->slice_mask |= BIT(s);
- sseu_copy_subslices(&info->sseu, s, sseu->subslice_mask);
+ intel_sseu_enable_slice(sseu, s);
+ sseu_copy_subslices(&info->sseu, s, sseu->ss_mask[s].b);
for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt;
@@ -144,24 +143,20 @@ static void gen9_sseu_device_status(struct intel_gt *gt,
/* skip disabled slice */
continue;
- sseu->slice_mask |= BIT(s);
+ intel_sseu_enable_slice(sseu, s);
if (IS_GEN9_BC(gt->i915))
- sseu_copy_subslices(&info->sseu, s,
- sseu->subslice_mask);
+ sseu_copy_subslices(&info->sseu, s, sseu->ss_mask[s].b);
for (ss = 0; ss < info->sseu.max_subslices; ss++) {
unsigned int eu_cnt;
- u8 ss_idx = s * info->sseu.ss_stride +
- ss / BITS_PER_BYTE;
if (IS_GEN9_LP(gt->i915)) {
if (!(s_reg[s] & (GEN9_PGCTL_SS_ACK(ss))))
/* skip disabled subslice */
continue;
- sseu->subslice_mask[ss_idx] |=
- BIT(ss % BITS_PER_BYTE);
+ set_bit(ss, sseu->ss_mask[s].b);
}
eu_cnt = eu_reg[2 * s + ss / 2] & eu_mask[ss % 2];
@@ -183,22 +178,19 @@ static void bdw_sseu_device_status(struct intel_gt *gt,
u32 slice_info = intel_uncore_read(gt->uncore, GEN8_GT_SLICE_INFO);
int s;
- sseu->slice_mask = slice_info & GEN8_LSLICESTAT_MASK;
+ intel_sseu_enable_slices(sseu, slice_info & GEN8_LSLICESTAT_MASK);
- if (sseu->slice_mask) {
+ if (!bitmap_empty(sseu->slice_mask.b,
+ BITMAP_BITS(sseu->slice_mask.b))) {
sseu->eu_per_subslice = info->sseu.eu_per_subslice;
- for (s = 0; s < fls(sseu->slice_mask); s++)
- sseu_copy_subslices(&info->sseu, s,
- sseu->subslice_mask);
+ for (s = 0; s < ARRAY_SIZE(sseu->ss_mask); s++)
+ sseu_copy_subslices(&info->sseu, s, sseu->ss_mask[s].b);
sseu->eu_total = sseu->eu_per_subslice *
intel_sseu_subslice_total(sseu);
/* subtract fused off EU(s) from enabled slice(s) */
- for (s = 0; s < fls(sseu->slice_mask); s++) {
- u8 subslice_7eu = info->sseu.subslice_7eu[s];
-
- sseu->eu_total -= hweight8(subslice_7eu);
- }
+ for (s = 0; s < ARRAY_SIZE(info->sseu.subslice_7eu); s++)
+ sseu->eu_total -= hweight8(info->sseu.subslice_7eu[s]);
}
}
@@ -210,14 +202,15 @@ static void i915_print_sseu_info(struct seq_file *m,
const char *type = is_available_info ? "Available" : "Enabled";
int s;
- seq_printf(m, " %s Slice Mask: %04x\n", type,
- sseu->slice_mask);
- seq_printf(m, " %s Slice Total: %u\n", type,
- hweight8(sseu->slice_mask));
- seq_printf(m, " %s Subslice Total: %u\n", type,
+ seq_printf(m, " %s Slices: %*pbl\n", type,
+ __intel_sseu_slice_mask_bits(sseu),
+ __intel_sseu_slice_mask_bitmap(sseu));
+ seq_printf(m, " %s Slices Total: %u\n", type,
+ intel_sseu_slice_count(sseu));
+ seq_printf(m, " %s Subslices Total: %u\n", type,
intel_sseu_subslice_total(sseu));
- for (s = 0; s < fls(sseu->slice_mask); s++) {
- seq_printf(m, " %s Slice%i subslices: %u\n", type,
+ for (s = 0; s < sseu->max_slices; s++) {
+ seq_printf(m, " %s Slice%i count: %u\n", type,
s, intel_sseu_subslices_per_slice(sseu, s));
}
seq_printf(m, " %s EU Total: %u\n", type,
diff --git a/drivers/gpu/drm/i915/gt/intel_workarounds.c b/drivers/gpu/drm/i915/gt/intel_workarounds.c
index dc0ffff6f655..c6f010f8da11 100644
--- a/drivers/gpu/drm/i915/gt/intel_workarounds.c
+++ b/drivers/gpu/drm/i915/gt/intel_workarounds.c
@@ -947,8 +947,9 @@ gen9_wa_init_mcr(struct drm_i915_private *i915, struct i915_wa_list *wal)
* occasions, such as INSTDONE, where this value is dependent
* on s/ss combo, the read should be done with read_subslice_reg.
*/
- slice = ffs(sseu->slice_mask) - 1;
- GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->subslice_mask));
+ slice = find_first_bit(sseu->slice_mask.b,
+ BITMAP_BITS(sseu->slice_mask.b));
+ GEM_BUG_ON(slice >= ARRAY_SIZE(sseu->ss_mask));
subslice = ffs(intel_sseu_get_subslices(sseu, slice));
GEM_BUG_ON(!subslice);
subslice--;
@@ -1090,7 +1091,7 @@ icl_wa_init_mcr(struct intel_gt *gt, struct i915_wa_list *wal)
unsigned int slice, subslice;
GEM_BUG_ON(GRAPHICS_VER(gt->i915) < 11);
- GEM_BUG_ON(hweight8(sseu->slice_mask) > 1);
+ GEM_BUG_ON(intel_sseu_slice_count(sseu) > 1);
slice = 0;
/*
diff --git a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
index 17004bca4d24..1337ca210a06 100644
--- a/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
+++ b/drivers/gpu/drm/i915/gt/uc/intel_guc_ads.c
@@ -741,7 +741,7 @@ static void __guc_ads_init(struct intel_guc *guc)
fill_engine_enable_masks(gt, &info_map);
ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_SLICE_ENABLED],
- hweight8(gt->info.sseu.slice_mask));
+ intel_sseu_slice_count(>->info.sseu));
ads_blob_write(guc, system_info.generic_gt_sysinfo[GUC_GENERIC_GT_SYSINFO_VDBOX_SFC_SUPPORT_MASK],
gt->info.vdbox_sfc_access);
diff --git a/drivers/gpu/drm/i915/i915_getparam.c b/drivers/gpu/drm/i915/i915_getparam.c
index c12a0adefda5..563f190d94ad 100644
--- a/drivers/gpu/drm/i915/i915_getparam.c
+++ b/drivers/gpu/drm/i915/i915_getparam.c
@@ -148,14 +148,12 @@ int i915_getparam_ioctl(struct drm_device *dev, void *data,
value = intel_engines_has_context_isolation(i915);
break;
case I915_PARAM_SLICE_MASK:
- value = sseu->slice_mask;
+ value = intel_sseu_slice_mask(sseu);
if (!value)
return -ENODEV;
break;
case I915_PARAM_SUBSLICE_MASK:
- /* Only copy bits from the first slice */
- memcpy(&value, sseu->subslice_mask,
- min(sseu->ss_stride, (u8)sizeof(value)));
+ value = intel_sseu_get_subslices(sseu, 0);
if (!value)
return -ENODEV;
break;
diff --git a/drivers/gpu/drm/i915/i915_query.c b/drivers/gpu/drm/i915/i915_query.c
index b5ca00cb6cf6..7e3631ee14a3 100644
--- a/drivers/gpu/drm/i915/i915_query.c
+++ b/drivers/gpu/drm/i915/i915_query.c
@@ -32,8 +32,11 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
struct drm_i915_query_item *query_item)
{
const struct sseu_dev_info *sseu = &to_gt(dev_priv)->info.sseu;
+ unsigned int slice_length, subslice_length, eu_length, total_length;
struct drm_i915_query_topology_info topo;
- u32 slice_length, subslice_length, eu_length, total_length;
+ unsigned int ss_stride, eu_stride;
+ unsigned int s, ss, i;
+ u8 __user *out;
int ret;
if (query_item->flags != 0)
@@ -42,11 +45,11 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
if (sseu->max_slices == 0)
return -ENODEV;
- BUILD_BUG_ON(sizeof(u8) != sizeof(sseu->slice_mask));
-
- slice_length = sizeof(sseu->slice_mask);
- subslice_length = sseu->max_slices * sseu->ss_stride;
- eu_length = sseu->max_slices * sseu->max_subslices * sseu->eu_stride;
+ slice_length = DIV_ROUND_UP(sseu->max_slices, BITS_PER_BYTE);
+ ss_stride = DIV_ROUND_UP(sseu->max_subslices, BITS_PER_BYTE);
+ eu_stride = DIV_ROUND_UP(sseu->max_eus_per_subslice, BITS_PER_BYTE);
+ subslice_length = sseu->max_slices * ss_stride;
+ eu_length = sseu->max_slices * sseu->max_subslices * eu_stride;
total_length = sizeof(topo) + slice_length + subslice_length +
eu_length;
@@ -64,28 +67,50 @@ static int query_topology_info(struct drm_i915_private *dev_priv,
topo.max_eus_per_subslice = sseu->max_eus_per_subslice;
topo.subslice_offset = slice_length;
- topo.subslice_stride = sseu->ss_stride;
+ topo.subslice_stride = ss_stride;
topo.eu_offset = slice_length + subslice_length;
- topo.eu_stride = sseu->eu_stride;
+ topo.eu_stride = eu_stride;
- if (copy_to_user(u64_to_user_ptr(query_item->data_ptr),
- &topo, sizeof(topo)))
- return -EFAULT;
+ out = u64_to_user_ptr(query_item->data_ptr);
- if (copy_to_user(u64_to_user_ptr(query_item->data_ptr + sizeof(topo)),
- &sseu->slice_mask, slice_length))
+ if (copy_to_user(out, &topo, sizeof(topo)))
return -EFAULT;
+ out += sizeof(topo);
- if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
- sizeof(topo) + slice_length),
- sseu->subslice_mask, subslice_length))
- return -EFAULT;
+ for (i = 0; i < slice_length; i++) {
+ u8 mask;
- if (copy_to_user(u64_to_user_ptr(query_item->data_ptr +
- sizeof(topo) +
- slice_length + subslice_length),
- sseu->eu_mask, eu_length))
- return -EFAULT;
+ mask = bitmap_get_value8(sseu->slice_mask.b, i * BITS_PER_BYTE);
+ if (copy_to_user(out, &mask, sizeof(mask)))
+ return -EFAULT;
+ out += sizeof(mask);
+ }
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ for (i = 0; i < ss_stride; i++) {
+ u8 mask;
+
+ mask = bitmap_get_value8(sseu->ss_mask[s].b,
+ i * BITS_PER_BYTE);
+ if (copy_to_user(out, &mask, sizeof(mask)))
+ return -EFAULT;
+ out += sizeof(mask);
+ }
+ }
+
+ for (s = 0; s < sseu->max_slices; s++) {
+ for (ss = 0; ss < sseu->max_subslices; ss++) {
+ for (i = 0; i < eu_stride; i++) {
+ u8 mask;
+
+ mask = bitmap_get_value8(sseu->eu_mask[s][ss].b,
+ i * BITS_PER_BYTE);
+ if (copy_to_user(out, &mask, sizeof(mask)))
+ return -EFAULT;
+ out += sizeof(mask);
+ }
+ }
+ }
return total_length;
}
--
2.32.0
More information about the Intel-gfx-trybot
mailing list