[Intel-xe] [PATCH v2 2/2] drm/xe: Add support for CCS engine fusing
Matt Roper
matthew.d.roper at intel.com
Thu Mar 9 00:55:30 UTC 2023
For Xe_HP platforms that can have multiple CCS engines, the
presence/absence of each CCS is inferred from the presence/absence of
any DSS in the corresponding quadrant of the GT's DSS mask.
This handling is only needed on platforms that can have more than one
CCS. The CCS is never fused off on platforms like MTL that can only
have one.
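As a rough illustration of that inference, here is a minimal standalone
sketch (not the driver code in the diff below; the 64-bit mask width, the
fixed four-way split, and the names MAX_DSS, NUM_CCS and ccs_present() are
assumptions made only for this example):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Assume up to 64 DSS, split evenly across the four possible CCS engines. */
#define MAX_DSS		64
#define NUM_CCS		4
#define DSS_PER_QUAD	(MAX_DSS / NUM_CCS)

/* A CCS is considered usable only if its quadrant has at least one DSS. */
static bool ccs_present(uint64_t dss_mask, int ccs)
{
	uint64_t quad = ((1ULL << DSS_PER_QUAD) - 1) << (ccs * DSS_PER_QUAD);

	return dss_mask & quad;
}

int main(void)
{
	/* Example fusing: only DSS 0-15 and 32-47 are present. */
	uint64_t dss_mask = 0x0000ffff0000ffffULL;
	int i;

	for (i = 0; i < NUM_CCS; i++)
		printf("ccs%d %s\n", i,
		       ccs_present(dss_mask, i) ? "available" : "fused off");

	return 0;
}

With that example mask, ccs0 and ccs2 report available while ccs1 and ccs3
report fused off, which is the behavior the patch below implements against
the real gt->fuse_topo masks.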
v2:
- Add extra warnings to try to catch mistakes where the register counts
in get_num_dss_regs() are updated without corresponding updates to
the register parameters passed to load_dss_mask(). (Lucas)
- Add kerneldoc for xe_gt_topology_has_dss_in_quadrant() and clarify
why we care about quadrants of the DSS space. (Lucas)
- Ensure CCS engine counting treats engine mask as 64-bit. (Lucas)
Cc: Lucas De Marchi <lucas.demarchi at intel.com>
Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
---
drivers/gpu/drm/xe/xe_gt_topology.c | 65 ++++++++++++++++++++++++-----
drivers/gpu/drm/xe/xe_gt_topology.h | 3 ++
drivers/gpu/drm/xe/xe_hw_engine.c | 29 ++++++++++++-
3 files changed, 85 insertions(+), 12 deletions(-)
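To make the quadrant math in the new helper concrete: on a platform where
GRAPHICS_VERx100 == 1260 (no geometry DSS registers, two compute DSS
registers), dss_per_quad works out to 32 * max(0, 2) / 4 = 16, so quadrant 1
covers DSS 16-31 and CCS1 remains available only if at least one DSS in that
range survives fusing.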
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c
index 2123f84be336..f2cbee53462b 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.c
+++ b/drivers/gpu/drm/xe/xe_gt_topology.c
@@ -62,6 +62,21 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask)
bitmap_from_arr32(mask, &val, XE_MAX_EU_FUSE_BITS);
}
+static void
+get_num_dss_regs(struct xe_device *xe, int *geometry_regs, int *compute_regs)
+{
+ if (GRAPHICS_VERx100(xe) == 1260) {
+ *geometry_regs = 0;
+ *compute_regs = 2;
+ } else if (GRAPHICS_VERx100(xe) >= 1250) {
+ *geometry_regs = 1;
+ *compute_regs = 1;
+ } else {
+ *geometry_regs = 1;
+ *compute_regs = 0;
+ }
+}
+
void
xe_gt_topology_init(struct xe_gt *gt)
{
@@ -69,18 +84,17 @@ xe_gt_topology_init(struct xe_gt *gt)
struct drm_printer p = drm_debug_printer("GT topology");
int num_geometry_regs, num_compute_regs;
- if (GRAPHICS_VERx100(xe) == 1260) {
- num_geometry_regs = 0;
- num_compute_regs = 2;
- } else if (GRAPHICS_VERx100(xe) >= 1250) {
- num_geometry_regs = 1;
- num_compute_regs = 1;
- } else {
- num_geometry_regs = 1;
- num_compute_regs = 0;
- }
+ get_num_dss_regs(xe, &num_geometry_regs, &num_compute_regs);
- load_dss_mask(gt, gt->fuse_topo.g_dss_mask, num_geometry_regs,
+ /*
+ * Register counts returned shouldn't exceed the number of registers
+ * passed as parameters below.
+ */
+ drm_WARN_ON(&xe->drm, num_geometry_regs > 1);
+ drm_WARN_ON(&xe->drm, num_compute_regs > 2);
+
+ load_dss_mask(gt, gt->fuse_topo.g_dss_mask,
+ num_geometry_regs,
XELP_GT_GEOMETRY_DSS_ENABLE.reg);
load_dss_mask(gt, gt->fuse_topo.c_dss_mask, num_compute_regs,
XEHP_GT_COMPUTE_DSS_ENABLE.reg,
@@ -113,3 +127,32 @@ xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum)
{
return find_next_bit(mask, XE_MAX_DSS_FUSE_BITS, groupnum * groupsize);
}
+
+/**
+ * xe_gt_topology_has_dss_in_quadrant - check fusing of DSS in GT quadrant
+ * @gt: GT to check
+ * @quad: Which quadrant of the DSS space to check
+ *
+ * Since Xe_HP platforms can have up to four CCS engines, those engines
+ * are each logically associated with a quarter of the possible DSS. If there
+ * are no DSS present in one of the four quadrants of the DSS space, the
+ * corresponding CCS engine is also not available for use.
+ *
+ * Returns false if all DSS in a quadrant of the GT are fused off, else true.
+ */
+bool xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+ xe_dss_mask_t all_dss;
+ int g_dss_regs, c_dss_regs, dss_per_quad, quad_first;
+
+ bitmap_or(all_dss, gt->fuse_topo.g_dss_mask, gt->fuse_topo.c_dss_mask,
+ XE_MAX_DSS_FUSE_BITS);
+
+ get_num_dss_regs(xe, &g_dss_regs, &c_dss_regs);
+ dss_per_quad = 32 * max(g_dss_regs, c_dss_regs) / 4;
+
+ quad_first = xe_dss_mask_group_ffs(all_dss, dss_per_quad, quad);
+
+ return quad_first < (quad + 1) * dss_per_quad;
+}
diff --git a/drivers/gpu/drm/xe/xe_gt_topology.h b/drivers/gpu/drm/xe/xe_gt_topology.h
index b2540dc266f2..f47ab1b1269c 100644
--- a/drivers/gpu/drm/xe/xe_gt_topology.h
+++ b/drivers/gpu/drm/xe/xe_gt_topology.h
@@ -17,4 +17,7 @@ void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p);
unsigned int
xe_dss_mask_group_ffs(xe_dss_mask_t mask, int groupsize, int groupnum);
+bool
+xe_gt_topology_has_dss_in_quadrant(struct xe_gt *gt, int quad);
+
#endif /* _XE_GT_TOPOLOGY_H_ */
diff --git a/drivers/gpu/drm/xe/xe_hw_engine.c b/drivers/gpu/drm/xe/xe_hw_engine.c
index abfd35491b47..63a4efd5edcc 100644
--- a/drivers/gpu/drm/xe/xe_hw_engine.c
+++ b/drivers/gpu/drm/xe/xe_hw_engine.c
@@ -436,13 +436,40 @@ static void read_copy_fuses(struct xe_gt *gt)
}
}
+static void read_compute_fuses(struct xe_gt *gt)
+{
+ struct xe_device *xe = gt_to_xe(gt);
+
+ /*
+ * CCS fusing based on DSS masks only applies to platforms that can
+ * have more than one CCS.
+ */
+ if (hweight64(gt->info.engine_mask &
+ GENMASK_ULL(XE_HW_ENGINE_CCS3, XE_HW_ENGINE_CCS0)) <= 1)
+ return;
+
+ /*
+ * CCS availability on Xe_HP is inferred from the presence of DSS in
+ * each quadrant.
+ */
+ for (int i = XE_HW_ENGINE_CCS0, j = 0; i <= XE_HW_ENGINE_CCS3; ++i, ++j) {
+ if (!(gt->info.engine_mask & BIT(i)))
+ continue;
+
+ if (!xe_gt_topology_has_dss_in_quadrant(gt, j)) {
+ gt->info.engine_mask &= ~BIT(i);
+ drm_info(&xe->drm, "ccs%u fused off\n", j);
+ }
+ }
+}
+
int xe_hw_engines_init_early(struct xe_gt *gt)
{
int i;
read_media_fuses(gt);
read_copy_fuses(gt);
- /* TODO: compute engines */
+ read_compute_fuses(gt);
for (i = 0; i < ARRAY_SIZE(gt->hw_engines); i++)
hw_engine_init_early(gt, &gt->hw_engines[i], i);
--
2.39.2