[Intel-xe] [PATCH v3 1/3] drm/xe: Enable Fixed CCS mode setting
Lucas De Marchi
lucas.demarchi at intel.com
Thu Dec 7 21:45:19 UTC 2023
I see this is merged, just giving some after-the-fact comments.
On Sun, Dec 03, 2023 at 09:37:07PM -0800, Niranjana Vishwanathapura wrote:
>Disable dynamic HW load balancing of compute resource assignment
>to engines and instead enabled fixed mode of mapping compute
>resources to engines on all platforms with more than one compute
>engine.
>
>By default enable only one CCS engine with all compute slices
>assigned to it. This is the desired configuration for common
>workloads.
>
>PVC platform supports only the fixed CCS mode (workaround 16016805146).
>
>v2: Rebase, make it platform agnostic
>v3: Minor code refactoring
>
>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>---
> drivers/gpu/drm/xe/Makefile | 1 +
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 14 +++++
> drivers/gpu/drm/xe/xe_gt.c | 10 ++++
> drivers/gpu/drm/xe/xe_gt.h | 2 +
> drivers/gpu/drm/xe/xe_gt_ccs_mode.c | 78 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt_ccs_mode.h | 23 ++++++++
> drivers/gpu/drm/xe/xe_gt_types.h | 8 +++
> drivers/gpu/drm/xe/xe_guc_ads.c | 3 ++
> drivers/gpu/drm/xe/xe_hw_engine.c | 20 +++++++
> 9 files changed, 159 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.c
> create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.h
>
>diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>index 87f3fca0c0ee..86691f3b9077 100644
>--- a/drivers/gpu/drm/xe/Makefile
>+++ b/drivers/gpu/drm/xe/Makefile
>@@ -70,6 +70,7 @@ xe-y += xe_bb.o \
> xe_gsc.o \
> xe_gsc_submit.o \
> xe_gt.o \
>+ xe_gt_ccs_mode.o \
> xe_gt_clock.o \
> xe_gt_debugfs.o \
> xe_gt_idle.o \
>diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>index d318ec0efd7d..f4f2cf8d9022 100644
>--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>@@ -401,8 +401,22 @@
> #define COMP_CKN_IN REG_GENMASK(30, 29)
>
> #define RCU_MODE XE_REG(0x14800, XE_REG_OPTION_MASKED)
>+#define RCU_MODE_FIXED_SLICE_CCS_MODE REG_BIT(1)
> #define RCU_MODE_CCS_ENABLE REG_BIT(0)
>
>+/*
>+ * Total of 4 cslices, where each cslice is in the form:
>+ * [0-3] CCS ID
>+ * [4-6] RSVD
>+ * [7] Disabled
>+ */
we don't document fields like this. This should come verbatim from the
bspec. Eventually we want to autogen this header.
>+#define CCS_MODE XE_REG(0x14804)
>+#define CCS_MODE_CSLICE_0_3_MASK REG_GENMASK(11, 0) /* 3 bits per cslice */
>+#define CCS_MODE_CSLICE_MASK 0x7 /* CCS0-3 + rsvd */
these names are odd... aren't they inverted? I.e., they should be
+#define CCS_MODE_CSLICE_MASK REG_GENMASK(11, 0)
+#define CCS_MODE_CSLICE_0_3_MASK 0x7
>+#define CCS_MODE_CSLICE_WIDTH ilog2(CCS_MODE_CSLICE_MASK + 1)
>+#define CCS_MODE_CSLICE(cslice, ccs) \
>+ ((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))
I don't think we need these calculations here. Prefer using the REG_FIELD
macros instead
>+
> #define FORCEWAKE_ACK_GT XE_REG(0x130044)
> #define FORCEWAKE_KERNEL BIT(0)
> #define FORCEWAKE_USER BIT(1)
>diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>index 8a6fb9641cd6..dc05c7780848 100644
>--- a/drivers/gpu/drm/xe/xe_gt.c
>+++ b/drivers/gpu/drm/xe/xe_gt.c
>@@ -22,6 +22,7 @@
> #include "xe_force_wake.h"
> #include "xe_ggtt.h"
> #include "xe_gsc.h"
>+#include "xe_gt_ccs_mode.h"
> #include "xe_gt_clock.h"
> #include "xe_gt_idle.h"
> #include "xe_gt_mcr.h"
>@@ -452,6 +453,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
> if (err)
> goto err_force_wake;
>
>+ /* Configure default CCS mode of 1 engine with all resources */
>+ if (xe_gt_ccs_mode_enabled(gt)) {
>+ gt->ccs_mode = 1;
mode? or count?
>+ xe_gt_apply_ccs_mode(gt);
>+ }
>+
> err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> XE_WARN_ON(err);
> xe_device_mem_access_put(gt_to_xe(gt));
>@@ -558,6 +565,9 @@ static int do_gt_restart(struct xe_gt *gt)
> xe_reg_sr_apply_whitelist(hwe);
> }
>
>+ /* Get CCS mode in sync between sw/hw */
>+ xe_gt_apply_ccs_mode(gt);
>+
> return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>index caded203a8a0..a818cc9c8fd0 100644
>--- a/drivers/gpu/drm/xe/xe_gt.h
>+++ b/drivers/gpu/drm/xe/xe_gt.h
>@@ -17,6 +17,8 @@
> for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
> xe_hw_engine_is_valid((hwe__)))
>
>+#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)
this define seems out of place in this header
>+
> #ifdef CONFIG_FAULT_INJECTION
> extern struct fault_attr gt_reset_failure;
> static inline bool xe_fault_inject_gt_reset(void)
>diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
>new file mode 100644
>index 000000000000..541c44c70a84
>--- /dev/null
>+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
>@@ -0,0 +1,78 @@
>+// SPDX-License-Identifier: MIT
>+/*
>+ * Copyright © 2023 Intel Corporation
>+ */
>+
>+#include "regs/xe_gt_regs.h"
>+#include "xe_assert.h"
>+#include "xe_gt.h"
>+#include "xe_gt_ccs_mode.h"
>+#include "xe_mmio.h"
>+
>+static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
>+{
>+ u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
>+ int num_slices = hweight32(CCS_MASK(gt));
>+ struct xe_device *xe = gt_to_xe(gt);
>+ int width, cslice = 0;
>+ u32 config = 0;
>+
>+ xe_assert(xe, xe_gt_ccs_mode_enabled(gt));
>+
>+ xe_assert(xe, num_engines && num_engines <= num_slices);
>+ xe_assert(xe, !(num_slices % num_engines));
>+
>+ /*
>+ * Loop over all available slices and assign each a user engine.
>+ * For example, if there are four compute slices available, the
>+ * assignment of compute slices to compute engines would be,
>+ *
>+ * With 1 engine (ccs0):
>+ * slice 0, 1, 2, 3: ccs0
>+ *
>+ * With 2 engines (ccs0, ccs1):
>+ * slice 0, 2: ccs0
>+ * slice 1, 3: ccs1
>+ *
>+ * With 4 engines (ccs0, ccs1, ccs2, ccs3):
>+ * slice 0: ccs0
>+ * slice 1: ccs1
>+ * slice 2: ccs2
>+ * slice 3: ccs3
>+ */
>+ for (width = num_slices / num_engines; width; width--) {
>+ struct xe_hw_engine *hwe;
>+ enum xe_hw_engine_id id;
>+
>+ for_each_hw_engine(hwe, gt, id) {
>+ if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
>+ continue;
>+
>+ if (hwe->logical_instance >= num_engines)
>+ break;
>+
>+ config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
>+
>+ /* If a slice is fused off, leave disabled */
>+ while ((CCS_MASK(gt) & BIT(cslice)) == 0)
>+ cslice++;
>+
>+ mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
>+ mode |= CCS_MODE_CSLICE(cslice, hwe->instance);
REG_FIELD_SET() would be more appropriate.
>+ cslice++;
>+ }
>+ }
>+
>+ xe_mmio_write32(gt, CCS_MODE, mode);
>+
>+ xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
>+ mode, config, num_engines, num_slices);
>+}
>+
>+void xe_gt_apply_ccs_mode(struct xe_gt *gt)
in xe we try to keep things in namespaces defined by the header, so
this should have been xe_gt_ccs_mode_apply()
>+{
>+ if (!gt->ccs_mode)
>+ return;
>+
>+ __xe_gt_apply_ccs_mode(gt, gt->ccs_mode);
what's the point of the split between xe_gt_apply_ccs_mode() and
__xe_gt_apply_ccs_mode()? At most the static helper should have been called
apply_ccs_mode() rather than __xe_gt_apply_ccs_mode(). But it could just
as well have been a single function, as there are no other callers and no
clear role split between one and the other. Both of them operate on "gt".
>+}
>diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
>new file mode 100644
>index 000000000000..e8766879f6ec
>--- /dev/null
>+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
>@@ -0,0 +1,23 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright © 2023 Intel Corporation
>+ */
>+
>+#ifndef _XE_GT_CCS_MODE_H_
>+#define _XE_GT_CCS_MODE_H_
>+
>+#include "xe_device_types.h"
>+#include "xe_gt.h"
>+#include "xe_gt_types.h"
>+#include "xe_platform_types.h"
why do you need all these headers?
Lucas De Marchi
More information about the Intel-xe
mailing list