[Intel-xe] [PATCH v3 1/3] drm/xe: Enable Fixed CCS mode setting

Lucas De Marchi lucas.demarchi at intel.com
Thu Dec 7 21:45:19 UTC 2023


I see this is merged, just giving some after-the-fact comments.

On Sun, Dec 03, 2023 at 09:37:07PM -0800, Niranjana Vishwanathapura wrote:
>Disable dynamic HW load balancing of compute resource assignment
>to engines and instead enabled fixed mode of mapping compute
>resources to engines on all platforms with more than one compute
>engine.
>
>By default enable only one CCS engine with all compute slices
>assigned to it. This is the desired configuration for common
>workloads.
>
>PVC platform supports only the fixed CCS mode (workaround 16016805146).
>
>v2: Rebase, make it platform agnostic
>v3: Minor code refactoring
>
>Signed-off-by: Niranjana Vishwanathapura <niranjana.vishwanathapura at intel.com>
>---
> drivers/gpu/drm/xe/Makefile          |  1 +
> drivers/gpu/drm/xe/regs/xe_gt_regs.h | 14 +++++
> drivers/gpu/drm/xe/xe_gt.c           | 10 ++++
> drivers/gpu/drm/xe/xe_gt.h           |  2 +
> drivers/gpu/drm/xe/xe_gt_ccs_mode.c  | 78 ++++++++++++++++++++++++++++
> drivers/gpu/drm/xe/xe_gt_ccs_mode.h  | 23 ++++++++
> drivers/gpu/drm/xe/xe_gt_types.h     |  8 +++
> drivers/gpu/drm/xe/xe_guc_ads.c      |  3 ++
> drivers/gpu/drm/xe/xe_hw_engine.c    | 20 +++++++
> 9 files changed, 159 insertions(+)
> create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.c
> create mode 100644 drivers/gpu/drm/xe/xe_gt_ccs_mode.h
>
>diff --git a/drivers/gpu/drm/xe/Makefile b/drivers/gpu/drm/xe/Makefile
>index 87f3fca0c0ee..86691f3b9077 100644
>--- a/drivers/gpu/drm/xe/Makefile
>+++ b/drivers/gpu/drm/xe/Makefile
>@@ -70,6 +70,7 @@ xe-y += xe_bb.o \
> 	xe_gsc.o \
> 	xe_gsc_submit.o \
> 	xe_gt.o \
>+	xe_gt_ccs_mode.o \
> 	xe_gt_clock.o \
> 	xe_gt_debugfs.o \
> 	xe_gt_idle.o \
>diff --git a/drivers/gpu/drm/xe/regs/xe_gt_regs.h b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>index d318ec0efd7d..f4f2cf8d9022 100644
>--- a/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>+++ b/drivers/gpu/drm/xe/regs/xe_gt_regs.h
>@@ -401,8 +401,22 @@
> #define   COMP_CKN_IN				REG_GENMASK(30, 29)
>
> #define RCU_MODE				XE_REG(0x14800, XE_REG_OPTION_MASKED)
>+#define   RCU_MODE_FIXED_SLICE_CCS_MODE		REG_BIT(1)
> #define   RCU_MODE_CCS_ENABLE			REG_BIT(0)
>
>+/*
>+ * Total of 4 cslices, where each cslice is in the form:
>+ *   [0-3]     CCS ID
>+ *   [4-6]     RSVD
>+ *   [7]       Disabled
>+ */

we don't document fields like this. This should come verbatim from the
bspec. Eventually we want to autogen this header.

>+#define CCS_MODE				XE_REG(0x14804)
>+#define   CCS_MODE_CSLICE_0_3_MASK		REG_GENMASK(11, 0) /* 3 bits per cslice */
>+#define   CCS_MODE_CSLICE_MASK			0x7 /* CCS0-3 + rsvd */

these names are odd... aren't then inverted? I.e., should be

+#define   CCS_MODE_CSLICE_MASK			REG_GENMASK(11, 0)
+#define   CCS_MODE_CSLICE_0_3_MASK		0x7

>+#define   CCS_MODE_CSLICE_WIDTH			ilog2(CCS_MODE_CSLICE_MASK + 1)
>+#define   CCS_MODE_CSLICE(cslice, ccs) \
>+	((ccs) << ((cslice) * CCS_MODE_CSLICE_WIDTH))

I don't think we need these calculations here. Prefer using the REG_FIELD
macros instead

>+
> #define FORCEWAKE_ACK_GT			XE_REG(0x130044)
> #define   FORCEWAKE_KERNEL			BIT(0)
> #define   FORCEWAKE_USER			BIT(1)
>diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
>index 8a6fb9641cd6..dc05c7780848 100644
>--- a/drivers/gpu/drm/xe/xe_gt.c
>+++ b/drivers/gpu/drm/xe/xe_gt.c
>@@ -22,6 +22,7 @@
> #include "xe_force_wake.h"
> #include "xe_ggtt.h"
> #include "xe_gsc.h"
>+#include "xe_gt_ccs_mode.h"
> #include "xe_gt_clock.h"
> #include "xe_gt_idle.h"
> #include "xe_gt_mcr.h"
>@@ -452,6 +453,12 @@ static int all_fw_domain_init(struct xe_gt *gt)
> 	if (err)
> 		goto err_force_wake;
>
>+	/* Configure default CCS mode of 1 engine with all resources */
>+	if (xe_gt_ccs_mode_enabled(gt)) {
>+		gt->ccs_mode = 1;

mode? or count? 

>+		xe_gt_apply_ccs_mode(gt);
>+	}
>+
> 	err = xe_force_wake_put(gt_to_fw(gt), XE_FORCEWAKE_ALL);
> 	XE_WARN_ON(err);
> 	xe_device_mem_access_put(gt_to_xe(gt));
>@@ -558,6 +565,9 @@ static int do_gt_restart(struct xe_gt *gt)
> 		xe_reg_sr_apply_whitelist(hwe);
> 	}
>
>+	/* Get CCS mode in sync between sw/hw */
>+	xe_gt_apply_ccs_mode(gt);
>+
> 	return 0;
> }
>
>diff --git a/drivers/gpu/drm/xe/xe_gt.h b/drivers/gpu/drm/xe/xe_gt.h
>index caded203a8a0..a818cc9c8fd0 100644
>--- a/drivers/gpu/drm/xe/xe_gt.h
>+++ b/drivers/gpu/drm/xe/xe_gt.h
>@@ -17,6 +17,8 @@
> 		for_each_if(((hwe__) = (gt__)->hw_engines + (id__)) && \
> 			  xe_hw_engine_is_valid((hwe__)))
>
>+#define CCS_MASK(gt) (((gt)->info.engine_mask & XE_HW_ENGINE_CCS_MASK) >> XE_HW_ENGINE_CCS0)

this define seems out of place in this header

>+
> #ifdef CONFIG_FAULT_INJECTION
> extern struct fault_attr gt_reset_failure;
> static inline bool xe_fault_inject_gt_reset(void)
>diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.c b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
>new file mode 100644
>index 000000000000..541c44c70a84
>--- /dev/null
>+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.c
>@@ -0,0 +1,78 @@
>+// SPDX-License-Identifier: MIT
>+/*
>+ * Copyright © 2023 Intel Corporation
>+ */
>+
>+#include "regs/xe_gt_regs.h"
>+#include "xe_assert.h"
>+#include "xe_gt.h"
>+#include "xe_gt_ccs_mode.h"
>+#include "xe_mmio.h"
>+
>+static void __xe_gt_apply_ccs_mode(struct xe_gt *gt, u32 num_engines)
>+{
>+	u32 mode = CCS_MODE_CSLICE_0_3_MASK; /* disable all by default */
>+	int num_slices = hweight32(CCS_MASK(gt));
>+	struct xe_device *xe = gt_to_xe(gt);
>+	int width, cslice = 0;
>+	u32 config = 0;
>+
>+	xe_assert(xe, xe_gt_ccs_mode_enabled(gt));
>+
>+	xe_assert(xe, num_engines && num_engines <= num_slices);
>+	xe_assert(xe, !(num_slices % num_engines));
>+
>+	/*
>+	 * Loop over all available slices and assign each a user engine.
>+	 * For example, if there are four compute slices available, the
>+	 * assignment of compute slices to compute engines would be,
>+	 *
>+	 * With 1 engine (ccs0):
>+	 *   slice 0, 1, 2, 3: ccs0
>+	 *
>+	 * With 2 engines (ccs0, ccs1):
>+	 *   slice 0, 2: ccs0
>+	 *   slice 1, 3: ccs1
>+	 *
>+	 * With 4 engines (ccs0, ccs1, ccs2, ccs3):
>+	 *   slice 0: ccs0
>+	 *   slice 1: ccs1
>+	 *   slice 2: ccs2
>+	 *   slice 3: ccs3
>+	 */
>+	for (width = num_slices / num_engines; width; width--) {
>+		struct xe_hw_engine *hwe;
>+		enum xe_hw_engine_id id;
>+
>+		for_each_hw_engine(hwe, gt, id) {
>+			if (hwe->class != XE_ENGINE_CLASS_COMPUTE)
>+				continue;
>+
>+			if (hwe->logical_instance >= num_engines)
>+				break;
>+
>+			config |= BIT(hwe->instance) << XE_HW_ENGINE_CCS0;
>+
>+			/* If a slice is fused off, leave disabled */
>+			while ((CCS_MASK(gt) & BIT(cslice)) == 0)
>+				cslice++;
>+
>+			mode &= ~CCS_MODE_CSLICE(cslice, CCS_MODE_CSLICE_MASK);
>+			mode |= CCS_MODE_CSLICE(cslice, hwe->instance);

REG_FIELD_SET() would be more appropriate.

>+			cslice++;
>+		}
>+	}
>+
>+	xe_mmio_write32(gt, CCS_MODE, mode);
>+
>+	xe_gt_info(gt, "CCS_MODE=%x config:%08x, num_engines:%d, num_slices:%d\n",
>+		   mode, config, num_engines, num_slices);
>+}
>+
>+void xe_gt_apply_ccs_mode(struct xe_gt *gt)

in xe we try to keep things into namespaces defined by the header, so
this should had been xe_gt_ccs_mode_apply()

>+{
>+	if (!gt->ccs_mode)
>+		return;
>+
>+	__xe_gt_apply_ccs_mode(gt, gt->ccs_mode);

what's the point of the split between xe_gt_apply_ccs_mode() and
__xe_gt_apply_ccs_mode()? At most it would have been called
apply_ccs_mode() rather than __xe_gt_apply_ccs_mode(). But it could as
well just had been 1 function as there are no other callers and no
clear role split between one and the other. Bothw of them operate on "gt"


>+}
>diff --git a/drivers/gpu/drm/xe/xe_gt_ccs_mode.h b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
>new file mode 100644
>index 000000000000..e8766879f6ec
>--- /dev/null
>+++ b/drivers/gpu/drm/xe/xe_gt_ccs_mode.h
>@@ -0,0 +1,23 @@
>+/* SPDX-License-Identifier: MIT */
>+/*
>+ * Copyright © 2023 Intel Corporation
>+ */
>+
>+#ifndef _XE_GT_CCS_MODE_H_
>+#define _XE_GT_CCS_MODE_H_
>+
>+#include "xe_device_types.h"
>+#include "xe_gt.h"
>+#include "xe_gt_types.h"
>+#include "xe_platform_types.h"

why do you need all these headers?

Lucas De Marchi


More information about the Intel-xe mailing list