[PATCH v4 2/5] drm/xe: Add infrastructure for Device OOB workarounds
Matt Atwood
matthew.s.atwood at intel.com
Wed Jun 25 20:07:47 UTC 2025
Some workarounds need to be able to be applied ahead of any GT
initialization for example 15015404425. This patch creates XE_DEVICE_WA
macro, in the same vein as XE_WA. This macro can be used ahead of GT
initialization, and can be tracked in sysfs. This should alleviate some
of the complexities that exist in i915.
v2: name change SoC to Device, address style issues
Signed-off-by: Matt Atwood <matthew.s.atwood at intel.com>
---
drivers/gpu/drm/xe/xe_device_types.h | 9 ++++++
drivers/gpu/drm/xe/xe_pci.c | 4 +++
drivers/gpu/drm/xe/xe_rtp.c | 5 +++
drivers/gpu/drm/xe/xe_rtp.h | 3 +-
drivers/gpu/drm/xe/xe_rtp_types.h | 2 ++
drivers/gpu/drm/xe/xe_wa.c | 46 ++++++++++++++++++++++++++++
drivers/gpu/drm/xe/xe_wa.h | 18 +++++++++--
7 files changed, 83 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index 6aca4b1a2824..62f09d2923c3 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -360,6 +360,15 @@ struct xe_device {
u8 skip_pcode:1;
} info;
+ /** @xe_active.oob: bitmap with active OOB workarounds */
+ unsigned long *oob;
+ /**
+ * @xe_active.oob_initialized: mark oob as initialized to help
+ * detecting misuse of XE_SOC_WA() - it can only be called on
+ * initialization after SOC OOB WAs have been processed
+ */
+ bool oob_initialized;
+
/** @survivability: survivability information for device */
struct xe_survivability survivability;
diff --git a/drivers/gpu/drm/xe/xe_pci.c b/drivers/gpu/drm/xe/xe_pci.c
index 824461c31288..a9f708608f3e 100644
--- a/drivers/gpu/drm/xe/xe_pci.c
+++ b/drivers/gpu/drm/xe/xe_pci.c
@@ -32,6 +32,7 @@
#include "xe_step.h"
#include "xe_survivability_mode.h"
#include "xe_tile.h"
+#include "xe_wa.h"
enum toggle_d3cold {
D3COLD_DISABLE,
@@ -832,6 +833,9 @@ static int xe_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (err)
return err;
+ xe_device_wa_init(xe);
+ xe_device_wa_process_oob(xe);
+
err = xe_device_probe_early(xe);
/*
* In Boot Survivability mode, no drm card is exposed and driver
diff --git a/drivers/gpu/drm/xe/xe_rtp.c b/drivers/gpu/drm/xe/xe_rtp.c
index 29e694bb1219..2a0b0a1a8969 100644
--- a/drivers/gpu/drm/xe/xe_rtp.c
+++ b/drivers/gpu/drm/xe/xe_rtp.c
@@ -196,6 +196,11 @@ static void rtp_get_context(struct xe_rtp_process_ctx *ctx,
*gt = (*hwe)->gt;
*xe = gt_to_xe(*gt);
break;
+ case XE_RTP_PROCESS_TYPE_DEVICE:
+ *hwe = NULL;
+ *gt = NULL;
+ *xe = ctx->xe;
+ break;
}
}
diff --git a/drivers/gpu/drm/xe/xe_rtp.h b/drivers/gpu/drm/xe/xe_rtp.h
index 4fe736a11c42..ac260feaabef 100644
--- a/drivers/gpu/drm/xe/xe_rtp.h
+++ b/drivers/gpu/drm/xe/xe_rtp.h
@@ -422,7 +422,8 @@ struct xe_reg_sr;
#define XE_RTP_PROCESS_CTX_INITIALIZER(arg__) _Generic((arg__), \
struct xe_hw_engine * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_ENGINE }, \
- struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT })
+ struct xe_gt * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_GT }, \
+ struct xe_device * : (struct xe_rtp_process_ctx){ { (void *)(arg__) }, XE_RTP_PROCESS_TYPE_DEVICE })
void xe_rtp_process_ctx_enable_active_tracking(struct xe_rtp_process_ctx *ctx,
unsigned long *active_entries,
diff --git a/drivers/gpu/drm/xe/xe_rtp_types.h b/drivers/gpu/drm/xe/xe_rtp_types.h
index 1b76b947c706..123579ebf2cf 100644
--- a/drivers/gpu/drm/xe/xe_rtp_types.h
+++ b/drivers/gpu/drm/xe/xe_rtp_types.h
@@ -112,12 +112,14 @@ struct xe_rtp_entry {
enum xe_rtp_process_type {
XE_RTP_PROCESS_TYPE_GT,
XE_RTP_PROCESS_TYPE_ENGINE,
+ XE_RTP_PROCESS_TYPE_DEVICE,
};
struct xe_rtp_process_ctx {
union {
struct xe_gt *gt;
struct xe_hw_engine *hwe;
+ struct xe_device *xe;
};
enum xe_rtp_process_type type;
unsigned long *active_entries;
diff --git a/drivers/gpu/drm/xe/xe_wa.c b/drivers/gpu/drm/xe/xe_wa.c
index f51218a7a580..2cb2fab20070 100644
--- a/drivers/gpu/drm/xe/xe_wa.c
+++ b/drivers/gpu/drm/xe/xe_wa.c
@@ -11,6 +11,7 @@
#include <linux/fault-inject.h>
#include <generated/xe_wa_oob.h>
+#include <generated/xe_device_wa_oob.h>
#include "regs/xe_engine_regs.h"
#include "regs/xe_gt_regs.h"
@@ -876,6 +877,13 @@ static __maybe_unused const struct xe_rtp_entry oob_was[] = {
static_assert(ARRAY_SIZE(oob_was) - 1 == _XE_WA_OOB_COUNT);
+static __maybe_unused const struct xe_rtp_entry device_oob_was[] = {
+#include <generated/xe_device_wa_oob.c>
+ {}
+};
+
+static_assert(ARRAY_SIZE(device_oob_was) - 1 == _XE_DEVICE_WA_OOB_COUNT);
+
__diag_pop();
/**
@@ -895,6 +903,39 @@ void xe_wa_process_oob(struct xe_gt *gt)
xe_rtp_process(&ctx, oob_was);
}
+int xe_device_wa_init(struct xe_device *xe)
+{
+ unsigned long *p;
+
+ p = drmm_kzalloc(&xe->drm,
+ sizeof(xe->oob) * BITS_TO_LONGS(ARRAY_SIZE(device_oob_was)),
+ GFP_KERNEL);
+
+ if (!p)
+ return -ENOMEM;
+
+ xe->oob = p;
+
+ return 0;
+}
+
+/**
+ * xe_device_wa_process_oob - process OOB workaround table
+ * @xe: xe device instance process workarounds for
+ *
+ * Process OOB workaround table for this platform, marking in @xe the
+ * workarounds that are active.
+ */
+void xe_device_wa_process_oob(struct xe_device *xe)
+{
+ struct xe_rtp_process_ctx ctx = XE_RTP_PROCESS_CTX_INITIALIZER(xe);
+
+ xe_rtp_process_ctx_enable_active_tracking(&ctx, xe->oob, ARRAY_SIZE(device_oob_was));
+
+ xe->oob_initialized = true;
+ xe_rtp_process(&ctx, device_oob_was);
+}
+
/**
* xe_wa_process_gt - process GT workaround table
* @gt: GT instance to process workarounds for
@@ -1000,6 +1041,11 @@ void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p)
for_each_set_bit(idx, gt->wa_active.oob, ARRAY_SIZE(oob_was))
if (oob_was[idx].name)
drm_printf_indent(p, 1, "%s\n", oob_was[idx].name);
+
+ drm_printf(p, "\nDevice OOB Workarounds\n");
+ for_each_set_bit(idx, gt_to_xe(gt)->oob, ARRAY_SIZE(device_oob_was))
+ if (device_oob_was[idx].name)
+ drm_printf_indent(p, 1, "%s\n", device_oob_was[idx].name);
}
/*
diff --git a/drivers/gpu/drm/xe/xe_wa.h b/drivers/gpu/drm/xe/xe_wa.h
index 52337405b5bc..09d065e0abb5 100644
--- a/drivers/gpu/drm/xe/xe_wa.h
+++ b/drivers/gpu/drm/xe/xe_wa.h
@@ -14,7 +14,9 @@ struct xe_hw_engine;
struct xe_tile;
int xe_wa_init(struct xe_gt *gt);
+int xe_device_wa_init(struct xe_device *xe);
void xe_wa_process_oob(struct xe_gt *gt);
+void xe_device_wa_process_oob(struct xe_device *xe);
void xe_wa_process_gt(struct xe_gt *gt);
void xe_wa_process_engine(struct xe_hw_engine *hwe);
void xe_wa_process_lrc(struct xe_hw_engine *hwe);
@@ -22,14 +24,24 @@ void xe_wa_apply_tile_workarounds(struct xe_tile *tile);
void xe_wa_dump(struct xe_gt *gt, struct drm_printer *p);
/**
- * XE_WA - Out-of-band workarounds, that don't fit the lifecycle any
- * other more specific type
+ * XE_WA - Out-of-band GT workarounds, that don't fit the lifecycle any
+ * other more specific type,
* @gt__: gt instance
- * @id__: XE_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
+ * @id__: XE_WA_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
*/
#define XE_WA(gt__, id__) ({ \
xe_gt_assert(gt__, (gt__)->wa_active.oob_initialized); \
test_bit(XE_WA_OOB_ ## id__, (gt__)->wa_active.oob); \
})
+/**
+ * XE_DEVICE_WA - Out-of-band Device workarounds, that don't fit the lifecycle any
+ * other more specific type
+ * @xe__: xe_device
+ * @id__: XE_SOC_WA_OOB_<id__>, as generated by build system in generated/xe_wa_oob.h
+ */
+#define XE_DEVICE_WA(xe__, id__) ({ \
+ xe_assert(xe__, (xe__)->oob_initialized); \
+ test_bit(XE_DEVICE_WA_OOB_ ## id__, (xe__)->oob); \
+})
#endif
--
2.49.0
More information about the Intel-xe
mailing list