[Intel-xe] [PATCH 1/2] drm/xe: Introduce low level driver error counting APIs

Tejas Upadhyay tejas.upadhyay at intel.com
Thu Sep 21 06:03:13 UTC 2023


For low-level driver errors that might have a power or performance
impact on the system, we are adding new error counters to the GT
and tile, incremented on each occurrence. Let's introduce APIs
to define and increment each error type counter.

Signed-off-by: Tejas Upadhyay <tejas.upadhyay at intel.com>
---
 drivers/gpu/drm/xe/xe_device_types.h | 13 +++++++++++++
 drivers/gpu/drm/xe/xe_gt.c           | 24 ++++++++++++++++++++++++
 drivers/gpu/drm/xe/xe_gt_types.h     | 12 ++++++++++++
 drivers/gpu/drm/xe/xe_tile.c         | 25 +++++++++++++++++++++++++
 4 files changed, 74 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_device_types.h b/drivers/gpu/drm/xe/xe_device_types.h
index a82f28c6a3a0..14f477412581 100644
--- a/drivers/gpu/drm/xe/xe_device_types.h
+++ b/drivers/gpu/drm/xe/xe_device_types.h
@@ -57,6 +57,13 @@ struct xe_ggtt;
 		 const struct xe_tile * : (const struct xe_device *)((tile__)->xe),	\
 		 struct xe_tile * : (tile__)->xe)
 
+enum xe_tile_err_type {
+	XE_TILE_DRV_ERR_GGTT = 0,	/* values index xe_tile.drv_err_cnt[] */
+	XE_TILE_DRV_ERR_GUC_COMM,
+	XE_TILE_DRV_ERR_INTR,
+	XE_TILE_DRV_ERR_MAX		/* category count; sizes drv_err_cnt[] */
+};
+
 /**
  * struct xe_mem_region - memory region structure
  * This is used to describe a memory region in xe
@@ -173,8 +180,14 @@ struct xe_tile {
 
 	/** @sysfs: sysfs' kobj used by xe_tile_sysfs */
 	struct kobject *sysfs;
+
+	/** @drv_err_cnt: driver error counter for this tile */
+	u32 drv_err_cnt[XE_TILE_DRV_ERR_MAX];
 };
 
+void xe_tile_cnt_drv_err(struct xe_tile *tile,
+			 const enum xe_tile_err_type err);
+
 /**
  * struct xe_device - Top level struct of XE device
  */
diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
index 1aa44d4f9ac1..61e4d0222836 100644
--- a/drivers/gpu/drm/xe/xe_gt.c
+++ b/drivers/gpu/drm/xe/xe_gt.c
@@ -47,6 +47,30 @@
 #include "xe_wa.h"
 #include "xe_wopcm.h"
 
+static const char *const xe_gt_drv_err_to_str[] = { /* by xe_gt_err_type */
+	[XE_GT_DRV_ERR_ENGINE] = "ENGINE OTHER",
+	[XE_GT_DRV_ERR_OTHERS] = "GT OTHER" /* NOTE(review): table unused here */
+};
+
+/**
+ * xe_gt_cnt_drv_err - Count a driver error for a GT
+ * @gt: GT whose drv_err_cnt[] entry is incremented
+ * @err: error category, used as the index into @gt->drv_err_cnt
+ *
+ * Add one to the counter for @err. An @err at or beyond the end of the
+ * counter array is silently ignored.
+ * NOTE(review): the increment is a separate READ_ONCE() and WRITE_ONCE(),
+ * not one atomic operation; confirm callers cannot race on a counter.
+ */
+void xe_gt_cnt_drv_err(struct xe_gt *gt,
+		       const enum xe_gt_err_type err)
+{
+	if (err >= ARRAY_SIZE(gt->drv_err_cnt))
+		return;
+	WRITE_ONCE(gt->drv_err_cnt[err],
+		   READ_ONCE(gt->drv_err_cnt[err]) + 1);
+}
+
 struct xe_gt *xe_gt_alloc(struct xe_tile *tile)
 {
 	struct xe_gt *gt;
diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h
index d4310be3e1e7..cb71aff16a0b 100644
--- a/drivers/gpu/drm/xe/xe_gt_types.h
+++ b/drivers/gpu/drm/xe/xe_gt_types.h
@@ -24,6 +24,12 @@ enum xe_gt_type {
 	XE_GT_TYPE_MEDIA,
 };
 
+enum xe_gt_err_type {
+	XE_GT_DRV_ERR_ENGINE = 0,	/* values index xe_gt.drv_err_cnt[] */
+	XE_GT_DRV_ERR_OTHERS,
+	XE_GT_DRV_ERR_MAX		/* category count; sizes drv_err_cnt[] */
+};
+
 #define XE_MAX_DSS_FUSE_REGS	3
 #define XE_MAX_EU_FUSE_REGS	1
 
@@ -347,6 +353,12 @@ struct xe_gt {
 		/** @oob: bitmap with active OOB workaroudns */
 		unsigned long *oob;
 	} wa_active;
+
+	/** @drv_err_cnt: driver error counter for this GT */
+	u32 drv_err_cnt[XE_GT_DRV_ERR_MAX];
 };
 
+void xe_gt_cnt_drv_err(struct xe_gt *gt,
+		       const enum xe_gt_err_type err);
+
 #endif
diff --git a/drivers/gpu/drm/xe/xe_tile.c b/drivers/gpu/drm/xe/xe_tile.c
index 131752a57f65..c6dfcb4431f0 100644
--- a/drivers/gpu/drm/xe/xe_tile.c
+++ b/drivers/gpu/drm/xe/xe_tile.c
@@ -71,6 +71,31 @@
  *  - MOCS and PAT programming
  */
 
+static const char *const xe_tile_drv_err_to_str[] = { /* by xe_tile_err_type */
+	[XE_TILE_DRV_ERR_GGTT] = "GGTT",
+	[XE_TILE_DRV_ERR_GUC_COMM] = "GUC COMMUNICATION",
+	[XE_TILE_DRV_ERR_INTR] = "INTERRUPT" /* NOTE(review): table unused here */
+};
+
+/**
+ * xe_tile_cnt_drv_err - Count a driver error for a tile
+ * @tile: Tile whose drv_err_cnt[] entry is incremented
+ * @err: error category, used as the index into @tile->drv_err_cnt
+ *
+ * Add one to the counter for @err. An @err at or beyond the end of the
+ * counter array is silently ignored.
+ * NOTE(review): the increment is a separate READ_ONCE() and WRITE_ONCE(),
+ * not one atomic operation; confirm callers cannot race on a counter.
+ */
+void xe_tile_cnt_drv_err(struct xe_tile *tile,
+			 const enum xe_tile_err_type err)
+{
+	if (err >= ARRAY_SIZE(tile->drv_err_cnt))
+		return;
+	WRITE_ONCE(tile->drv_err_cnt[err],
+		   READ_ONCE(tile->drv_err_cnt[err]) + 1);
+}
+
 /**
  * xe_tile_alloc - Perform per-tile memory allocation
  * @tile: Tile to perform allocations for
-- 
2.25.1



More information about the Intel-xe mailing list