[Intel-xe] [PATCH 1/2] drm/xe: Avoid 64-bit register reads

Matt Roper matthew.d.roper at intel.com
Mon Aug 21 23:43:23 UTC 2023


Intel hardware officially only supports GTTMMADR register accesses of
32-bits or less (although 64-bit accesses to device memory and PTEs in
the GSM are fine).  Even though we do usually seem to get back
reasonable values when performing readq() operations on registers in
BAR0, we shouldn't rely on this violation of the spec working
consistently.  It's likely that even when we do get proper register
values back the hardware is internally satisfying the request via a
non-atomic sequence of two 32-bit reads, which can be problematic for
timestamps and counters if rollover of the lower bits is not considered.

Replace xe_mmio_read64() with xe_mmio_read64_2x32() that implements
64-bit register reads as two 32-bit reads and attempts to ensure that
the upper dword has stabilized to avoid problematic rollovers for
counter and timestamp registers.

Bspec: 60027
Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
---
 drivers/gpu/drm/xe/xe_mmio.c           |  6 ++---
 drivers/gpu/drm/xe/xe_mmio.h           | 37 +++++++++++++++++++++++---
 drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c |  6 ++---
 3 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
index bb6823db14d4..932aa4ca59c8 100644
--- a/drivers/gpu/drm/xe/xe_mmio.c
+++ b/drivers/gpu/drm/xe/xe_mmio.c
@@ -228,7 +228,7 @@ int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size,
 		reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
 		offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
 	} else {
-		offset = xe_mmio_read64(gt, GSMBASE);
+		offset = xe_mmio_read64_2x32(gt, GSMBASE);
 	}
 
 	/* remove the tile offset so we have just the available size */
@@ -326,7 +326,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
 	if (xe->info.tile_count == 1)
 		return;
 
-	mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR);
+	mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
 	adj_tile_count = xe->info.tile_count =
 		REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
 
@@ -509,7 +509,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
 			args->value = xe_mmio_read32(gt, reg);
 			break;
 		case DRM_XE_MMIO_64BIT:
-			args->value = xe_mmio_read64(gt, reg);
+			args->value = xe_mmio_read64_2x32(gt, reg);
 			break;
 		default:
 			drm_dbg(&xe->drm, "Invalid MMIO bit size");
diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
index d24badca8677..892e96e50463 100644
--- a/drivers/gpu/drm/xe/xe_mmio.h
+++ b/drivers/gpu/drm/xe/xe_mmio.h
@@ -11,6 +11,7 @@
 
 #include "regs/xe_reg_defs.h"
 #include "xe_device_types.h"
+#include "xe_gt_printk.h"
 #include "xe_gt_types.h"
 
 struct drm_device;
@@ -85,14 +86,42 @@ static inline void xe_mmio_write64(struct xe_gt *gt,
 	writeq(val, tile->mmio.regs + reg.addr);
 }
 
-static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
+/*
+ * Intel hardware officially only supports GTTMMADR register reads of 32 bits
+ * or smaller.  Although we often seem to get reasonable values back from a
+ * readq of a register address, we shouldn't rely on this violation of the
+ * spec.  Furthermore, when reading registers that may be changing (such as
+ * counters), the hardware may not perform the read atomically, leading to
+ * problems if the read happens while the lower dword is rolling over.
+ *
+ * Implement 64-bit register reads as a pair of 32-bit reads and ensure the
+ * upper dword has stabilized before returning a value.
+ */
+static inline u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
 {
-	struct xe_tile *tile = gt_to_tile(gt);
+	struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
+	u32 ldw, udw, oldudw, retries;
 
-	if (reg.addr < gt->mmio.adj_limit)
+	if (reg.addr < gt->mmio.adj_limit) {
 		reg.addr += gt->mmio.adj_offset;
+		reg_udw.addr += gt->mmio.adj_offset;
+	}
 
-	return readq(tile->mmio.regs + reg.addr);
+	oldudw = xe_mmio_read32(gt, reg_udw);
+	for (retries = 5; retries; --retries) {
+		ldw = xe_mmio_read32(gt, reg);
+		udw = xe_mmio_read32(gt, reg_udw);
+
+		if (udw == oldudw)
+			break;
+
+		oldudw = udw;
+	}
+
+	xe_gt_WARN(gt, retries == 0,
+		   "64-bit read of %#x did not stabilize\n", reg.addr);
+
+	return (u64)udw << 32 | ldw;
 }
 
 static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
index be0a25e23929..6ba6b1b7f34b 100644
--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
@@ -67,7 +67,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
 	}
 
 	/* Use DSM base address instead for stolen memory */
-	mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
+	mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
 	if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
 		return 0;
 
@@ -126,8 +126,8 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
 
 	/* Carve out the top of DSM as it contains the reserved WOPCM region */
 	wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK,
-				     xe_mmio_read64(xe_root_mmio_gt(xe),
-						    STOLEN_RESERVED));
+				     xe_mmio_read64_2x32(xe_root_mmio_gt(xe),
+							 STOLEN_RESERVED));
 	stolen_size -= (1U << wopcm_size) * SZ_1M;
 
 	if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
-- 
2.41.0



More information about the Intel-xe mailing list