[Intel-xe] [PATCH 1/2] drm/xe: Avoid 64-bit register reads
Lucas De Marchi
lucas.demarchi at intel.com
Tue Aug 22 14:02:17 UTC 2023
On Mon, Aug 21, 2023 at 04:43:23PM -0700, Matt Roper wrote:
>Intel hardware officially only supports GTTMMADR register accesses of
>32-bits or less (although 64-bit accesses to device memory and PTEs in
>the GSM are fine). Even though we do usually seem to get back
>reasonable values when performing readq() operations on registers in
>BAR0, we shouldn't rely on this violation of the spec working
>consistently. It's likely that even when we do get proper register
>values back the hardware is internally satisfying the request via a
>non-atomic sequence of two 32-bit reads, which can be problematic for
>timestamps and counters if rollover of the lower bits is not considered.
>
>Replace xe_mmio_read64() with xe_mmio_read64_2x32() that implements
>64-bit register reads as two 32-bit reads and attempts to ensure that
>the upper dword has stabilized to avoid problematic rollovers for
>counter and timestamp registers.
>
>Bspec: 60027
>Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
>---
> drivers/gpu/drm/xe/xe_mmio.c | 6 ++---
> drivers/gpu/drm/xe/xe_mmio.h | 37 +++++++++++++++++++++++---
> drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c | 6 ++---
> 3 files changed, 39 insertions(+), 10 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>index bb6823db14d4..932aa4ca59c8 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.c
>+++ b/drivers/gpu/drm/xe/xe_mmio.c
>@@ -228,7 +228,7 @@ int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size,
> reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
> offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
> } else {
>- offset = xe_mmio_read64(gt, GSMBASE);
>+ offset = xe_mmio_read64_2x32(gt, GSMBASE);
> }
>
> /* remove the tile offset so we have just the available size */
>@@ -326,7 +326,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
> if (xe->info.tile_count == 1)
> return;
>
>- mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR);
>+ mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
> adj_tile_count = xe->info.tile_count =
> REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
>
>@@ -509,7 +509,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
> args->value = xe_mmio_read32(gt, reg);
> break;
> case DRM_XE_MMIO_64BIT:
>- args->value = xe_mmio_read64(gt, reg);
>+ args->value = xe_mmio_read64_2x32(gt, reg);
> break;
> default:
> drm_dbg(&xe->drm, "Invalid MMIO bit size");
>diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
>index d24badca8677..892e96e50463 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.h
>+++ b/drivers/gpu/drm/xe/xe_mmio.h
>@@ -11,6 +11,7 @@
>
> #include "regs/xe_reg_defs.h"
> #include "xe_device_types.h"
>+#include "xe_gt_printk.h"
> #include "xe_gt_types.h"
>
> struct drm_device;
>@@ -85,14 +86,42 @@ static inline void xe_mmio_write64(struct xe_gt *gt,
> writeq(val, tile->mmio.regs + reg.addr);
> }
>
>-static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
>+/*
>+ * Intel hardware officially only supports GTTMMADR register reads of 32 bits
>+ * or smaller. Although we often seem to get reasonable values back from a
>+ * readq of a register address, we shouldn't rely on this violation of the
>+ * spec. Furthermore, when reading registers that may be changing (such as
>+ * counters), the hardware may not perform the read atomically, leading to
>+ * problems if the read happens while the lower dword is rolling over.
>+ *
>+ * Implement 64-bit register reads as a pair of 32-bit reads and ensure the
>+ * upper dword has stabilized before returning a value.
As an internal interface, should probably be kernel-doc? Another thing
to call out is that this function can't be used if there are side
effects on read.
Also, function seems a bit too big to be on the header.
$ ./scripts/bloat-o-meter build64/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.o.old build64/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.o
add/remove: 0/0 grow/shrink: 1/0 up/down: 514/0 (514)
Function old new delta
xe_ttm_stolen_mgr_init 1155 1669 +514
Total: Before=2340, After=2854, chg +21.97%
Let's move it to xe_mmio.c?
Aside from that, implementation and justification seem goo
Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
Lucas De Marchi
>+ */
>+static inline u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
> {
>- struct xe_tile *tile = gt_to_tile(gt);
>+ struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
>+ u32 ldw, udw, oldudw, retries;
>
>- if (reg.addr < gt->mmio.adj_limit)
>+ if (reg.addr < gt->mmio.adj_limit) {
> reg.addr += gt->mmio.adj_offset;
>+ reg_udw.addr += gt->mmio.adj_offset;
>+ }
>
>- return readq(tile->mmio.regs + reg.addr);
>+ oldudw = xe_mmio_read32(gt, reg_udw);
>+ for (retries = 5; retries; --retries) {
>+ ldw = xe_mmio_read32(gt, reg);
>+ udw = xe_mmio_read32(gt, reg_udw);
>+
>+ if (udw == oldudw)
>+ break;
>+
>+ oldudw = udw;
>+ }
>+
>+ xe_gt_WARN(gt, retries == 0,
>+ "64-bit read of %#x did not stabilize\n", reg.addr);
>+
>+ return (u64)udw << 32 | ldw;
> }
>
> static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
>diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
>index be0a25e23929..6ba6b1b7f34b 100644
>--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
>+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
>@@ -67,7 +67,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
> }
>
> /* Use DSM base address instead for stolen memory */
>- mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
>+ mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
> if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
> return 0;
>
>@@ -126,8 +126,8 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
>
> /* Carve out the top of DSM as it contains the reserved WOPCM region */
> wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK,
>- xe_mmio_read64(xe_root_mmio_gt(xe),
>- STOLEN_RESERVED));
>+ xe_mmio_read64_2x32(xe_root_mmio_gt(xe),
>+ STOLEN_RESERVED));
> stolen_size -= (1U << wopcm_size) * SZ_1M;
>
> if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
>--
>2.41.0
>
More information about the Intel-xe
mailing list