[Intel-xe] [PATCH v2 1/2] drm/xe: Avoid 64-bit register reads

Gustavo Sousa gustavo.sousa at intel.com
Thu Aug 24 14:12:20 UTC 2023


Quoting Matt Roper (2023-08-22 21:33:13-03:00)
>Intel hardware officially only supports GTTMMADR register accesses of
>32-bits or less (although 64-bit accesses to device memory and PTEs in
>the GSM are fine).  Even though we do usually seem to get back
>reasonable values when performing readq() operations on registers in
>BAR0, we shouldn't rely on this violation of the spec working
>consistently.  It's likely that even when we do get proper register
>values back the hardware is internally satisfying the request via a
>non-atomic sequence of two 32-bit reads, which can be problematic for
>timestamps and counters if rollover of the lower bits is not considered.
>
>Replace xe_mmio_read64() with xe_mmio_read64_2x32() that implements
>64-bit register reads as two 32-bit reads and attempts to ensure that
>the upper dword has stabilized to avoid problematic rollovers for
>counter and timestamp registers.
>
>v2:
> - Move function from xe_mmio.h to xe_mmio.c.  (Lucas)
> - Convert comment to kerneldoc and note that it shouldn't be used on
>   registers where reads may trigger side effects.  (Lucas)
>
>Bspec: 60027
>Signed-off-by: Matt Roper <matthew.d.roper at intel.com>
>Reviewed-by: Lucas De Marchi <lucas.demarchi at intel.com>
>Reviewed-by: José Roberto de Souza <jose.souza at intel.com>
>---
> drivers/gpu/drm/xe/xe_mmio.c           | 56 ++++++++++++++++++++++++--
> drivers/gpu/drm/xe/xe_mmio.h           | 12 +-----
> drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c |  6 +--
> 3 files changed, 58 insertions(+), 16 deletions(-)
>
>diff --git a/drivers/gpu/drm/xe/xe_mmio.c b/drivers/gpu/drm/xe/xe_mmio.c
>index bb6823db14d4..c2ec52eefb2e 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.c
>+++ b/drivers/gpu/drm/xe/xe_mmio.c
>@@ -228,7 +228,7 @@ int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size,
>                 reg = xe_gt_mcr_unicast_read_any(gt, XEHP_FLAT_CCS_BASE_ADDR);
>                 offset = (u64)REG_FIELD_GET(GENMASK(31, 8), reg) * SZ_64K;
>         } else {
>-                offset = xe_mmio_read64(gt, GSMBASE);
>+                offset = xe_mmio_read64_2x32(gt, GSMBASE);
>         }
> 
>         /* remove the tile offset so we have just the available size */
>@@ -326,7 +326,7 @@ static void xe_mmio_probe_tiles(struct xe_device *xe)
>         if (xe->info.tile_count == 1)
>                 return;
> 
>-        mtcfg = xe_mmio_read64(gt, XEHP_MTCFG_ADDR);
>+        mtcfg = xe_mmio_read64_2x32(gt, XEHP_MTCFG_ADDR);
>         adj_tile_count = xe->info.tile_count =
>                 REG_FIELD_GET(TILE_COUNT, mtcfg) + 1;
> 
>@@ -509,7 +509,7 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
>                         args->value = xe_mmio_read32(gt, reg);
>                         break;
>                 case DRM_XE_MMIO_64BIT:
>-                        args->value = xe_mmio_read64(gt, reg);
>+                        args->value = xe_mmio_read64_2x32(gt, reg);
>                         break;
>                 default:
>                         drm_dbg(&xe->drm, "Invalid MMIO bit size");
>@@ -526,3 +526,53 @@ int xe_mmio_ioctl(struct drm_device *dev, void *data,
> 
>         return ret;
> }
>+
>+/**
>+ * xe_mmio_read64_2x32() - Read a 64-bit register as two 32-bit reads
>+ * @gt: MMIO target GT
>+ * @reg: register to read value from
>+ *
>+ * Although Intel GPUs have some 64-bit registers, the hardware officially
>+ * only supports GTTMMADR register reads of 32 bits or smaller.  Even if
>+ * a readq operation may return a reasonable value, that violation of the
>+ * spec shouldn't be relied upon and all 64-bit register reads should be
>+ * performed as two 32-bit reads of the upper and lower dwords.
>+ *
>+ * When reading registers that may be changing (such as
>+ * counters), a rollover of the lower dword between the two 32-bit reads
>+ * can be problematic.  This function attempts to ensure the upper dword has
>+ * stabilized before returning the 64-bit value.

Sorry to be a bit late to the party here, but is this the common case? I wonder
if we should rather have a specialized version of xe_mmio_read64_2x32() for
cases where then rollover is possible.

--
Gustavo Sousa

>+ *
>+ * Note that because this function may re-read the register multiple times
>+ * while waiting for the value to stabilize it should not be used to read
>+ * any registers where read operations have side effects.
>+ *
>+ * Returns the value of the 64-bit register.
>+ */
>+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg)
>+{
>+        struct xe_reg reg_udw = { .addr = reg.addr + 0x4 };
>+        u32 ldw, udw, oldudw, retries;
>+
>+        if (reg.addr < gt->mmio.adj_limit) {
>+                reg.addr += gt->mmio.adj_offset;
>+                reg_udw.addr += gt->mmio.adj_offset;
>+        }
>+
>+        oldudw = xe_mmio_read32(gt, reg_udw);
>+        for (retries = 5; retries; --retries) {
>+                ldw = xe_mmio_read32(gt, reg);
>+                udw = xe_mmio_read32(gt, reg_udw);
>+
>+                if (udw == oldudw)
>+                        break;
>+
>+                oldudw = udw;
>+        }
>+
>+        xe_gt_WARN(gt, retries == 0,
>+                   "64-bit read of %#x did not stabilize\n", reg.addr);
>+
>+        return (u64)udw << 32 | ldw;
>+}
>+
>diff --git a/drivers/gpu/drm/xe/xe_mmio.h b/drivers/gpu/drm/xe/xe_mmio.h
>index d24badca8677..f72c34c7d1d0 100644
>--- a/drivers/gpu/drm/xe/xe_mmio.h
>+++ b/drivers/gpu/drm/xe/xe_mmio.h
>@@ -11,6 +11,7 @@
> 
> #include "regs/xe_reg_defs.h"
> #include "xe_device_types.h"
>+#include "xe_gt_printk.h"
> #include "xe_gt_types.h"
> 
> struct drm_device;
>@@ -85,16 +86,6 @@ static inline void xe_mmio_write64(struct xe_gt *gt,
>         writeq(val, tile->mmio.regs + reg.addr);
> }
> 
>-static inline u64 xe_mmio_read64(struct xe_gt *gt, struct xe_reg reg)
>-{
>-        struct xe_tile *tile = gt_to_tile(gt);
>-
>-        if (reg.addr < gt->mmio.adj_limit)
>-                reg.addr += gt->mmio.adj_offset;
>-
>-        return readq(tile->mmio.regs + reg.addr);
>-}
>-
> static inline int xe_mmio_write32_and_verify(struct xe_gt *gt,
>                                              struct xe_reg reg, u32 val,
>                                              u32 mask, u32 eval)
>@@ -155,5 +146,6 @@ static inline bool xe_mmio_in_range(const struct xe_mmio_range *range,
> 
> int xe_mmio_probe_vram(struct xe_device *xe);
> int xe_mmio_tile_vram_size(struct xe_tile *tile, u64 *vram_size, u64 *tile_size, u64 *tile_base);
>+u64 xe_mmio_read64_2x32(struct xe_gt *gt, struct xe_reg reg);
> 
> #endif
>diff --git a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
>index be0a25e23929..6ba6b1b7f34b 100644
>--- a/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
>+++ b/drivers/gpu/drm/xe/xe_ttm_stolen_mgr.c
>@@ -67,7 +67,7 @@ static s64 detect_bar2_dgfx(struct xe_device *xe, struct xe_ttm_stolen_mgr *mgr)
>         }
> 
>         /* Use DSM base address instead for stolen memory */
>-        mgr->stolen_base = (xe_mmio_read64(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
>+        mgr->stolen_base = (xe_mmio_read64_2x32(mmio, DSMBASE) & BDSM_MASK) - tile_offset;
>         if (drm_WARN_ON(&xe->drm, tile_size < mgr->stolen_base))
>                 return 0;
> 
>@@ -126,8 +126,8 @@ static u32 detect_bar2_integrated(struct xe_device *xe, struct xe_ttm_stolen_mgr
> 
>         /* Carve out the top of DSM as it contains the reserved WOPCM region */
>         wopcm_size = REG_FIELD_GET64(WOPCM_SIZE_MASK,
>-                                     xe_mmio_read64(xe_root_mmio_gt(xe),
>-                                                    STOLEN_RESERVED));
>+                                     xe_mmio_read64_2x32(xe_root_mmio_gt(xe),
>+                                                         STOLEN_RESERVED));
>         stolen_size -= (1U << wopcm_size) * SZ_1M;
> 
>         if (drm_WARN_ON(&xe->drm, stolen_size + SZ_8M > pci_resource_len(pdev, 2)))
>-- 
>2.41.0
>


More information about the Intel-xe mailing list