[PATCH 1/2] drm/xe: Add XE_BO_FLAG_NEEDS_WC_CPU and unify mapping for page tables

Maarten Lankhorst maarten.lankhorst at linux.intel.com
Fri Sep 20 19:01:06 UTC 2024


There are various places where we map buffers write-combined on the
CPU and uncached on the GPU. Unify all of those users behind a single flag.

In particular, our usage of page table caching flags has been incoherent;
we should use uncached mappings where applicable.

Signed-off-by: Maarten Lankhorst <maarten.lankhorst at linux.intel.com>
Cc: Ashutosh Dixit <ashutosh.dixit at intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c   | 22 +++++++---------------
 drivers/gpu/drm/xe/xe_bo.h   |  7 ++++---
 drivers/gpu/drm/xe/xe_ggtt.c |  4 ++--
 drivers/gpu/drm/xe/xe_pt.c   | 31 ++++++++++++++++++++++++++++++-
 drivers/gpu/drm/xe/xe_pt.h   |  1 +
 5 files changed, 44 insertions(+), 21 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 5f2f1ec46b57..790078aa47af 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -366,37 +366,29 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo,
 	 * CPU.
 	 */
 	if (!IS_DGFX(xe)) {
-		switch (bo->cpu_caching) {
-		case DRM_XE_GEM_CPU_CACHING_WC:
+		if (bo->cpu_caching == DRM_XE_GEM_CPU_CACHING_WC)
 			caching = ttm_write_combined;
-			break;
-		default:
-			caching = ttm_cached;
-			break;
-		}
 
 		WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching);
 
 		/*
 		 * Display scanout is always non-coherent with the CPU cache.
-		 *
-		 * For Xe_LPG and beyond, PPGTT PTE lookups are also
-		 * non-coherent and require a CPU:WC mapping.
 		 */
-		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) ||
-		    (xe->info.graphics_verx100 >= 1270 &&
-		     bo->flags & XE_BO_FLAG_PAGETABLE))
+		if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT))
 			caching = ttm_write_combined;
 	}
 
-	if (bo->flags & XE_BO_FLAG_NEEDS_UC) {
+	if (bo->flags & (XE_BO_FLAG_NEEDS_UC | XE_BO_FLAG_NEEDS_WC_CPU))
 		/*
 		 * Valid only for internally-created buffers only, for
 		 * which cpu_caching is never initialized.
 		 */
 		xe_assert(xe, bo->cpu_caching == 0);
+
+	if (bo->flags & XE_BO_FLAG_NEEDS_WC_CPU)
+		caching = ttm_write_combined;
+	else if (bo->flags & XE_BO_FLAG_NEEDS_UC)
 		caching = ttm_uncached;
-	}
 
 	err = ttm_tt_init(&tt->ttm, &bo->ttm, page_flags, caching, extra_pages);
 	if (err) {
diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
index 31f4ba3bd8c1..e14013096060 100644
--- a/drivers/gpu/drm/xe/xe_bo.h
+++ b/drivers/gpu/drm/xe/xe_bo.h
@@ -36,9 +36,10 @@
 #define XE_BO_FLAG_PAGETABLE		BIT(12)
 #define XE_BO_FLAG_NEEDS_CPU_ACCESS	BIT(13)
 #define XE_BO_FLAG_NEEDS_UC		BIT(14)
-#define XE_BO_FLAG_NEEDS_64K		BIT(15)
-#define XE_BO_FLAG_NEEDS_2M		BIT(16)
-#define XE_BO_FLAG_GGTT_INVALIDATE	BIT(17)
+#define XE_BO_FLAG_NEEDS_WC_CPU		BIT(15)
+#define XE_BO_FLAG_NEEDS_64K		BIT(16)
+#define XE_BO_FLAG_NEEDS_2M		BIT(17)
+#define XE_BO_FLAG_GGTT_INVALIDATE	BIT(18)
 /* this one is trigger internally only */
 #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
 #define XE_BO_FLAG_INTERNAL_64K		BIT(31)
diff --git a/drivers/gpu/drm/xe/xe_ggtt.c b/drivers/gpu/drm/xe/xe_ggtt.c
index f68af56c3f86..c4b6a163069f 100644
--- a/drivers/gpu/drm/xe/xe_ggtt.c
+++ b/drivers/gpu/drm/xe/xe_ggtt.c
@@ -26,6 +26,7 @@
 #include "xe_map.h"
 #include "xe_mmio.h"
 #include "xe_pm.h"
+#include "xe_pt.h"
 #include "xe_sriov.h"
 #include "xe_wa.h"
 #include "xe_wopcm.h"
@@ -581,8 +582,7 @@ bool xe_ggtt_node_allocated(const struct xe_ggtt_node *node)
  */
 void xe_ggtt_map_bo(struct xe_ggtt *ggtt, struct xe_bo *bo)
 {
-	u16 cache_mode = bo->flags & XE_BO_FLAG_NEEDS_UC ? XE_CACHE_NONE : XE_CACHE_WB;
-	u16 pat_index = tile_to_xe(ggtt->tile)->pat.idx[cache_mode];
+	u16 pat_index = xe_pt_pat_index_from_bo(bo);
 	u64 start;
 	u64 offset, pte;
 
diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
index d6353e8969f0..0ab472dee80a 100644
--- a/drivers/gpu/drm/xe/xe_pt.c
+++ b/drivers/gpu/drm/xe/xe_pt.c
@@ -100,6 +100,7 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 	struct xe_pt *pt;
 	struct xe_bo *bo;
 	int err;
+	u32 flags = 0;
 
 	if (level) {
 		struct xe_pt_dir *dir = kzalloc(sizeof(*dir), GFP_KERNEL);
@@ -112,13 +113,21 @@ struct xe_pt *xe_pt_create(struct xe_vm *vm, struct xe_tile *tile,
 		return ERR_PTR(-ENOMEM);
 
 	pt->level = level;
+
+	/*
+	 * For Xe_LPG and beyond, PPGTT PTE lookups are
+	 * non-coherent and require a CPU:WC mapping.
+	 */
+	if (!IS_DGFX(vm->xe) && vm->xe->info.graphics_verx100 >= 1270)
+		flags |= XE_BO_FLAG_NEEDS_WC_CPU;
+
 	bo = xe_bo_create_pin_map(vm->xe, tile, vm, SZ_4K,
 				  ttm_bo_type_kernel,
 				  XE_BO_FLAG_VRAM_IF_DGFX(tile) |
 				  XE_BO_FLAG_IGNORE_MIN_PAGE_SIZE |
 				  XE_BO_FLAG_PINNED |
 				  XE_BO_FLAG_NO_RESV_EVICT |
-				  XE_BO_FLAG_PAGETABLE);
+				  XE_BO_FLAG_PAGETABLE | flags);
 	if (IS_ERR(bo)) {
 		err = PTR_ERR(bo);
 		goto err_kfree;
@@ -568,6 +577,7 @@ xe_pt_stage_bind_entry(struct xe_ptw *parent, pgoff_t offset,
 			xe_child->is_compact = true;
 		}
 
+		pat_index = xe_pt_pat_index_from_bo(xe_child->bo);
 		pte = vm->pt_ops->pde_encode_bo(xe_child->bo, 0, pat_index) | flags;
 		ret = xe_pt_insert_entry(xe_walk, xe_parent, offset, xe_child,
 					 pte);
@@ -2190,3 +2200,22 @@ void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops)
 
 	xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
 }
+
+
+/**
+ * xe_pt_pat_index_from_bo() - Get PAT index for kernel BO
+ * @bo: BO to get PAT index from.
+ *
+ * Return: PAT index for @bo: either the index for XE_CACHE_NONE or for XE_CACHE_WB.
+ */
+u16 xe_pt_pat_index_from_bo(struct xe_bo *bo)
+{
+	struct xe_device *xe = tile_to_xe(bo->tile);
+
+	xe_assert(xe, !(bo->flags & XE_BO_FLAG_USER) || (bo->flags & XE_BO_FLAG_SCANOUT));
+
+	if (bo->flags & (XE_BO_FLAG_NEEDS_UC | XE_BO_FLAG_NEEDS_WC_CPU))
+		return xe->pat.idx[XE_CACHE_NONE];
+
+	return xe->pat.idx[XE_CACHE_WB];
+}
diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
index 9ab386431cad..a705cf6851ea 100644
--- a/drivers/gpu/drm/xe/xe_pt.h
+++ b/drivers/gpu/drm/xe/xe_pt.h
@@ -42,5 +42,6 @@ void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops *vops);
 void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops *vops);
 
 bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
+u16 xe_pt_pat_index_from_bo(struct xe_bo *bo);
 
 #endif
-- 
2.45.2



More information about the Intel-xe mailing list