[Intel-xe] [RFC v2 6/6] drm/xe/xe2: Handle flat ccs move for igfx.

Himal Prasad Ghimiray himal.prasad.ghimiray at intel.com
Tue Nov 21 10:09:06 UTC 2023


- Clear flat CCS during user bo creation.
- Copy CCS metadata between the flat CCS region and the bo during
  eviction and restore.
- Add a bool field ccs_cleared to the bo; true means the CCS region of
  the bo is already cleared.
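
As a quick illustration, here is a minimal sketch of the igfx check this
patch adds; the helper name is hypothetical, the actual patch open-codes
the condition inside xe_bo_move():

/*
 * Paraphrase of the check added to xe_bo_move(): on igfx there is no
 * VRAM, so a bo that needs CCS pages must take the migrate path (to
 * clear or copy its CCS metadata) rather than the ttm_bo_move_null()
 * shortcut, once its ttm_tt is populated.
 */
static bool xe_bo_needs_system_ccs_handling(struct xe_device *xe,
					    struct xe_bo *bo,
					    struct ttm_tt *ttm)
{
	return !IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
	       ttm && ttm_tt_is_populated(ttm);
}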

Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
 drivers/gpu/drm/xe/xe_bo.c       | 25 ++++++++-----
 drivers/gpu/drm/xe/xe_bo_types.h |  2 ++
 drivers/gpu/drm/xe/xe_migrate.c  | 62 ++++++++++++++++----------------
 3 files changed, 50 insertions(+), 39 deletions(-)

diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4730ee3c1012..a40f17ae21e7 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -630,10 +630,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	bool move_lacks_source;
 	bool tt_has_data;
 	bool needs_clear;
+	bool handle_system_ccs = !IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
+				 ttm && ttm_tt_is_populated(ttm);
 	int ret = 0;
-
-	/* Bo creation path, moving to system or TT. No clearing required. */
-	if (!old_mem && ttm) {
+	/* Bo creation path, moving to system or TT. */
+	if (((old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT) ||
+	     (!old_mem && ttm)) && !handle_system_ccs) {
 		ttm_bo_move_null(ttm_bo, new_mem);
 		return 0;
 	}
@@ -648,14 +650,13 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 	tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
 			      (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
 
-	move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
+	move_lacks_source = handle_system_ccs ? !bo->ccs_cleared :
+						(!mem_type_is_vram(old_mem_type) && !tt_has_data);
 
 	needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
 		(!ttm && ttm_bo->type == ttm_bo_type_device);
 
-	if ((move_lacks_source && !needs_clear) ||
-	    (old_mem_type == XE_PL_SYSTEM &&
-	     new_mem->mem_type == XE_PL_TT)) {
+	if (move_lacks_source && !needs_clear) {
 		ttm_bo_move_null(ttm_bo, new_mem);
 		goto out;
 	}
@@ -686,8 +687,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 			ret = timeout;
 			goto out;
 		}
-		ttm_bo_move_null(ttm_bo, new_mem);
-		goto out;
+
+		if (!handle_system_ccs) {
+			ttm_bo_move_null(ttm_bo, new_mem);
+			goto out;
+		}
 	}
 
 	if (!move_lacks_source &&
@@ -708,6 +712,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
 		migrate = mem_type_to_migrate(xe, new_mem->mem_type);
 	else if (mem_type_is_vram(old_mem_type))
 		migrate = mem_type_to_migrate(xe, old_mem_type);
+	else
+		migrate = xe->tiles[0].migrate;
 
 	xe_assert(xe, migrate);
 
@@ -1229,6 +1235,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
 		alignment = SZ_4K >> PAGE_SHIFT;
 	}
 
+	bo->ccs_cleared = false;
 	bo->tile = tile;
 	bo->size = size;
 	bo->flags = flags;
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index 4bff60996168..508e67c81427 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -79,6 +79,8 @@ struct xe_bo {
 	struct llist_node freed;
 	/** @created: Whether the bo has passed initial creation */
 	bool created;
+	/** @ccs_cleared: true means CCS region of bo is already cleared */
+	bool ccs_cleared;
 };
 
 #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index bdcb20f23531..bac24768fe2a 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -564,14 +564,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
 
 static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 			       struct xe_bb *bb,
-			       u64 src_ofs, bool src_is_vram,
-			       u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+			       u64 src_ofs, bool src_is_indirect,
+			       u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
 			       u64 ccs_ofs, bool copy_ccs)
 {
 	struct xe_gt *gt = m->tile->primary_gt;
 	u32 flush_flags = 0;
 
-	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
+	if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
 		/*
 		 * If the src is already in vram, then it should already
 		 * have been cleared by us, or has been populated by the
@@ -580,28 +580,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
 		 * Otherwise if the bo doesn't have any CCS metadata attached,
 		 * we still need to clear it for security reasons.
 		 */
-		u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
+		u64 ccs_src_ofs =  src_is_indirect ? src_ofs : m->cleared_mem_ofs;
 
 		emit_copy_ccs(gt, bb,
 			      dst_ofs, true,
-			      ccs_src_ofs, src_is_vram, dst_size);
+			      ccs_src_ofs, src_is_indirect, dst_size);
 
 		flush_flags = MI_FLUSH_DW_CCS;
 	} else if (copy_ccs) {
-		if (!src_is_vram)
+		if (!src_is_indirect)
 			src_ofs = ccs_ofs;
-		else if (!dst_is_vram)
+		else if (!dst_is_indirect)
 			dst_ofs = ccs_ofs;
 
-		/*
-		 * At the moment, we don't support copying CCS metadata from
-		 * system to system.
-		 */
-		xe_gt_assert(gt, src_is_vram || dst_is_vram);
+		xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
 
-		emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
-			      src_is_vram, dst_size);
-		if (dst_is_vram)
+		emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
+			      src_is_indirect, dst_size);
+		if (dst_is_indirect)
 			flush_flags = MI_FLUSH_DW_CCS;
 	}
 
@@ -642,6 +638,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 	u64 src_L0, dst_L0;
 	int pass = 0;
 	int err;
+	bool src_is_pltt = src->mem_type == XE_PL_TT;
+	bool dst_is_pltt = dst->mem_type == XE_PL_TT;
 	bool src_is_vram = mem_type_is_vram(src->mem_type);
 	bool dst_is_vram = mem_type_is_vram(dst->mem_type);
 	bool copy_ccs = xe_device_has_flat_ccs(xe) &&
@@ -682,9 +680,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		src_L0 = xe_migrate_res_sizes(&src_it);
 		dst_L0 = xe_migrate_res_sizes(&dst_it);
 
-		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
-			pass++, src_L0, dst_L0);
-
+		drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", pass++, src_L0, dst_L0);
 		src_L0 = min(src_L0, dst_L0);
 
 		batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
@@ -704,8 +700,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		}
 
 		/* Add copy commands size here */
-		batch_size += EMIT_COPY_DW +
-			(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+		batch_size += ((!src_is_vram && !dst_is_vram) ? 0 : EMIT_COPY_DW) +
+			(xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
 
 		bb = xe_bb_new(gt, batch_size, usm);
 		if (IS_ERR(bb)) {
@@ -731,10 +727,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
-			  XE_PAGE_SIZE);
-		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
-						  dst_L0_ofs, dst_is_vram,
+		if (src_is_vram || dst_is_vram)
+			emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+		flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+						  IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+						  dst_L0_ofs,
+						  IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
 						  src_L0, ccs_ofs, copy_ccs);
 
 		mutex_lock(&m->job_mutex);
@@ -907,6 +906,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 	bool clear_vram = mem_type_is_vram(dst->mem_type);
 	struct xe_gt *gt = m->tile->primary_gt;
 	struct xe_device *xe = gt_to_xe(gt);
+	bool clear_system_ccs = xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe);
 	struct dma_fence *fence = NULL;
 	u64 size = bo->size;
 	struct xe_res_cursor src_it;
@@ -936,9 +936,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		batch_size = 2 +
 			pte_update_size(m, clear_vram, src, &src_it,
 					&clear_L0, &clear_L0_ofs, &clear_L0_pt,
-					emit_clear_cmd_len(gt), 0,
+					clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
 					NUM_PT_PER_BLIT);
-		if (xe_device_has_flat_ccs(xe) && clear_vram)
+		if (xe_bo_needs_ccs_pages(bo))
 			batch_size += EMIT_COPY_CCS_DW;
 
 		/* Clear commands */
@@ -953,7 +953,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		}
 
 		size -= clear_L0;
-
 		/* Preemption is enabled again by the ring ops. */
 		if (!clear_vram) {
 			emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
@@ -964,10 +963,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
 		update_idx = bb->len;
 
-		emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
-			   clear_vram);
+		if (!clear_system_ccs)
+			emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
 
-		if (xe_device_has_flat_ccs(xe) && clear_vram) {
+		if (xe_bo_needs_ccs_pages(bo)) {
 			emit_copy_ccs(gt, bb, clear_L0_ofs, true,
 				      m->cleared_mem_ofs, false, clear_L0);
 			flush_flags = MI_FLUSH_DW_CCS;
@@ -1024,6 +1023,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
 		return ERR_PTR(err);
 	}
 
+	if (clear_system_ccs)
+		bo->ccs_cleared = true;
+
 	return fence;
 }
 
-- 
2.25.1


