[Intel-xe] [PATCH v5 7/9] drm/xe/xe2: Handle flat ccs move for igfx.
Himal Prasad Ghimiray
himal.prasad.ghimiray at intel.com
Wed Dec 6 17:20:25 UTC 2023
- Clear flat ccs during user bo creation.
- copy ccs meta data between flat ccs and bo during eviction and
restore.
- Add a bool field ccs_cleared in bo, true means ccs region of bo is
already cleared.
v2:
- Rebase.
v3:
- Maintain order of xe_bo_move_notify for ttm_bo_type_sg.
Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
---
drivers/gpu/drm/xe/xe_bo.c | 29 ++++++++++-----
drivers/gpu/drm/xe/xe_bo_types.h | 4 +++
drivers/gpu/drm/xe/xe_migrate.c | 60 ++++++++++++++++++--------------
3 files changed, 57 insertions(+), 36 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 81630838d769..526a1304b939 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -647,10 +647,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
bool move_lacks_source;
bool tt_has_data;
bool needs_clear;
+ bool handle_system_ccs = (!IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
+ ttm && ttm_tt_is_populated(ttm)) ? true : false;
int ret = 0;
-
- /* Bo creation path, moving to system or TT. No clearing required. */
- if (!old_mem && ttm) {
+ /* Bo creation path, moving to system or TT. */
+ if ((!old_mem && ttm) && !handle_system_ccs) {
ttm_bo_move_null(ttm_bo, new_mem);
return 0;
}
@@ -665,14 +666,18 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
(ttm->page_flags & TTM_TT_FLAG_SWAPPED));
- move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
+ move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
+ (!mem_type_is_vram(old_mem_type) && !tt_has_data);
needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
(!ttm && ttm_bo->type == ttm_bo_type_device);
- if ((move_lacks_source && !needs_clear) ||
- (old_mem_type == XE_PL_SYSTEM &&
- new_mem->mem_type == XE_PL_TT)) {
+ if ((move_lacks_source && !needs_clear)) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
+
+ if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT && !handle_system_ccs) {
ttm_bo_move_null(ttm_bo, new_mem);
goto out;
}
@@ -703,8 +708,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
ret = timeout;
goto out;
}
- ttm_bo_move_null(ttm_bo, new_mem);
- goto out;
+
+ if (!handle_system_ccs) {
+ ttm_bo_move_null(ttm_bo, new_mem);
+ goto out;
+ }
}
if (!move_lacks_source &&
@@ -725,6 +733,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
migrate = mem_type_to_migrate(xe, new_mem->mem_type);
else if (mem_type_is_vram(old_mem_type))
migrate = mem_type_to_migrate(xe, old_mem_type);
+ else
+ migrate = xe->tiles[0].migrate;
xe_assert(xe, migrate);
@@ -1254,6 +1264,7 @@ struct xe_bo *___xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
return bo;
}
+ bo->ccs_cleared = false;
bo->tile = tile;
bo->size = size;
bo->flags = flags;
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
index f71dbc518958..64c2249a4e40 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -79,6 +79,10 @@ struct xe_bo {
struct llist_node freed;
/** @created: Whether the bo has passed initial creation */
bool created;
+
+ /** @ccs_cleared */
+ bool ccs_cleared;
+
/**
* @cpu_caching: CPU caching mode. Currently only used for userspace
* objects.
diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
index 1bfb249680f4..ae11701408a9 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -567,14 +567,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
struct xe_bb *bb,
- u64 src_ofs, bool src_is_vram,
- u64 dst_ofs, bool dst_is_vram, u32 dst_size,
+ u64 src_ofs, bool src_is_indirect,
+ u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
u64 ccs_ofs, bool copy_ccs)
{
struct xe_gt *gt = m->tile->primary_gt;
u32 flush_flags = 0;
- if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
+ if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
/*
* If the src is already in vram, then it should already
* have been cleared by us, or has been populated by the
@@ -583,28 +583,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
* Otherwise if the bo doesn't have any CCS metadata attached,
* we still need to clear it for security reasons.
*/
- u64 ccs_src_ofs = src_is_vram ? src_ofs : m->cleared_mem_ofs;
+ u64 ccs_src_ofs = src_is_indirect ? src_ofs : m->cleared_mem_ofs;
emit_copy_ccs(gt, bb,
dst_ofs, true,
- ccs_src_ofs, src_is_vram, dst_size);
+ ccs_src_ofs, src_is_indirect, dst_size);
flush_flags = MI_FLUSH_DW_CCS;
} else if (copy_ccs) {
- if (!src_is_vram)
+ if (!src_is_indirect)
src_ofs = ccs_ofs;
- else if (!dst_is_vram)
+ else if (!dst_is_indirect)
dst_ofs = ccs_ofs;
- /*
- * At the moment, we don't support copying CCS metadata from
- * system to system.
- */
- xe_gt_assert(gt, src_is_vram || dst_is_vram);
+ xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
- emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
- src_is_vram, dst_size);
- if (dst_is_vram)
+ emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
+ src_is_indirect, dst_size);
+ if (dst_is_indirect)
flush_flags = MI_FLUSH_DW_CCS;
}
@@ -645,6 +641,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
u64 src_L0, dst_L0;
int pass = 0;
int err;
+ bool src_is_pltt = src->mem_type == XE_PL_TT;
+ bool dst_is_pltt = dst->mem_type == XE_PL_TT;
bool src_is_vram = mem_type_is_vram(src->mem_type);
bool dst_is_vram = mem_type_is_vram(dst->mem_type);
bool copy_ccs = xe_device_has_flat_ccs(xe) &&
@@ -720,8 +718,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
}
/* Add copy commands size here */
- batch_size += EMIT_COPY_DW +
- (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
+ batch_size += ((!src_is_vram && !dst_is_vram) ? 0 : EMIT_COPY_DW) +
+ ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
bb = xe_bb_new(gt, batch_size, usm);
if (IS_ERR(bb)) {
@@ -747,10 +745,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
- emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
- XE_PAGE_SIZE);
- flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
- dst_L0_ofs, dst_is_vram,
+ if (src_is_vram || dst_is_vram)
+ emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
+
+ flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
+ IS_DGFX(xe) ? src_is_vram : src_is_pltt,
+ dst_L0_ofs,
+ IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
src_L0, ccs_ofs, copy_ccs);
mutex_lock(&m->job_mutex);
@@ -923,6 +924,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
bool clear_vram = mem_type_is_vram(dst->mem_type);
struct xe_gt *gt = m->tile->primary_gt;
struct xe_device *xe = gt_to_xe(gt);
+ bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe)) ? true : false;
struct dma_fence *fence = NULL;
u64 size = bo->size;
struct xe_res_cursor src_it;
@@ -963,9 +965,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
batch_size = 2 +
pte_update_size(m, clear_vram, src, &src_it,
&clear_L0, &clear_L0_ofs, &clear_L0_pt,
- emit_clear_cmd_len(gt), 0,
+ clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
avail_pts);
- if (xe_device_has_flat_ccs(xe) && clear_vram)
+
+ if (xe_bo_needs_ccs_pages(bo))
batch_size += EMIT_COPY_CCS_DW;
/* Clear commands */
@@ -980,7 +983,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
}
size -= clear_L0;
-
/* Preemption is enabled again by the ring ops. */
if (!clear_vram) {
emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, clear_L0,
@@ -991,9 +993,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
update_idx = bb->len;
- emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
- clear_vram);
- if (xe_device_has_flat_ccs(xe) && clear_vram) {
+ if (!clear_system_ccs)
+ emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
+
+ if (xe_bo_needs_ccs_pages(bo)) {
emit_copy_ccs(gt, bb, clear_L0_ofs, true,
m->cleared_mem_ofs, false, clear_L0);
flush_flags = MI_FLUSH_DW_CCS;
@@ -1050,6 +1053,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
return ERR_PTR(err);
}
+ if (clear_system_ccs)
+ bo->ccs_cleared = true;
+
return fence;
}
--
2.25.1
More information about the Intel-xe
mailing list