[Intel-xe] [RFC 6/6] drm/xe/xe2: Handle flat ccs move for igfx.

Matthew Auld matthew.william.auld at gmail.com
Thu Nov 23 16:50:08 UTC 2023


On Tue, 21 Nov 2023 at 09:54, Himal Prasad Ghimiray
<himal.prasad.ghimiray at intel.com> wrote:
>
> - Clear flat CCS during user bo creation.
> - Copy CCS metadata between flat CCS and bo during eviction and
> restore.

I guess it is possible to detect compression usage during vm_bind?
Just wondering if we can somehow push this work out to bind time, such
that we can skip it entirely if compression is never even used?
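
Something like the below is roughly what I had in mind, assuming the
bind carries a pat_index we can inspect. Completely untested sketch;
pat_index_is_compressed() and xe_migrate_ccs_clear() are made-up names
here, standing in for whatever the real PAT decode and CCS clear entry
points turn out to be:

static int xe_vm_bind_check_ccs(struct xe_vm *vm, struct xe_vma *vma,
                                u16 pat_index)
{
        struct xe_bo *bo = xe_vma_bo(vma);

        /* Only the igfx flat CCS case needs any of this. */
        if (IS_DGFX(vm->xe) || !xe_device_has_flat_ccs(vm->xe))
                return 0;

        /* If the bind never enables compression there is nothing to do. */
        if (!bo || !pat_index_is_compressed(vm->xe, pat_index))
                return 0;

        /* First compressed bind; make sure the CCS state starts out cleared. */
        if (!bo->ccs_cleared)
                return xe_migrate_ccs_clear(vm->xe->tiles[0].migrate, bo);

        return 0;
}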

> - Add a bool field ccs_cleared to bo; true means the CCS region of
> the bo is already cleared.
>
> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c       | 25 ++++++++-----
>  drivers/gpu/drm/xe/xe_bo_types.h |  2 ++
>  drivers/gpu/drm/xe/xe_migrate.c  | 62 ++++++++++++++++----------------
>  3 files changed, 50 insertions(+), 39 deletions(-)
>
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 4730ee3c1012..a40f17ae21e7 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -630,10 +630,12 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>         bool move_lacks_source;
>         bool tt_has_data;
>         bool needs_clear;
> +       bool handle_system_ccs = !IS_DGFX(xe) && xe_bo_needs_ccs_pages(bo) &&
> +                                ttm && ttm_tt_is_populated(ttm);
>         int ret = 0;
> -
> -       /* Bo creation path, moving to system or TT. No clearing required. */
> -       if (!old_mem && ttm) {
> +       /* Bo creation path, or moving from SYSTEM to TT; no CCS handling needed. */
> +       if (((old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == XE_PL_TT) ||
> +            (!old_mem && ttm)) && !handle_system_ccs) {
>                 ttm_bo_move_null(ttm_bo, new_mem);
>                 return 0;
>         }
> @@ -648,14 +650,13 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>         tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
>                               (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
>
> -       move_lacks_source = !mem_type_is_vram(old_mem_type) && !tt_has_data;
> +       move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared) :
> +                                               (!mem_type_is_vram(old_mem_type) && !tt_has_data);
>
>         needs_clear = (ttm && ttm->page_flags & TTM_TT_FLAG_ZERO_ALLOC) ||
>                 (!ttm && ttm_bo->type == ttm_bo_type_device);
>
> -       if ((move_lacks_source && !needs_clear) ||
> -           (old_mem_type == XE_PL_SYSTEM &&
> -            new_mem->mem_type == XE_PL_TT)) {
> +       if (move_lacks_source && !needs_clear) {
>                 ttm_bo_move_null(ttm_bo, new_mem);
>                 goto out;
>         }
> @@ -686,8 +687,11 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>                         ret = timeout;
>                         goto out;
>                 }
> -               ttm_bo_move_null(ttm_bo, new_mem);
> -               goto out;
> +
> +               if (!handle_system_ccs) {
> +                       ttm_bo_move_null(ttm_bo, new_mem);
> +                       goto out;
> +               }
>         }
>
>         if (!move_lacks_source &&
> @@ -708,6 +712,8 @@ static int xe_bo_move(struct ttm_buffer_object *ttm_bo, bool evict,
>                 migrate = mem_type_to_migrate(xe, new_mem->mem_type);
>         else if (mem_type_is_vram(old_mem_type))
>                 migrate = mem_type_to_migrate(xe, old_mem_type);
> +       else
> +               migrate = xe->tiles[0].migrate;
>
>         xe_assert(xe, migrate);
>
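Nit: maybe write the fallback as

        else
                migrate = xe_device_get_root_tile(xe)->migrate;

to make it explicit that the system-to-system CCS work always lands on
the root tile? Should be the same thing either way.
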
> @@ -1229,6 +1235,7 @@ struct xe_bo *__xe_bo_create_locked(struct xe_device *xe, struct xe_bo *bo,
>                 alignment = SZ_4K >> PAGE_SHIFT;
>         }
>
> +       bo->ccs_cleared = false;
>         bo->tile = tile;
>         bo->size = size;
>         bo->flags = flags;
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h
> index 4bff60996168..508e67c81427 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -79,6 +79,8 @@ struct xe_bo {
>         struct llist_node freed;
>         /** @created: Whether the bo has passed initial creation */
>         bool created;
> +       /** @ccs_cleared: true means the CCS region of the bo is already cleared */
> +       bool ccs_cleared;
>  };
>
>  #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
> index bdcb20f23531..bac24768fe2a 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -564,14 +564,14 @@ static u64 xe_migrate_batch_base(struct xe_migrate *m, bool usm)
>
>  static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>                                struct xe_bb *bb,
> -                              u64 src_ofs, bool src_is_vram,
> -                              u64 dst_ofs, bool dst_is_vram, u32 dst_size,
> +                              u64 src_ofs, bool src_is_indirect,
> +                              u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
>                                u64 ccs_ofs, bool copy_ccs)
>  {
>         struct xe_gt *gt = m->tile->primary_gt;
>         u32 flush_flags = 0;
>
> -       if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_vram) {
> +       if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && dst_is_indirect) {
>                 /*
>                  * If the src is already in vram, then it should already
>                  * have been cleared by us, or has been populated by the
> @@ -580,28 +580,24 @@ static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>                  * Otherwise if the bo doesn't have any CCS metadata attached,
>                  * we still need to clear it for security reasons.
>                  */
> -               u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
> +               u64 ccs_src_ofs =  src_is_indirect ? src_ofs : m->cleared_mem_ofs;
>
>                 emit_copy_ccs(gt, bb,
>                               dst_ofs, true,
> -                             ccs_src_ofs, src_is_vram, dst_size);
> +                             ccs_src_ofs, src_is_indirect, dst_size);
>
>                 flush_flags = MI_FLUSH_DW_CCS;
>         } else if (copy_ccs) {
> -               if (!src_is_vram)
> +               if (!src_is_indirect)
>                         src_ofs = ccs_ofs;
> -               else if (!dst_is_vram)
> +               else if (!dst_is_indirect)
>                         dst_ofs = ccs_ofs;
>
> -               /*
> -                * At the moment, we don't support copying CCS metadata from
> -                * system to system.
> -                */
> -               xe_gt_assert(gt, src_is_vram || dst_is_vram);
> +               xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
>
> -               emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
> -                             src_is_vram, dst_size);
> -               if (dst_is_vram)
> +               emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
> +                             src_is_indirect, dst_size);
> +               if (dst_is_indirect)
>                         flush_flags = MI_FLUSH_DW_CCS;
>         }
>
> @@ -642,6 +638,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>         u64 src_L0, dst_L0;
>         int pass = 0;
>         int err;
> +       bool src_is_pltt = src->mem_type == XE_PL_TT;
> +       bool dst_is_pltt = dst->mem_type == XE_PL_TT;
>         bool src_is_vram = mem_type_is_vram(src->mem_type);
>         bool dst_is_vram = mem_type_is_vram(dst->mem_type);
>         bool copy_ccs = xe_device_has_flat_ccs(xe) &&
> @@ -682,9 +680,7 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>                 src_L0 = xe_migrate_res_sizes(&src_it);
>                 dst_L0 = xe_migrate_res_sizes(&dst_it);
>
> -               drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
> -                       pass++, src_L0, dst_L0);
> -
> +               drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n", pass++, src_L0, dst_L0);
>                 src_L0 = min(src_L0, dst_L0);
>
>                 batch_size += pte_update_size(m, src_is_vram, src, &src_it, &src_L0,
> @@ -704,8 +700,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>                 }
>
>                 /* Add copy commands size here */
> -               batch_size += EMIT_COPY_DW +
> -                       (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
> +               batch_size += ((!src_is_vram && !dst_is_vram) ? 0 : EMIT_COPY_DW) +
> +                       (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
>
>                 bb = xe_bb_new(gt, batch_size, usm);
>                 if (IS_ERR(bb)) {
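The !src_is_vram && !dst_is_vram condition here has to stay in sync
with the emit_copy() call further down. Maybe pull it into a local,
something like (untested):

        bool copy_only_ccs = !src_is_vram && !dst_is_vram;

and then use copy_only_ccs both for the batch_size accounting and for
deciding whether to skip the emit_copy()?
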
> @@ -731,10 +727,13 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>                 bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>                 update_idx = bb->len;
>
> -               emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
> -                         XE_PAGE_SIZE);
> -               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, src_is_vram,
> -                                                 dst_L0_ofs, dst_is_vram,
> +               if (src_is_vram || dst_is_vram)
> +                       emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, XE_PAGE_SIZE);
> +
> +               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
> +                                                 IS_DGFX(xe) ? src_is_vram : src_is_pltt,
> +                                                 dst_L0_ofs,
> +                                                 IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
>                                                   src_L0, ccs_ofs, copy_ccs);
>
>                 mutex_lock(&m->job_mutex);
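The IS_DGFX() ternaries here took me a moment to parse. Maybe hoist
them into locals next to src_is_pltt/dst_is_pltt above, something like
(again untested, and the comment is my guess at the semantics):

        /*
         * Indirect CCS access: through vram on dgfx, but through the
         * TT mapping for the igfx flat CCS save/restore.
         */
        bool src_is_indirect = IS_DGFX(xe) ? src_is_vram : src_is_pltt;
        bool dst_is_indirect = IS_DGFX(xe) ? dst_is_vram : dst_is_pltt;

and then just pass those to xe_migrate_ccs_copy()?
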
> @@ -907,6 +906,7 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>         bool clear_vram = mem_type_is_vram(dst->mem_type);
>         struct xe_gt *gt = m->tile->primary_gt;
>         struct xe_device *xe = gt_to_xe(gt);
> +       bool clear_system_ccs = xe_bo_needs_ccs_pages(bo) && !IS_DGFX(xe);
>         struct dma_fence *fence = NULL;
>         u64 size = bo->size;
>         struct xe_res_cursor src_it;
> @@ -936,9 +936,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>                 batch_size = 2 +
>                         pte_update_size(m, clear_vram, src, &src_it,
>                                         &clear_L0, &clear_L0_ofs, &clear_L0_pt,
> -                                       emit_clear_cmd_len(gt), 0,
> +                                       clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
>                                         NUM_PT_PER_BLIT);
> -               if (xe_device_has_flat_ccs(xe) && clear_vram)
> +               if (xe_bo_needs_ccs_pages(bo))
>                         batch_size += EMIT_COPY_CCS_DW;
>
>                 /* Clear commands */
> @@ -953,7 +953,6 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>                 }
>
>                 size -= clear_L0;
> -
>                 /* Preemption is enabled again by the ring ops. */
>                 if (!clear_vram) {
>                         emit_pte(m, bb, clear_L0_pt, clear_vram, &src_it, clear_L0,
> @@ -964,10 +963,10 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>                 bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>                 update_idx = bb->len;
>
> -               emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
> -                          clear_vram);
> +               if (!clear_system_ccs)
> +                       emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, clear_vram);
>
> -               if (xe_device_has_flat_ccs(xe) && clear_vram) {
> +               if (xe_bo_needs_ccs_pages(bo)) {
>                         emit_copy_ccs(gt, bb, clear_L0_ofs, true,
>                                       m->cleared_mem_ofs, false, clear_L0);
>                         flush_flags = MI_FLUSH_DW_CCS;
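On dgfx, can xe_bo_needs_ccs_pages(bo) be true here while clear_vram
is false, say for a VRAM-capable bo currently being cleared in system?
Previously the CCS copy was gated on clear_vram as well, so just want
to make sure we don't start emitting it for plain system clears on
dgfx.
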
> @@ -1024,6 +1023,9 @@ struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>                 return ERR_PTR(err);
>         }
>
> +       if (clear_system_ccs)
> +               bo->ccs_cleared = true;
> +
>         return fence;
>  }
>
> --
> 2.25.1
>

