[Intel-xe] [RFC v2 6/6] drm/xe/xe2: Handle flat ccs move for igfx.

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Mon Nov 27 03:25:43 UTC 2023


On 24-11-2023 21:18, Thomas Hellström wrote:
> On Tue, 2023-11-21 at 15:39 +0530, Himal Prasad Ghimiray wrote:
>> - Clear flat ccs during user bo creation.
>> - Copy ccs metadata between flat ccs and bo during eviction and
>> restore.
>> - Add a bool field ccs_cleared in bo; true means the ccs region of
>> the bo is already cleared.
>>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> Signed-off-by: Himal Prasad Ghimiray
>> <himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/xe_bo.c       | 25 ++++++++-----
>>   drivers/gpu/drm/xe/xe_bo_types.h |  2 ++
>>   drivers/gpu/drm/xe/xe_migrate.c  | 62 ++++++++++++++++--------------
>> --
>>   3 files changed, 50 insertions(+), 39 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>> index 4730ee3c1012..a40f17ae21e7 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.c
>> +++ b/drivers/gpu/drm/xe/xe_bo.c
>> @@ -630,10 +630,12 @@ static int xe_bo_move(struct ttm_buffer_object
>> *ttm_bo, bool evict,
>>          bool move_lacks_source;
>>          bool tt_has_data;
>>          bool needs_clear;
>> +       bool handle_system_ccs = (!IS_DGFX(xe) &&
>> xe_bo_needs_ccs_pages(bo) &&
>> +                                 ttm && ttm_tt_is_populated(ttm)) ?
>> true : false;
>>          int ret = 0;
>> -
>> -       /* Bo creation path, moving to system or TT. No clearing
>> required. */
>> -       if (!old_mem && ttm) {
>> +       /* Bo creation path, moving to system or TT. */
>> +       if (((old_mem_type == XE_PL_SYSTEM && new_mem->mem_type ==
>> XE_PL_TT) ||
> I figure moving from SYSTEM to TT must always trigger a copy or clear
> of CCS in the handle_system_ccs case?
>
> /Thomas
AFAIU (as far as I understand), that is correct.
>
>> +            (!old_mem && ttm)) && !handle_system_ccs) {
>>                  ttm_bo_move_null(ttm_bo, new_mem);
>>                  return 0;
>>          }
>> @@ -648,14 +650,13 @@ static int xe_bo_move(struct ttm_buffer_object
>> *ttm_bo, bool evict,
>>          tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
>>                                (ttm->page_flags &
>> TTM_TT_FLAG_SWAPPED));
>>   
>> -       move_lacks_source = !mem_type_is_vram(old_mem_type) &&
>> !tt_has_data;
>> +       move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
>> +                                               (!mem_type_is_vram(ol
>> d_mem_type) && !tt_has_data);
>>   
>>          needs_clear = (ttm && ttm->page_flags &
>> TTM_TT_FLAG_ZERO_ALLOC) ||
>>                  (!ttm && ttm_bo->type == ttm_bo_type_device);
>>   
>> -       if ((move_lacks_source && !needs_clear) ||
>> -           (old_mem_type == XE_PL_SYSTEM &&
>> -            new_mem->mem_type == XE_PL_TT)) {
>> +       if ((move_lacks_source && !needs_clear)) {
>>                  ttm_bo_move_null(ttm_bo, new_mem);
>>                  goto out;
>>          }
>> @@ -686,8 +687,11 @@ static int xe_bo_move(struct ttm_buffer_object
>> *ttm_bo, bool evict,
>>                          ret = timeout;
>>                          goto out;
>>                  }
>> -               ttm_bo_move_null(ttm_bo, new_mem);
>> -               goto out;
>> +
>> +               if (!handle_system_ccs) {
>> +                       ttm_bo_move_null(ttm_bo, new_mem);
>> +                       goto out;
>> +               }
>>          }
>>   
>>          if (!move_lacks_source &&
>> @@ -708,6 +712,8 @@ static int xe_bo_move(struct ttm_buffer_object
>> *ttm_bo, bool evict,
>>                  migrate = mem_type_to_migrate(xe, new_mem->mem_type);
>>          else if (mem_type_is_vram(old_mem_type))
>>                  migrate = mem_type_to_migrate(xe, old_mem_type);
>> +       else
>> +               migrate = xe->tiles[0].migrate;
>>   
>>          xe_assert(xe, migrate);
>>   
>> @@ -1229,6 +1235,7 @@ struct xe_bo *__xe_bo_create_locked(struct
>> xe_device *xe, struct xe_bo *bo,
>>                  alignment = SZ_4K >> PAGE_SHIFT;
>>          }
>>   
>> +       bo->ccs_cleared = false;
>>          bo->tile = tile;
>>          bo->size = size;
>>          bo->flags = flags;
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h
>> b/drivers/gpu/drm/xe/xe_bo_types.h
>> index 4bff60996168..508e67c81427 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -79,6 +79,8 @@ struct xe_bo {
>>          struct llist_node freed;
>>          /** @created: Whether the bo has passed initial creation */
>>          bool created;
>> +       /** @ccs_cleared */
>> +       bool ccs_cleared;
>>   };
>>   
>>   #define intel_bo_to_drm_bo(bo) (&(bo)->ttm.base)
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c
>> b/drivers/gpu/drm/xe/xe_migrate.c
>> index bdcb20f23531..bac24768fe2a 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -564,14 +564,14 @@ static u64 xe_migrate_batch_base(struct
>> xe_migrate *m, bool usm)
>>   
>>   static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>>                                 struct xe_bb *bb,
>> -                              u64 src_ofs, bool src_is_vram,
>> -                              u64 dst_ofs, bool dst_is_vram, u32
>> dst_size,
>> +                              u64 src_ofs, bool src_is_indirect,
>> +                              u64 dst_ofs, bool dst_is_indirect, u32
>> dst_size,
>>                                 u64 ccs_ofs, bool copy_ccs)
>>   {
>>          struct xe_gt *gt = m->tile->primary_gt;
>>          u32 flush_flags = 0;
>>   
>> -       if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs &&
>> dst_is_vram) {
>> +       if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs &&
>> dst_is_indirect) {
>>                  /*
>>                   * If the src is already in vram, then it should
>> already
>>                   * have been cleared by us, or has been populated by
>> the
>> @@ -580,28 +580,24 @@ static u32 xe_migrate_ccs_copy(struct
>> xe_migrate *m,
>>                   * Otherwise if the bo doesn't have any CCS metadata
>> attached,
>>                   * we still need to clear it for security reasons.
>>                   */
>> -               u64 ccs_src_ofs =  src_is_vram ? src_ofs : m-
>>> cleared_mem_ofs;
>> +               u64 ccs_src_ofs =  src_is_indirect ? src_ofs : m-
>>> cleared_mem_ofs;
>>   
>>                  emit_copy_ccs(gt, bb,
>>                                dst_ofs, true,
>> -                             ccs_src_ofs, src_is_vram, dst_size);
>> +                             ccs_src_ofs, src_is_indirect,
>> dst_size);
>>   
>>                  flush_flags = MI_FLUSH_DW_CCS;
>>          } else if (copy_ccs) {
>> -               if (!src_is_vram)
>> +               if (!src_is_indirect)
>>                          src_ofs = ccs_ofs;
>> -               else if (!dst_is_vram)
>> +               else if (!dst_is_indirect)
>>                          dst_ofs = ccs_ofs;
>>   
>> -               /*
>> -                * At the moment, we don't support copying CCS
>> metadata from
>> -                * system to system.
>> -                */
>> -               xe_gt_assert(gt, src_is_vram || dst_is_vram);
>> +               xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
>>   
>> -               emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
>> -                             src_is_vram, dst_size);
>> -               if (dst_is_vram)
>> +               emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect,
>> src_ofs,
>> +                             src_is_indirect, dst_size);
>> +               if (dst_is_indirect)
>>                          flush_flags = MI_FLUSH_DW_CCS;
>>          }
>>   
>> @@ -642,6 +638,8 @@ struct dma_fence *xe_migrate_copy(struct
>> xe_migrate *m,
>>          u64 src_L0, dst_L0;
>>          int pass = 0;
>>          int err;
>> +       bool src_is_pltt = src->mem_type == XE_PL_TT;
>> +       bool dst_is_pltt = dst->mem_type == XE_PL_TT;
>>          bool src_is_vram = mem_type_is_vram(src->mem_type);
>>          bool dst_is_vram = mem_type_is_vram(dst->mem_type);
>>          bool copy_ccs = xe_device_has_flat_ccs(xe) &&
>> @@ -682,9 +680,7 @@ struct dma_fence *xe_migrate_copy(struct
>> xe_migrate *m,
>>                  src_L0 = xe_migrate_res_sizes(&src_it);
>>                  dst_L0 = xe_migrate_res_sizes(&dst_it);
>>   
>> -               drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
>> -                       pass++, src_L0, dst_L0);
>> -
>> +               drm_dbg(&xe->drm, "Pass %u, sizes: %llu & %llu\n",
>> pass++, src_L0, dst_L0);
>>                  src_L0 = min(src_L0, dst_L0);
>>   
>>                  batch_size += pte_update_size(m, src_is_vram, src,
>> &src_it, &src_L0,
>> @@ -704,8 +700,8 @@ struct dma_fence *xe_migrate_copy(struct
>> xe_migrate *m,
>>                  }
>>   
>>                  /* Add copy commands size here */
>> -               batch_size += EMIT_COPY_DW +
>> -                       (xe_device_has_flat_ccs(xe) ?
>> EMIT_COPY_CCS_DW : 0);
>> +               batch_size += ((!src_is_vram && !dst_is_vram) ? 0 :
>> EMIT_COPY_DW) +
>> +                       ((xe_device_has_flat_ccs(xe) ?
>> EMIT_COPY_CCS_DW : 0));
>>   
>>                  bb = xe_bb_new(gt, batch_size, usm);
>>                  if (IS_ERR(bb)) {
>> @@ -731,10 +727,13 @@ struct dma_fence *xe_migrate_copy(struct
>> xe_migrate *m,
>>                  bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>>                  update_idx = bb->len;
>>   
>> -               emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
>> -                         XE_PAGE_SIZE);
>> -               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
>> src_is_vram,
>> -                                                 dst_L0_ofs,
>> dst_is_vram,
>> +               if (src_is_vram || dst_is_vram)
>> +                       emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs,
>> src_L0, XE_PAGE_SIZE);
>> +
>> +               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
>> +                                                 IS_DGFX(xe) ?
>> src_is_vram : src_is_pltt,
>> +                                                 dst_L0_ofs,
>> +                                                 IS_DGFX(xe) ?
>> dst_is_vram : dst_is_pltt,
>>                                                    src_L0, ccs_ofs,
>> copy_ccs);
>>   
>>                  mutex_lock(&m->job_mutex);
>> @@ -907,6 +906,7 @@ struct dma_fence *xe_migrate_clear(struct
>> xe_migrate *m,
>>          bool clear_vram = mem_type_is_vram(dst->mem_type);
>>          struct xe_gt *gt = m->tile->primary_gt;
>>          struct xe_device *xe = gt_to_xe(gt);
>> +       bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) &&
>> !IS_DGFX(xe)) ? true : false;
>>          struct dma_fence *fence = NULL;
>>          u64 size = bo->size;
>>          struct xe_res_cursor src_it;
>> @@ -936,9 +936,9 @@ struct dma_fence *xe_migrate_clear(struct
>> xe_migrate *m,
>>                  batch_size = 2 +
>>                          pte_update_size(m, clear_vram, src, &src_it,
>>                                          &clear_L0, &clear_L0_ofs,
>> &clear_L0_pt,
>> -                                       emit_clear_cmd_len(gt), 0,
>> +                                       clear_system_ccs ? 0 :
>> emit_clear_cmd_len(gt), 0,
>>                                          NUM_PT_PER_BLIT);
>> -               if (xe_device_has_flat_ccs(xe) && clear_vram)
>> +               if (xe_bo_needs_ccs_pages(bo))
>>                          batch_size += EMIT_COPY_CCS_DW;
>>   
>>                  /* Clear commands */
>> @@ -953,7 +953,6 @@ struct dma_fence *xe_migrate_clear(struct
>> xe_migrate *m,
>>                  }
>>   
>>                  size -= clear_L0;
>> -
>>                  /* Preemption is enabled again by the ring ops. */
>>                  if (!clear_vram) {
>>                          emit_pte(m, bb, clear_L0_pt, clear_vram,
>> &src_it, clear_L0,
>> @@ -964,10 +963,10 @@ struct dma_fence *xe_migrate_clear(struct
>> xe_migrate *m,
>>                  bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>>                  update_idx = bb->len;
>>   
>> -               emit_clear(gt, bb, clear_L0_ofs, clear_L0,
>> XE_PAGE_SIZE,
>> -                          clear_vram);
>> +               if (!clear_system_ccs)
>> +                       emit_clear(gt, bb, clear_L0_ofs, clear_L0,
>> XE_PAGE_SIZE, clear_vram);
>>   
>> -               if (xe_device_has_flat_ccs(xe) && clear_vram) {
>> +               if (xe_bo_needs_ccs_pages(bo)) {
>>                          emit_copy_ccs(gt, bb, clear_L0_ofs, true,
>>                                        m->cleared_mem_ofs, false,
>> clear_L0);
>>                          flush_flags = MI_FLUSH_DW_CCS;
>> @@ -1024,6 +1023,9 @@ struct dma_fence *xe_migrate_clear(struct
>> xe_migrate *m,
>>                  return ERR_PTR(err);
>>          }
>>   
>> +       if (clear_system_ccs)
>> +               bo->ccs_cleared = true;
>> +
>>          return fence;
>>   }
>>   


More information about the Intel-xe mailing list