[PATCH v6 7/9] drm/xe/xe2: Handle flat ccs move for igfx.

Ghimiray, Himal Prasad himal.prasad.ghimiray at intel.com
Mon Dec 11 05:15:20 UTC 2023


On 07-12-2023 18:28, Thomas Hellström wrote:
>
> On 12/7/23 10:19, Himal Prasad Ghimiray wrote:
>> - Clear flat CCS during user bo creation.
>> - Copy CCS metadata between flat CCS and the bo during eviction and
>> restore.
>> - Add a bool field ccs_cleared to the bo; true means the CCS region of
>> the bo is already cleared.
>>
>> v2:
>>   - Rebase.
>>
>> v3:
>>   - Maintain order of xe_bo_move_notify for ttm_bo_type_sg.
>>
>> v4:
>>   - xe_migrate_copy can be used to copy src to dst bo on igfx too.
>> Add a bool which handles only ccs metadata copy.
>>
>> Cc: Thomas Hellström <thomas.hellstrom at linux.intel.com>
>> Signed-off-by: Himal Prasad Ghimiray <himal.prasad.ghimiray at intel.com>
>> ---
>>   drivers/gpu/drm/xe/tests/xe_migrate.c |  4 +-
>>   drivers/gpu/drm/xe/xe_bo.c            | 33 +++++++++-----
>>   drivers/gpu/drm/xe/xe_bo_types.h      |  4 ++
>>   drivers/gpu/drm/xe/xe_migrate.c       | 64 +++++++++++++++------------
>>   drivers/gpu/drm/xe/xe_migrate.h       |  3 +-
>>   5 files changed, 66 insertions(+), 42 deletions(-)
>>
>> diff --git a/drivers/gpu/drm/xe/tests/xe_migrate.c 
>> b/drivers/gpu/drm/xe/tests/xe_migrate.c
>> index f77477f7e9fa..6edd6f795f7e 100644
>> --- a/drivers/gpu/drm/xe/tests/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/tests/xe_migrate.c
>> @@ -152,7 +152,7 @@ static void test_copy(struct xe_migrate *m, 
>> struct xe_bo *bo,
>>         expected = 0xc0c0c0c0c0c0c0c0;
>>       fence = xe_migrate_copy(m, remote, bo, remote->ttm.resource,
>> -                bo->ttm.resource);
>> +                bo->ttm.resource, false);
>>       if (!sanity_fence_failed(xe, fence, big ? "Copying big bo 
>> remote -> vram" :
>>                    "Copying small bo remote -> vram", test)) {
>>           retval = xe_map_rd(xe, &bo->vmap, 0, u64);
>> @@ -169,7 +169,7 @@ static void test_copy(struct xe_migrate *m, 
>> struct xe_bo *bo,
>>       xe_map_memset(xe, &bo->vmap, 0, 0xc0, bo->size);
>>         fence = xe_migrate_copy(m, bo, remote, bo->ttm.resource,
>> -                remote->ttm.resource);
>> +                remote->ttm.resource, false);
>>       if (!sanity_fence_failed(xe, fence, big ? "Copying big bo vram 
>> -> remote" :
>>                    "Copying small bo vram -> remote", test)) {
>>           retval = xe_map_rd(xe, &remote->vmap, 0, u64);
>> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
>> index 81630838d769..5734c23be7d7 100644
>> --- a/drivers/gpu/drm/xe/xe_bo.c
>> +++ b/drivers/gpu/drm/xe/xe_bo.c
>> @@ -647,10 +647,11 @@ static int xe_bo_move(struct ttm_buffer_object 
>> *ttm_bo, bool evict,
>>       bool move_lacks_source;
>>       bool tt_has_data;
>>       bool needs_clear;
>> +    bool handle_system_ccs = (!IS_DGFX(xe) && 
>> xe_bo_needs_ccs_pages(bo) &&
>> +                  ttm && ttm_tt_is_populated(ttm)) ? true : false;
>>       int ret = 0;
>> -
>> -    /* Bo creation path, moving to system or TT. No clearing 
>> required. */
>> -    if (!old_mem && ttm) {
>> +    /* Bo creation path, moving to system or TT. */
>> +    if ((!old_mem && ttm) && !handle_system_ccs) {
>>           ttm_bo_move_null(ttm_bo, new_mem);
>>           return 0;
>>       }
>> @@ -665,14 +666,18 @@ static int xe_bo_move(struct ttm_buffer_object 
>> *ttm_bo, bool evict,
>>       tt_has_data = ttm && (ttm_tt_is_populated(ttm) ||
>>                     (ttm->page_flags & TTM_TT_FLAG_SWAPPED));
>>   -    move_lacks_source = !mem_type_is_vram(old_mem_type) && 
>> !tt_has_data;
>> +    move_lacks_source = handle_system_ccs ? (!bo->ccs_cleared)  :
>> +                        (!mem_type_is_vram(old_mem_type) && 
>> !tt_has_data);
>>         needs_clear = (ttm && ttm->page_flags & 
>> TTM_TT_FLAG_ZERO_ALLOC) ||
>>           (!ttm && ttm_bo->type == ttm_bo_type_device);
>>   -    if ((move_lacks_source && !needs_clear) ||
>> -        (old_mem_type == XE_PL_SYSTEM &&
>> -         new_mem->mem_type == XE_PL_TT)) {
>> +    if ((move_lacks_source && !needs_clear)) {
>> +        ttm_bo_move_null(ttm_bo, new_mem);
>> +        goto out;
>> +    }
>> +
>> +    if (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type == 
>> XE_PL_TT && !handle_system_ccs) {
>>           ttm_bo_move_null(ttm_bo, new_mem);
>>           goto out;
>>       }
>> @@ -703,8 +708,11 @@ static int xe_bo_move(struct ttm_buffer_object 
>> *ttm_bo, bool evict,
>>               ret = timeout;
>>               goto out;
>>           }
>> -        ttm_bo_move_null(ttm_bo, new_mem);
>> -        goto out;
>> +
>> +        if (!handle_system_ccs) {
>> +            ttm_bo_move_null(ttm_bo, new_mem);
>> +            goto out;
>> +        }
>>       }
>>         if (!move_lacks_source &&
>> @@ -725,6 +733,8 @@ static int xe_bo_move(struct ttm_buffer_object 
>> *ttm_bo, bool evict,
>>           migrate = mem_type_to_migrate(xe, new_mem->mem_type);
>>       else if (mem_type_is_vram(old_mem_type))
>>           migrate = mem_type_to_migrate(xe, old_mem_type);
>> +    else
>> +        migrate = xe->tiles[0].migrate;
>>         xe_assert(xe, migrate);
>>   @@ -767,8 +777,8 @@ static int xe_bo_move(struct ttm_buffer_object 
>> *ttm_bo, bool evict,
>>           if (move_lacks_source)
>>               fence = xe_migrate_clear(migrate, bo, new_mem);
>>           else
>> -            fence = xe_migrate_copy(migrate,
>> -                        bo, bo, old_mem, new_mem);
>> +            fence = xe_migrate_copy(migrate, bo, bo, old_mem,
>> +                        new_mem, handle_system_ccs);
>>           if (IS_ERR(fence)) {
>>               ret = PTR_ERR(fence);
>>               xe_device_mem_access_put(xe);
>> @@ -1254,6 +1264,7 @@ struct xe_bo *___xe_bo_create_locked(struct 
>> xe_device *xe, struct xe_bo *bo,
>>               return bo;
>>       }
>>   +    bo->ccs_cleared = false;
>>       bo->tile = tile;
>>       bo->size = size;
>>       bo->flags = flags;
>> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h 
>> b/drivers/gpu/drm/xe/xe_bo_types.h
>> index f71dbc518958..64c2249a4e40 100644
>> --- a/drivers/gpu/drm/xe/xe_bo_types.h
>> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
>> @@ -79,6 +79,10 @@ struct xe_bo {
>>       struct llist_node freed;
>>       /** @created: Whether the bo has passed initial creation */
>>       bool created;
>> +
>> +    /** @ccs_cleared */
>> +    bool ccs_cleared;
>> +
>>       /**
>>        * @cpu_caching: CPU caching mode. Currently only used for 
>> userspace
>>        * objects.
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c 
>> b/drivers/gpu/drm/xe/xe_migrate.c
>> index 1bfb249680f4..4c652f3f33b4 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -567,14 +567,14 @@ static u64 xe_migrate_batch_base(struct 
>> xe_migrate *m, bool usm)
>>     static u32 xe_migrate_ccs_copy(struct xe_migrate *m,
>>                      struct xe_bb *bb,
>> -                   u64 src_ofs, bool src_is_vram,
>> -                   u64 dst_ofs, bool dst_is_vram, u32 dst_size,
>> +                   u64 src_ofs, bool src_is_indirect,
>> +                   u64 dst_ofs, bool dst_is_indirect, u32 dst_size,
>>                      u64 ccs_ofs, bool copy_ccs)
>>   {
>>       struct xe_gt *gt = m->tile->primary_gt;
>>       u32 flush_flags = 0;
>>   -    if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && 
>> dst_is_vram) {
>> +    if (xe_device_has_flat_ccs(gt_to_xe(gt)) && !copy_ccs && 
>> dst_is_indirect) {
>>           /*
>>            * If the src is already in vram, then it should already
>>            * have been cleared by us, or has been populated by the
>> @@ -583,28 +583,24 @@ static u32 xe_migrate_ccs_copy(struct 
>> xe_migrate *m,
>>            * Otherwise if the bo doesn't have any CCS metadata attached,
>>            * we still need to clear it for security reasons.
>>            */
>> -        u64 ccs_src_ofs =  src_is_vram ? src_ofs : m->cleared_mem_ofs;
>> +        u64 ccs_src_ofs =  src_is_indirect ? src_ofs : 
>> m->cleared_mem_ofs;
>>             emit_copy_ccs(gt, bb,
>>                     dst_ofs, true,
>> -                  ccs_src_ofs, src_is_vram, dst_size);
>> +                  ccs_src_ofs, src_is_indirect, dst_size);
>>             flush_flags = MI_FLUSH_DW_CCS;
>>       } else if (copy_ccs) {
>> -        if (!src_is_vram)
>> +        if (!src_is_indirect)
>>               src_ofs = ccs_ofs;
>> -        else if (!dst_is_vram)
>> +        else if (!dst_is_indirect)
>>               dst_ofs = ccs_ofs;
>>   -        /*
>> -         * At the moment, we don't support copying CCS metadata from
>> -         * system to system.
>> -         */
>> -        xe_gt_assert(gt, src_is_vram || dst_is_vram);
>> +        xe_gt_assert(gt, src_is_indirect || dst_is_indirect);
>>   -        emit_copy_ccs(gt, bb, dst_ofs, dst_is_vram, src_ofs,
>> -                  src_is_vram, dst_size);
>> -        if (dst_is_vram)
>> +        emit_copy_ccs(gt, bb, dst_ofs, dst_is_indirect, src_ofs,
>> +                  src_is_indirect, dst_size);
>> +        if (dst_is_indirect)
>>               flush_flags = MI_FLUSH_DW_CCS;
>>       }
>>   @@ -620,6 +616,7 @@ static u32 xe_migrate_ccs_copy(struct 
>> xe_migrate *m,
>>    * the buffer object @dst is currently bound to.
>>    * @src: The source TTM resource.
>>    * @dst: The dst TTM resource.
>> + * @copy_only_ccs: If true copy only CCS metadata
>>    *
>>    * Copies the contents of @src to @dst: On flat CCS devices,
>>    * the CCS metadata is copied as well if needed, or if not present,
>> @@ -633,7 +630,8 @@ struct dma_fence *xe_migrate_copy(struct 
>> xe_migrate *m,
>>                     struct xe_bo *src_bo,
>>                     struct xe_bo *dst_bo,
>>                     struct ttm_resource *src,
>> -                  struct ttm_resource *dst)
>> +                  struct ttm_resource *dst,
>> +                  bool copy_only_ccs)
>>   {
>>       struct xe_gt *gt = m->tile->primary_gt;
>>       struct xe_device *xe = gt_to_xe(gt);
>> @@ -645,6 +643,8 @@ struct dma_fence *xe_migrate_copy(struct 
>> xe_migrate *m,
>>       u64 src_L0, dst_L0;
>>       int pass = 0;
>>       int err;
>> +    bool src_is_pltt = src->mem_type == XE_PL_TT;
>> +    bool dst_is_pltt = dst->mem_type == XE_PL_TT;
>>       bool src_is_vram = mem_type_is_vram(src->mem_type);
>>       bool dst_is_vram = mem_type_is_vram(dst->mem_type);
>>       bool copy_ccs = xe_device_has_flat_ccs(xe) &&
>> @@ -720,8 +720,8 @@ struct dma_fence *xe_migrate_copy(struct 
>> xe_migrate *m,
>>           }
>>             /* Add copy commands size here */
>> -        batch_size += EMIT_COPY_DW +
>> -            (xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0);
>> +        batch_size += ((copy_only_ccs) ? 0 : EMIT_COPY_DW) +
>> +            ((xe_device_has_flat_ccs(xe) ? EMIT_COPY_CCS_DW : 0));
>>             bb = xe_bb_new(gt, batch_size, usm);
>>           if (IS_ERR(bb)) {
>> @@ -747,10 +747,13 @@ struct dma_fence *xe_migrate_copy(struct 
>> xe_migrate *m,
>>           bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>>           update_idx = bb->len;
>>   -        emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0,
>> -              XE_PAGE_SIZE);
>> -        flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs, 
>> src_is_vram,
>> -                          dst_L0_ofs, dst_is_vram,
>> +        if (!copy_only_ccs)
>> +            emit_copy(gt, bb, src_L0_ofs, dst_L0_ofs, src_L0, 
>> XE_PAGE_SIZE);
>> +
>> +        flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
>> +                          IS_DGFX(xe) ? src_is_vram : src_is_pltt,
>> +                          dst_L0_ofs,
>> +                          IS_DGFX(xe) ? dst_is_vram : dst_is_pltt,
>>                             src_L0, ccs_ofs, copy_ccs);
>>             mutex_lock(&m->job_mutex);
>> @@ -923,6 +926,7 @@ struct dma_fence *xe_migrate_clear(struct 
>> xe_migrate *m,
>>       bool clear_vram = mem_type_is_vram(dst->mem_type);
>>       struct xe_gt *gt = m->tile->primary_gt;
>>       struct xe_device *xe = gt_to_xe(gt);
>> +    bool clear_system_ccs = (xe_bo_needs_ccs_pages(bo) && 
>> !IS_DGFX(xe)) ? true : false;
>>       struct dma_fence *fence = NULL;
>>       u64 size = bo->size;
>>       struct xe_res_cursor src_it;
>> @@ -963,9 +967,10 @@ struct dma_fence *xe_migrate_clear(struct 
>> xe_migrate *m,
>>           batch_size = 2 +
>>               pte_update_size(m, clear_vram, src, &src_it,
>>                       &clear_L0, &clear_L0_ofs, &clear_L0_pt,
>> -                    emit_clear_cmd_len(gt), 0,
>> +                    clear_system_ccs ? 0 : emit_clear_cmd_len(gt), 0,
>>                       avail_pts);
>> -        if (xe_device_has_flat_ccs(xe) && clear_vram)
>> +
>> +        if (xe_bo_needs_ccs_pages(bo))
>>               batch_size += EMIT_COPY_CCS_DW;
>
> I think this is incorrect. On DGFX, we need to clear CCS for security 
> reasons even if the bo is not compression enabled.

Thanks for pointing this out. I had missed this part for DGFX. Will 
address it in the next patch.
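
For reference, a rough sketch of how this hunk could keep the DGFX 
security clear while still covering the new igfx path. This is not the 
final fix, just one possible shape using only symbols already present 
in this patch (clear_vram, clear_system_ccs, EMIT_COPY_CCS_DW):

        if (xe_device_has_flat_ccs(xe) && (clear_vram || clear_system_ccs))
            batch_size += EMIT_COPY_CCS_DW;

On DGFX clear_system_ccs is always false, so this keeps the old "clear 
CCS whenever we clear VRAM on a flat-CCS device" behaviour; on igfx 
clear_vram is never true, so the new path is unchanged.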

>
>>             /* Clear commands */
>> @@ -980,7 +985,6 @@ struct dma_fence *xe_migrate_clear(struct 
>> xe_migrate *m,
>>           }
>>             size -= clear_L0;
>> -
>>           /* Preemption is enabled again by the ring ops. */
>>           if (!clear_vram) {
>>               emit_pte(m, bb, clear_L0_pt, clear_vram, true, &src_it, 
>> clear_L0,
>> @@ -991,9 +995,10 @@ struct dma_fence *xe_migrate_clear(struct 
>> xe_migrate *m,
>>           bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>>           update_idx = bb->len;
>>   -        emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE,
>> -               clear_vram);
>> -        if (xe_device_has_flat_ccs(xe) && clear_vram) {
>> +        if (!clear_system_ccs)
>> +            emit_clear(gt, bb, clear_L0_ofs, clear_L0, XE_PAGE_SIZE, 
>> clear_vram);
>> +
>> +        if (xe_bo_needs_ccs_pages(bo)) {
>
> Same here.

Will address this in the next patch.
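
The same sketch would apply here, again only as a reference and using 
only symbols from this patch; the condition mirrors the one above:

        if (xe_device_has_flat_ccs(xe) && (clear_vram || clear_system_ccs)) {
            emit_copy_ccs(gt, bb, clear_L0_ofs, true,
                          m->cleared_mem_ofs, false, clear_L0);
            flush_flags = MI_FLUSH_DW_CCS;
        }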

BR

Himal

>
>>               emit_copy_ccs(gt, bb, clear_L0_ofs, true,
>>                         m->cleared_mem_ofs, false, clear_L0);
>>               flush_flags = MI_FLUSH_DW_CCS;
>> @@ -1050,6 +1055,9 @@ struct dma_fence *xe_migrate_clear(struct 
>> xe_migrate *m,
>>           return ERR_PTR(err);
>>       }
>>   +    if (clear_system_ccs)
>> +        bo->ccs_cleared = true;
>> +
>>       return fence;
>>   }
>>   diff --git a/drivers/gpu/drm/xe/xe_migrate.h 
>> b/drivers/gpu/drm/xe/xe_migrate.h
>> index c729241776ad..951f19318ea4 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.h
>> +++ b/drivers/gpu/drm/xe/xe_migrate.h
>> @@ -85,7 +85,8 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate 
>> *m,
>>                     struct xe_bo *src_bo,
>>                     struct xe_bo *dst_bo,
>>                     struct ttm_resource *src,
>> -                  struct ttm_resource *dst);
>> +                  struct ttm_resource *dst,
>> +                  bool copy_only_ccs);
>>     struct dma_fence *xe_migrate_clear(struct xe_migrate *m,
>>                      struct xe_bo *bo,
