[PATCH v1 1/3] drm/xe/migrate: Add function for raw copy of VRAM and CCS
Laguna, Lukasz
lukasz.laguna at intel.com
Wed Oct 30 10:38:02 UTC 2024
On 10/16/2024 14:54, Nirmoy Das wrote:
> Hi Lukasz,
>
> On 10/16/2024 11:57 AM, Lukasz Laguna wrote:
>> Add support for copying chunks of data between VRAM and sysmem objects.
>> Additionally, allow copying the corresponding CCS metadata from or to a
>> dedicated buffer object.
>
> It is not very clear here why the existing APIs are not enough.
Valid point, I will extend the description in the next revision.
>
>
>> Signed-off-by: Lukasz Laguna <lukasz.laguna at intel.com>
>> ---
>> drivers/gpu/drm/xe/xe_migrate.c | 208 ++++++++++++++++++++++++++++++++
>> drivers/gpu/drm/xe/xe_migrate.h | 5 +
>> 2 files changed, 213 insertions(+)
>>
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.c b/drivers/gpu/drm/xe/xe_migrate.c
>> index cfd31ae49cc1..e4770f44582d 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.c
>> +++ b/drivers/gpu/drm/xe/xe_migrate.c
>> @@ -936,6 +936,214 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>> return fence;
>> }
>>
>> +/**
>> + * xe_migrate_raw_vram_copy() - Raw copy of VRAM object and corresponding CCS.
>> + * @vram_bo: The VRAM buffer object.
>> + * @vram_offset: The VRAM offset.
>> + * @sysmem_bo: The sysmem buffer object. If copying only CCS metadata set this
>> + * to NULL.
>> + * @sysmem_offset: The sysmem offset.
>> + * @ccs_bo: The CCS buffer object located in sysmem. If copying of CCS metadata
>> + * is not needed set this to NULL.
>> + * @ccs_offset: The CCS offset.
>> + * @size: The size of VRAM chunk to copy.
>> + * @to_sysmem: True to copy from VRAM to sysmem, false for opposite direction.
>> + *
>> + * Copies the content of buffer object from or to VRAM. If supported and
>> + * needed, it also copies corresponding CCS metadata.
>> + *
>> + * Return: Pointer to a dma_fence representing the last copy batch, or
>> + * an error pointer on failure. If there is a failure, any copy operation
>> + * started by the function call has been synced.
>> + */
>> +struct dma_fence *xe_migrate_raw_vram_copy(struct xe_bo *vram_bo, u64 vram_offset,
>> + struct xe_bo *sysmem_bo, u64 sysmem_offset,
>> + struct xe_bo *ccs_bo, u64 ccs_offset,
>> + u64 size, bool to_sysmem)
>> +{
>> + struct xe_device *xe = xe_bo_device(vram_bo);
>> + struct xe_tile *tile = vram_bo->tile;
>> + struct xe_gt *gt = tile->primary_gt;
>> + struct xe_migrate *m = tile->migrate;
>> + struct dma_fence *fence = NULL;
>> + struct ttm_resource *vram = vram_bo->ttm.resource, *sysmem, *ccs;
>> + struct xe_res_cursor vram_it, sysmem_it, ccs_it;
>> + u64 vram_L0_ofs, sysmem_L0_ofs;
>> + u32 vram_L0_pt, sysmem_L0_pt;
>> + u64 vram_L0, sysmem_L0;
>> + bool copy_content = sysmem_bo ? true : false;
>> + bool copy_ccs = ccs_bo ? true : false;
>> + int pass = 0;
>> + int err;
>> +
>> + if (!copy_content && !copy_ccs)
>> + return ERR_PTR(-EINVAL);
>> +
>> + if (!IS_ALIGNED(vram_offset | sysmem_offset | ccs_offset | size, PAGE_SIZE))
>> + return ERR_PTR(-EINVAL);
>> +
>> + if (!xe_bo_is_vram(vram_bo))
>> + return ERR_PTR(-EINVAL);
>> +
>> + if (range_overflows(vram_offset, size, (u64)vram_bo->ttm.base.size))
>> + return ERR_PTR(-EOVERFLOW);
>> +
>> + if (copy_content) {
>> + if (xe_bo_is_vram(sysmem_bo))
>> + return ERR_PTR(-EINVAL);
>> + if (range_overflows(sysmem_offset, size, (u64)sysmem_bo->ttm.base.size))
>> + return ERR_PTR(-EOVERFLOW);
>> + }
>> +
>> + if (copy_ccs) {
>> + if (xe_bo_is_vram(ccs_bo))
>> + return ERR_PTR(-EINVAL);
>> + if (!xe_device_has_flat_ccs(xe))
>> + return ERR_PTR(-EOPNOTSUPP);
>> + if (ccs_bo->ttm.base.size < xe_device_ccs_bytes(xe, size))
>> + return ERR_PTR(-EINVAL);
>> + if (range_overflows(ccs_offset, (u64)xe_device_ccs_bytes(xe, size),
>> + (u64)ccs_bo->ttm.base.size))
>> + return ERR_PTR(-EOVERFLOW);
>> + }
>> +
>> + xe_res_first(vram, vram_offset, size, &vram_it);
>> +
>> + if (copy_content) {
>> + sysmem = sysmem_bo->ttm.resource;
>> + xe_res_first_sg(xe_bo_sg(sysmem_bo), sysmem_offset, size, &sysmem_it);
>> + }
>> +
>> + if (copy_ccs) {
>> + ccs = ccs_bo->ttm.resource;
>> + xe_res_first_sg(xe_bo_sg(ccs_bo), ccs_offset, xe_device_ccs_bytes(xe, size),
>> + &ccs_it);
>> + }
>> +
>> + while (size) {
>> + u32 batch_size = 2; /* arb_clear() + MI_BATCH_BUFFER_END */
>> + struct xe_sched_job *job;
>> + struct xe_bb *bb;
>> + u32 flush_flags = 0;
>> + u32 update_idx;
>> + u64 ccs_ofs, ccs_size;
>> + u32 ccs_pt;
>> +
>> + bool usm = xe->info.has_usm;
>> + u32 avail_pts = max_mem_transfer_per_pass(xe) / LEVEL0_PAGE_TABLE_ENCODE_SIZE;
>> +
>> + vram_L0 = xe_migrate_res_sizes(m, &vram_it);
>> +
>> + if (copy_content) {
>> + sysmem_L0 = xe_migrate_res_sizes(m, &sysmem_it);
>> + vram_L0 = min(vram_L0, sysmem_L0);
>> + }
>> +
>> + drm_dbg(&xe->drm, "Pass %u, size: %llu\n", pass++, vram_L0);
>> +
>> + batch_size += pte_update_size(m, PTE_UPDATE_FLAG_IS_VRAM, vram, &vram_it, &vram_L0,
>> + &vram_L0_ofs, &vram_L0_pt, 0, 0, avail_pts);
>> + if (copy_content) {
>> + batch_size += pte_update_size(m, 0, sysmem, &sysmem_it, &vram_L0,
>> + &sysmem_L0_ofs, &sysmem_L0_pt, 0, avail_pts,
>> + avail_pts);
>> + }
>> +
>> + if (copy_ccs) {
>> + ccs_size = xe_device_ccs_bytes(xe, vram_L0);
>> + batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size, &ccs_ofs,
>> + &ccs_pt, 0, copy_content ? 2 * avail_pts :
>> + avail_pts, avail_pts);
>> + xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
>> + }
>> +
>> + batch_size += copy_content ? EMIT_COPY_DW : 0;
>> + batch_size += copy_ccs ? EMIT_COPY_CCS_DW : 0;
>> +
>> + bb = xe_bb_new(gt, batch_size, usm);
>> + if (IS_ERR(bb)) {
>> + err = PTR_ERR(bb);
>> + goto err_sync;
>> + }
>> +
>> + if (xe_migrate_allow_identity(vram_L0, &vram_it))
>> + xe_res_next(&vram_it, vram_L0);
>> + else
>> + emit_pte(m, bb, vram_L0_pt, true, false, &vram_it, vram_L0, vram);
>> +
>> + if (copy_content)
>> + emit_pte(m, bb, sysmem_L0_pt, false, false, &sysmem_it, vram_L0, sysmem);
>> +
>> + if (copy_ccs)
>> + emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, ccs);
>> +
>> + bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
>> + update_idx = bb->len;
>> +
>> + if (copy_content)
>> + emit_copy(gt, bb, to_sysmem ? vram_L0_ofs : sysmem_L0_ofs, to_sysmem ?
>> + sysmem_L0_ofs : vram_L0_ofs, vram_L0, XE_PAGE_SIZE);
>> +
>> + if (copy_ccs) {
>> + emit_copy_ccs(gt, bb, to_sysmem ? ccs_ofs : vram_L0_ofs, !to_sysmem,
>> + to_sysmem ? vram_L0_ofs : ccs_ofs, to_sysmem, vram_L0);
>> + flush_flags = to_sysmem ? 0 : MI_FLUSH_DW_CCS;
>> + }
>> +
>> + job = xe_bb_create_migration_job(m->q, bb, xe_migrate_batch_base(m, usm),
>> + update_idx);
>> + if (IS_ERR(job)) {
>> + err = PTR_ERR(job);
>> + goto err;
>> + }
>> +
>> + xe_sched_job_add_migrate_flush(job, flush_flags);
>> + if (!fence) {
>> + err = xe_sched_job_add_deps(job, vram_bo->ttm.base.resv,
>> + DMA_RESV_USAGE_BOOKKEEP);
>> + if (!err && copy_content)
>> + err = xe_sched_job_add_deps(job, sysmem_bo->ttm.base.resv,
>> + DMA_RESV_USAGE_BOOKKEEP);
>> + if (!err && copy_ccs)
>> + err = xe_sched_job_add_deps(job, ccs_bo->ttm.base.resv,
>> + DMA_RESV_USAGE_BOOKKEEP);
>> + if (err)
>> + goto err_job;
>> + }
>> +
>> + mutex_lock(&m->job_mutex);
>> + xe_sched_job_arm(job);
>> + dma_fence_put(fence);
>> + fence = dma_fence_get(&job->drm.s_fence->finished);
>> + xe_sched_job_push(job);
>> +
>> + dma_fence_put(m->fence);
>> + m->fence = dma_fence_get(fence);
>> +
>> + mutex_unlock(&m->job_mutex);
>> +
>> + xe_bb_free(bb, fence);
>> + size -= vram_L0;
>> + continue;
>> +
>> +err_job:
>> + xe_sched_job_put(job);
>> +err:
>> + xe_bb_free(bb, NULL);
>> +
>> +err_sync:
>> + /* Sync partial copy if any. FIXME: under job_mutex? */
>> + if (fence) {
>> + dma_fence_wait(fence, false);
>> + dma_fence_put(fence);
>> + }
>> +
>> + return ERR_PTR(err);
>> + }
>> +
>> + return fence;
>> +}
>> +
>> static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64 src_ofs,
>> u32 size, u32 pitch)
>> {
>> diff --git a/drivers/gpu/drm/xe/xe_migrate.h b/drivers/gpu/drm/xe/xe_migrate.h
>> index 0109866e398a..5cdb26ba6493 100644
>> --- a/drivers/gpu/drm/xe/xe_migrate.h
>> +++ b/drivers/gpu/drm/xe/xe_migrate.h
>> @@ -102,6 +102,11 @@ struct dma_fence *xe_migrate_copy(struct xe_migrate *m,
>> struct ttm_resource *dst,
>> bool copy_only_ccs);
>>
>> +struct dma_fence *xe_migrate_raw_vram_copy(struct xe_bo *vram_bo, u64 vram_offset,
>> + struct xe_bo *sysmem_bo, u64 sysmem_offset,
>> + struct xe_bo *ccs_bo, u64 ccs_offset,
>> + u64 size, bool to_sysmem);
>> +
>> #define XE_MIGRATE_CLEAR_FLAG_BO_DATA BIT(0)
>> #define XE_MIGRATE_CLEAR_FLAG_CCS_DATA BIT(1)
>> #define XE_MIGRATE_CLEAR_FLAG_FULL (XE_MIGRATE_CLEAR_FLAG_BO_DATA | \
More information about the Intel-xe
mailing list