<!DOCTYPE html><html><head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  </head>
  <body>
    <p><br>
    </p>
    <div class="moz-cite-prefix">On 24-06-2025 10:28, K V P,
      Satyanarayana wrote:<br>
    </div>
    <blockquote type="cite" cite="mid:LV3PR11MB8695FE036F1B93E821901DBEF978A@LV3PR11MB8695.namprd11.prod.outlook.com">
      <pre wrap="" class="moz-quote-pre">Hi.
</pre>
      <blockquote type="cite">
        <pre wrap="" class="moz-quote-pre">-----Original Message-----
From: Brost, Matthew <a class="moz-txt-link-rfc2396E" href="mailto:matthew.brost@intel.com"><matthew.brost@intel.com></a>
Sent: Tuesday, June 24, 2025 3:12 AM
To: K V P, Satyanarayana <a class="moz-txt-link-rfc2396E" href="mailto:satyanarayana.k.v.p@intel.com"><satyanarayana.k.v.p@intel.com></a>
Cc: <a class="moz-txt-link-abbreviated" href="mailto:intel-xe@lists.freedesktop.org">intel-xe@lists.freedesktop.org</a>; Wajdeczko, Michal
<a class="moz-txt-link-rfc2396E" href="mailto:Michal.Wajdeczko@intel.com"><Michal.Wajdeczko@intel.com></a>; Auld, Matthew <a class="moz-txt-link-rfc2396E" href="mailto:matthew.auld@intel.com"><matthew.auld@intel.com></a>;
Winiarski, Michal <a class="moz-txt-link-rfc2396E" href="mailto:michal.winiarski@intel.com"><michal.winiarski@intel.com></a>; Lis, Tomasz
<a class="moz-txt-link-rfc2396E" href="mailto:tomasz.lis@intel.com"><tomasz.lis@intel.com></a>
Subject: Re: [PATCH v8 2/3] drm/xe/vf: Attach and detach CCS copy
commands with BO

On Fri, Jun 20, 2025 at 09:25:18AM -0700, Matthew Brost wrote:
</pre>
        <blockquote type="cite">
          <pre wrap="" class="moz-quote-pre">On Thu, Jun 19, 2025 at 01:34:58PM +0530, Satyanarayana K V P wrote:
</pre>
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">Attach CCS read/write copy commands to BO for old and new mem types
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">as
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">NULL -> tt or system -> tt.
Detach the CCS read/write copy commands from BO while deleting ttm bo
from xe_ttm_bo_delete_mem_notify().

Signed-off-by: Satyanarayana K V P <a class="moz-txt-link-rfc2396E" href="mailto:satyanarayana.k.v.p@intel.com"><satyanarayana.k.v.p@intel.com></a>
Cc: Michal Wajdeczko <a class="moz-txt-link-rfc2396E" href="mailto:michal.wajdeczko@intel.com"><michal.wajdeczko@intel.com></a>
Cc: Matthew Brost <a class="moz-txt-link-rfc2396E" href="mailto:matthew.brost@intel.com"><matthew.brost@intel.com></a>
Cc: Matthew Auld <a class="moz-txt-link-rfc2396E" href="mailto:matthew.auld@intel.com"><matthew.auld@intel.com></a>
Cc: MichaƂ Winiarski <a class="moz-txt-link-rfc2396E" href="mailto:michal.winiarski@intel.com"><michal.winiarski@intel.com></a>
---
Cc: Tomasz Lis <a class="moz-txt-link-rfc2396E" href="mailto:tomasz.lis@intel.com"><tomasz.lis@intel.com></a>

V7 -> V8:
- Removed xe_bb_ccs_realloc() and created a single BB by calculating the
BB size first and then emitting the commands. (Matthew Brost)
- Added xe_assert() if BB is not NULL in xe_sriov_vf_ccs_attach_bo().

V6 -> V7:
- Created xe_bb_ccs_realloc() to create a single BB instead of maintaining
a list. (Matthew Brost)

V5 -> V6:
- Removed dead code from xe_migrate_ccs_rw_copy() function. (Matthew
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">Brost)
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">
V4 -> V5:
- Create a list of BBs for the given BO and fixed memory leak while
detaching BOs. (Matthew Brost).
- Fixed review comments (Matthew Brost & Matthew Auld).
- Yet to cleanup xe_migrate_ccs_rw_copy() function.

V3 -> V4:
- Fixed issues reported by patchworks.

V2 -> V3:
- Attach and detach functions check for IS_VF_CCS_READY().

V1 -> V2:
- Fixed review comments.
---
 drivers/gpu/drm/xe/xe_bb.c                 |  35 ++++++
 drivers/gpu/drm/xe/xe_bb.h                 |   3 +
 drivers/gpu/drm/xe/xe_bo.c                 |  23 ++++
 drivers/gpu/drm/xe/xe_bo_types.h           |   3 +
 drivers/gpu/drm/xe/xe_migrate.c            | 130 +++++++++++++++++++++
 drivers/gpu/drm/xe/xe_migrate.h            |   6 +
 drivers/gpu/drm/xe/xe_sriov_vf_ccs.c       |  72 ++++++++++++
 drivers/gpu/drm/xe/xe_sriov_vf_ccs.h       |   3 +
 drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h |   8 ++
 9 files changed, 283 insertions(+)

diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
index 9570672fce33..533352dc892f 100644
--- a/drivers/gpu/drm/xe/xe_bb.c
+++ b/drivers/gpu/drm/xe/xe_bb.c
@@ -60,6 +60,41 @@ struct xe_bb *xe_bb_new(struct xe_gt *gt, u32
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">dwords, bool usm)
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">       return ERR_PTR(err);
 }

+struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
+                           enum xe_sriov_vf_ccs_rw_ctxs ctx_id)
+{
+       struct xe_bb *bb = kmalloc(sizeof(*bb), GFP_KERNEL);
+       struct xe_tile *tile = gt_to_tile(gt);
+       struct xe_sa_manager *bb_pool;
+       int err;
+
+       if (!bb)
+               return ERR_PTR(-ENOMEM);
+       /*
+        * We need to allocate space for the requested number of dwords &
+        * one additional MI_BATCH_BUFFER_END dword. Since the whole SA
+        * is submitted to HW, we need to make sure that the last instruction
+        * is not over written when the last chunk of SA is allocated for BB.
+        * So, this extra DW acts as a guard here.
+        */
+
+       bb_pool = tile->sriov.vf.ccs[ctx_id].mem.ccs_bb_pool;
+       bb->bo = xe_sa_bo_new(bb_pool, 4 * (dwords + 1));
+
+       if (IS_ERR(bb->bo)) {
+               err = PTR_ERR(bb->bo);
+               goto err;
+       }
+
+       bb->cs = xe_sa_bo_cpu_addr(bb->bo);
+       bb->len = 0;
+
+       return bb;
+err:
+       kfree(bb);
+       return ERR_PTR(err);
+}
+
 static struct xe_sched_job *
 __xe_bb_create_job(struct xe_exec_queue *q, struct xe_bb *bb, u64
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">*addr)
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre"> {
diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
index fafacd73dcc3..32c9c4c5d2be 100644
--- a/drivers/gpu/drm/xe/xe_bb.h
+++ b/drivers/gpu/drm/xe/xe_bb.h
@@ -13,8 +13,11 @@ struct dma_fence;
 struct xe_gt;
 struct xe_exec_queue;
 struct xe_sched_job;
+enum xe_sriov_vf_ccs_rw_ctxs;

 struct xe_bb *xe_bb_new(struct xe_gt *gt, u32 size, bool usm);
+struct xe_bb *xe_bb_ccs_new(struct xe_gt *gt, u32 dwords,
+                           enum xe_sriov_vf_ccs_rw_ctxs ctx_id);
 struct xe_sched_job *xe_bb_create_job(struct xe_exec_queue *q,
                                      struct xe_bb *bb);
 struct xe_sched_job *xe_bb_create_migration_job(struct xe_exec_queue
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">*q,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
index 4e39188a021a..beaf8544bf08 100644
--- a/drivers/gpu/drm/xe/xe_bo.c
+++ b/drivers/gpu/drm/xe/xe_bo.c
@@ -31,6 +31,7 @@
 #include "xe_pxp.h"
 #include "xe_res_cursor.h"
 #include "xe_shrinker.h"
+#include "xe_sriov_vf_ccs.h"
 #include "xe_trace_bo.h"
 #include "xe_ttm_stolen_mgr.h"
 #include "xe_vm.h"
@@ -947,6 +948,20 @@ static int xe_bo_move(struct ttm_buffer_object
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">*ttm_bo, bool evict,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">       dma_fence_put(fence);
        xe_pm_runtime_put(xe);

+       /*
+        * CCS meta data is migrated from TT -> SMEM. So, let us detach the
+        * BBs from BO as it is no longer needed.
+        */
+       if (IS_VF_CCS_BB_VALID(xe, bo) && old_mem_type == XE_PL_TT &&
+           new_mem->mem_type == XE_PL_SYSTEM)
+               xe_sriov_vf_ccs_detach_bo(bo);
+
+       if (IS_SRIOV_VF(xe) &&
+           ((move_lacks_source && new_mem->mem_type == XE_PL_TT) ||
+            (old_mem_type == XE_PL_SYSTEM && new_mem->mem_type ==
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">XE_PL_TT)) &&
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+          handle_system_ccs)
+               ret = xe_sriov_vf_ccs_attach_bo(bo);
+
</pre>
          </blockquote>
          <pre wrap="" class="moz-quote-pre">
You don't check the 'ret' value of xe_sriov_vf_ccs_attach_bo. That seems be
</pre>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">an oversight.</pre>
      </blockquote>
    </blockquote>
    <p>The error is returned to the caller after this. So, not checked
      explicitly.</p>
    <blockquote type="cite" cite="mid:LV3PR11MB8695FE036F1B93E821901DBEF978A@LV3PR11MB8695.namprd11.prod.outlook.com">
      <blockquote type="cite">
        <pre wrap="" class="moz-quote-pre">
</pre>
        <blockquote type="cite">
          <pre wrap="" class="moz-quote-pre">
</pre>
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre"> out:
        if ((!ttm_bo->resource || ttm_bo->resource->mem_type ==
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">XE_PL_SYSTEM) &&
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">           ttm_bo->ttm) {
@@ -957,6 +972,9 @@ static int xe_bo_move(struct ttm_buffer_object
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">*ttm_bo, bool evict,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">               if (timeout < 0)
                        ret = timeout;

+               if (IS_VF_CCS_BB_VALID(xe, bo))
+                       xe_sriov_vf_ccs_detach_bo(bo);
+
                xe_tt_unmap_sg(xe, ttm_bo->ttm);
        }

@@ -1483,9 +1501,14 @@ static void xe_ttm_bo_release_notify(struct
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">ttm_buffer_object *ttm_bo)
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">
 static void xe_ttm_bo_delete_mem_notify(struct ttm_buffer_object
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">*ttm_bo)
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre"> {
+       struct xe_bo *bo = ttm_to_xe_bo(ttm_bo);
+
        if (!xe_bo_is_xe_bo(ttm_bo))
                return;

+       if (IS_VF_CCS_BB_VALID(ttm_to_xe_device(ttm_bo->bdev), bo))
+               xe_sriov_vf_ccs_detach_bo(bo);
+
        /*
         * Object is idle and about to be destroyed. Release the
         * dma-buf attachment.
diff --git a/drivers/gpu/drm/xe/xe_bo_types.h
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">b/drivers/gpu/drm/xe/xe_bo_types.h
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">index eb5e83c5f233..642e519fcfd1 100644
--- a/drivers/gpu/drm/xe/xe_bo_types.h
+++ b/drivers/gpu/drm/xe/xe_bo_types.h
@@ -78,6 +78,9 @@ struct xe_bo {
        /** @ccs_cleared */
        bool ccs_cleared;

+       /** @bb_ccs_rw: BB instructions of CCS read/write. Valid only for VF
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">*/
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+      struct xe_bb *bb_ccs[XE_SRIOV_VF_CCS_CTX_COUNT];
+
        /**
         * @cpu_caching: CPU caching mode. Currently only used for
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">userspace
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">        * objects. Exceptions are system memory on DGFX, which is always
diff --git a/drivers/gpu/drm/xe/xe_migrate.c
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">b/drivers/gpu/drm/xe/xe_migrate.c
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">index 8f8e9fdfb2a8..c730b34071ad 100644
--- a/drivers/gpu/drm/xe/xe_migrate.c
+++ b/drivers/gpu/drm/xe/xe_migrate.c
@@ -940,6 +940,136 @@ struct dma_fence *xe_migrate_copy(struct
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">xe_migrate *m,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">       return fence;
 }

+/**
+ * xe_migrate_ccs_rw_copy() - Copy content of TTM resources.
+ * @m: The migration context.
+ * @src_bo: The buffer object @src is currently bound to.
+ * @read_write : Creates BB commands for CCS read/write.
+ *
+ * Creates batch buffer instructions to copy CCS metadata from CCS pool
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">to
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+ * memory and vice versa.
+ *
+ * This function should only be called for IGPU.
+ *
+ * Return: 0 if successful, negative error code on failure.
+ */
+int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
+                          struct xe_bo *src_bo,
+                          enum xe_sriov_vf_ccs_rw_ctxs read_write)
+
+{
+       bool src_is_pltt = read_write == XE_SRIOV_VF_CCS_WRITE_CTX;
+       bool dst_is_pltt = read_write == XE_SRIOV_VF_CCS_READ_CTX;
+       struct ttm_resource *src = src_bo->ttm.resource;
+       struct xe_gt *gt = m->tile->primary_gt;
+       u32 batch_size, batch_size_allocated;
+       struct xe_device *xe = gt_to_xe(gt);
+       struct xe_res_cursor src_it, ccs_it;
+       u64 size = src_bo->size;
+       struct xe_bb *bb = NULL;
+       u64 src_L0, src_L0_ofs;
+       u32 src_L0_pt;
+       int err;
+
+       xe_res_first_sg(xe_bo_sg(src_bo), 0, size, &src_it);
+
+       xe_res_first_sg(xe_bo_sg(src_bo), xe_bo_ccs_pages_start(src_bo),
+                       PAGE_ALIGN(xe_device_ccs_bytes(xe, size)),
+                       &ccs_it);
+
+       /* Calculate Batch buffer size */
+       batch_size = 0;
+       while (size) {
+               batch_size += 6; /* Flush + 2 NOP */
+               u64 ccs_ofs, ccs_size;
+               u32 ccs_pt;
+
+               u32 avail_pts = max_mem_transfer_per_pass(xe) /
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">LEVEL0_PAGE_TABLE_ENCODE_SIZE;
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+
+               src_L0 = min_t(u64, max_mem_transfer_per_pass(xe), size);
+
+               batch_size += pte_update_size(m, false, src, &src_it, &src_L0,
+                                             &src_L0_ofs, &src_L0_pt, 0, 0,
+                                             avail_pts);
+
+               ccs_size = xe_device_ccs_bytes(xe, src_L0);
+               batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size,
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">&ccs_ofs,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+                                            &ccs_pt, 0, avail_pts, avail_pts);
+               xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+
+               /* Add copy commands size here */
+               batch_size += EMIT_COPY_CCS_DW;
+
+               size -= src_L0;
+       }
+
+       bb = xe_bb_ccs_new(gt, batch_size, read_write);
+       if (IS_ERR(bb)) {
+               drm_err(&xe->drm, "BB allocation failed.\n");
+               err = PTR_ERR(bb);
+               goto err_ret;
+       }
+
+       batch_size_allocated = batch_size;
+       size = src_bo->size;
+       batch_size = 0;
+
+       /*
+        * Emit PTE and copy commands here.
+        * The CCS copy command can only support limited size. If the size to
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">be
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+       * copied is more than the limit, divide copy into chunks. So, calculate
+        * sizes here again before copy command is emitted.
+        */
+       while (size) {
+               batch_size += 6; /* Flush + 2 NOP */
+               u32 flush_flags = 0;
+               u64 ccs_ofs, ccs_size;
+               u32 ccs_pt;
+
+               u32 avail_pts = max_mem_transfer_per_pass(xe) /
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">LEVEL0_PAGE_TABLE_ENCODE_SIZE;
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+
+               src_L0 = xe_migrate_res_sizes(m, &src_it);
+
+               batch_size += pte_update_size(m, false, src, &src_it, &src_L0,
+                                             &src_L0_ofs, &src_L0_pt, 0, 0,
+                                             avail_pts);
+
+               ccs_size = xe_device_ccs_bytes(xe, src_L0);
+               batch_size += pte_update_size(m, 0, NULL, &ccs_it, &ccs_size,
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">&ccs_ofs,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+                                            &ccs_pt, 0, avail_pts, avail_pts);
+               xe_assert(xe, IS_ALIGNED(ccs_it.start, PAGE_SIZE));
+               batch_size += EMIT_COPY_CCS_DW;
+
+               emit_pte(m, bb, src_L0_pt, false, true, &src_it, src_L0, src);
+
+               emit_pte(m, bb, ccs_pt, false, false, &ccs_it, ccs_size, src);
+
+               bb->cs[bb->len++] = MI_FLUSH_DW | MI_INVALIDATE_TLB |
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">MI_FLUSH_DW_OP_STOREDW |
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+                                      MI_FLUSH_IMM_DW;
+               bb->cs[bb->len++] = MI_NOOP;
+               bb->cs[bb->len++] = MI_NOOP;
+
+               flush_flags = xe_migrate_ccs_copy(m, bb, src_L0_ofs,
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">src_is_pltt,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+                                                src_L0_ofs, dst_is_pltt,
+                                                 src_L0, ccs_ofs, true);
+
+               bb->cs[bb->len++] = MI_FLUSH_DW | MI_INVALIDATE_TLB |
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">MI_FLUSH_DW_OP_STOREDW |
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+                                      MI_FLUSH_IMM_DW | flush_flags;
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">
Missed this - you don't need MI_INVALIDATE_TLB here, just after emitting
the PTEs. I believe that should speedup this copy a little too.

</pre>
      </blockquote>
      <pre wrap="" class="moz-quote-pre">This works out if we are using different VMs. Since we are using same VM for all BOs, I was suggested 
To add MI_INVALIDATE_TLB after each BB to avoid any caching issues.
Correct me if I am wrong.
- Satya.
</pre>
      <blockquote type="cite">
        <pre wrap="" class="moz-quote-pre">This also looks wrong in emit_migration_job_gen12 too. Going to follow
up on this now.

Matt</pre>
      </blockquote>
    </blockquote>
    <p>Removed <span style="white-space: pre-wrap">MI_INVALIDATE_TLB  after emitting PTEs and kept after copy command.</span></p>
    <blockquote type="cite" cite="mid:LV3PR11MB8695FE036F1B93E821901DBEF978A@LV3PR11MB8695.namprd11.prod.outlook.com">
      <blockquote type="cite">
        <pre wrap="" class="moz-quote-pre">

</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+              bb->cs[bb->len++] = MI_NOOP;
+               bb->cs[bb->len++] = MI_NOOP;
+
+               size -= src_L0;
+       }
+
+       xe_assert(xe, (batch_size_allocated == bb->len));
+       src_bo->bb_ccs[read_write] = bb;
+
+       return 0;
+
+err_ret:
+       return err;
+}
+
 static void emit_clear_link_copy(struct xe_gt *gt, struct xe_bb *bb, u64
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">src_ofs,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">                                u32 size, u32 pitch)
 {
diff --git a/drivers/gpu/drm/xe/xe_migrate.h
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">b/drivers/gpu/drm/xe/xe_migrate.h
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">index fb9839c1bae0..96b0449e7edb 100644
--- a/drivers/gpu/drm/xe/xe_migrate.h
+++ b/drivers/gpu/drm/xe/xe_migrate.h
@@ -24,6 +24,8 @@ struct xe_vm;
 struct xe_vm_pgtable_update;
 struct xe_vma;

+enum xe_sriov_vf_ccs_rw_ctxs;
+
 /**
  * struct xe_migrate_pt_update_ops - Callbacks for the
  * xe_migrate_update_pgtables() function.
@@ -112,6 +114,10 @@ struct dma_fence *xe_migrate_copy(struct
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">xe_migrate *m,
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">                                 struct ttm_resource *dst,
                                  bool copy_only_ccs);

+int xe_migrate_ccs_rw_copy(struct xe_migrate *m,
+                          struct xe_bo *src_bo,
+                          enum xe_sriov_vf_ccs_rw_ctxs read_write);
+
 int xe_migrate_access_memory(struct xe_migrate *m, struct xe_bo *bo,
                             unsigned long offset, void *buf, int len,
                             int write);
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">index ff5ad472eb96..242a3da1ef27 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.c
@@ -5,6 +5,7 @@

 #include "instructions/xe_mi_commands.h"
 #include "instructions/xe_gpu_commands.h"
+#include "xe_bb.h"
 #include "xe_bo.h"
 #include "xe_device.h"
 #include "xe_migrate.h"
@@ -208,3 +209,74 @@ int xe_sriov_vf_ccs_init(struct xe_device *xe)
 err_ret:
        return err;
 }
+
+/**
+ * xe_sriov_vf_ccs_attach_bo - Insert CCS read write commands in the BO.
+ * @bo: the &buffer object to which batch buffer commands will be
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">added.
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+ *
+ * This function shall be called only by VF. It inserts the PTEs and copy
+ * command instructions in the BO by calling xe_migrate_ccs_rw_copy()
+ * function.
+ *
+ * Returns: 0 if successful, negative error code on failure.
+ */
+int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo)
+{
+       struct xe_device *xe = xe_bo_device(bo);
+       enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+       struct xe_migrate *migrate;
+       struct xe_tile *tile;
+       struct xe_bb *bb;
+       int tile_id;
+       int err = 0;
+
+       if (!IS_VF_CCS_READY(xe))
+               return 0;
+
+       for_each_tile(tile, xe, tile_id) {
</pre>
          </blockquote>
          <pre wrap="" class="moz-quote-pre">
Same comment as patch 1, I'd avoid for_each_tile and rather use
xe_device_get_root_tile.

</pre>
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+              for_each_ccs_rw_ctx(ctx_id) {
+                       bb = bo->bb_ccs[ctx_id];
+                       /* bb should be NULL here. Assert if not NULL */
+                       xe_assert(xe, !bb);
+
+                       migrate = tile->sriov.vf.ccs[ctx_id].migrate;
+                       err = xe_migrate_ccs_rw_copy(migrate, bo, ctx_id);
+               }
+       }
+       return err;
+}
+
+/**
+ * xe_sriov_vf_ccs_detach_bo - Remove CCS read write commands from
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">the BO.
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+ * @bo: the &buffer object from which batch buffer commands will be
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">removed.
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+ *
+ * This function shall be called only by VF. It removes the PTEs and copy
+ * command instructions from the BO. Make sure to update the BB with
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">MI_NOOP
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+ * before freeing.
+ *
+ * Returns: 0 if successful.
+ */
+int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo)
+{
+       struct xe_device *xe = xe_bo_device(bo);
+       enum xe_sriov_vf_ccs_rw_ctxs ctx_id;
+       struct xe_bb *bb;
+       struct xe_tile *tile;
+       int tile_id;
+
+       if (!IS_VF_CCS_READY(xe))
+               return 0;
+
+       for_each_tile(tile, xe, tile_id) {
</pre>
          </blockquote>
          <pre wrap="" class="moz-quote-pre">
Same here.

Matt
</pre>
        </blockquote>
      </blockquote>
    </blockquote>
    Fixed in new version.
    <blockquote type="cite" cite="mid:LV3PR11MB8695FE036F1B93E821901DBEF978A@LV3PR11MB8695.namprd11.prod.outlook.com">
      <blockquote type="cite">
        <blockquote type="cite">
          <pre wrap="" class="moz-quote-pre">
</pre>
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">+              for_each_ccs_rw_ctx(ctx_id) {
+                       bb = bo->bb_ccs[ctx_id];
+                       if (!bb)
+                               continue;
+
+                       memset(bb->cs, MI_NOOP, bb->len * sizeof(u32));
+                       xe_bb_free(bb, NULL);
+                       bo->bb_ccs[ctx_id] = NULL;
+               }
+       }
+       return 0;
+}
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">index 5df9ba028d14..5d5e4bd25904 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs.h
@@ -7,7 +7,10 @@
 #define _XE_SRIOV_VF_CCS_H_

 struct xe_device;
+struct xe_bo;

 int xe_sriov_vf_ccs_init(struct xe_device *xe);
+int xe_sriov_vf_ccs_attach_bo(struct xe_bo *bo);
+int xe_sriov_vf_ccs_detach_bo(struct xe_bo *bo);

 #endif
diff --git a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
</pre>
          </blockquote>
        </blockquote>
        <pre wrap="" class="moz-quote-pre">b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
</pre>
        <blockquote type="cite">
          <blockquote type="cite">
            <pre wrap="" class="moz-quote-pre">index 6dc279d206ec..e240f3fd18af 100644
--- a/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
+++ b/drivers/gpu/drm/xe/xe_sriov_vf_ccs_types.h
@@ -27,6 +27,14 @@ enum xe_sriov_vf_ccs_rw_ctxs {
        XE_SRIOV_VF_CCS_CTX_COUNT
 };

+#define IS_VF_CCS_BB_VALID(xe, bo) ({ \
+               struct xe_device *___xe = (xe); \
+               struct xe_bo *___bo = (bo); \
+               IS_SRIOV_VF(___xe) && \
+               ___bo->bb_ccs[XE_SRIOV_VF_CCS_READ_CTX] && \
+               ___bo->bb_ccs[XE_SRIOV_VF_CCS_WRITE_CTX]; \
+               })
+
 struct xe_migrate;
 struct xe_sa_manager;

--
2.43.0

</pre>
          </blockquote>
        </blockquote>
      </blockquote>
    </blockquote>
  </body>
</html>