[PATCH 03/15] drm/xe: CPU binds for jobs

Thomas Hellström thomas.hellstrom at linux.intel.com
Thu Jun 5 15:44:07 UTC 2025


Hi, Matt,

An early comment:

Previous concerns have also included:

1) If clearing and binding happen on the same exec_queue, GPU binding
is actually likely to be faster, right, since it can be queued without
waiting for additional dependencies? Do we have any timings from
start-of-clear to support or debunk this argument? (See the rough
sketch after this list.)

2) Are page tables in unmappable VRAM something we'd want to support
at some point?
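
For 1), what I have in mind is roughly the sketch below. It is purely
hypothetical instrumentation (none of these hooks or names exist in the
driver or in this patch); it only illustrates where timestamps could be
taken to compare start-of-clear to bind-complete latency:

#include <drm/drm_device.h>
#include <drm/drm_print.h>
#include <linux/ktime.h>

/*
 * Hypothetical timing probe: record when the clear job is queued and
 * when the corresponding bind has written its PTEs, then log the delta.
 * The struct and helper below are illustrative only.
 */
struct bind_timing {
	ktime_t clear_queued;	/* taken when the clear job is pushed */
	ktime_t bind_done;	/* taken once the PTEs have been written */
};

static void bind_timing_log(struct drm_device *drm,
			    const struct bind_timing *t)
{
	s64 us = ktime_us_delta(t->bind_done, t->clear_queued);

	drm_dbg(drm, "start-of-clear -> bind-complete: %lld us\n", us);
}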

Thanks,
Thomas


On Thu, 2025-06-05 at 08:32 -0700, Matthew Brost wrote:
> No reason to use the GPU for binds. In run_job, use the CPU to perform
> binds once the bind job's dependencies are resolved.
> 
> Benefits of CPU-based binds:
> - Lower latency once dependencies are resolved, as there is no
>   interaction with the GuC or a hardware context switch, both of which
>   are relatively slow.
> - Large arrays of binds do not risk running out of migration PTEs,
>   avoiding -ENOBUFS being returned to userspace.
> - Kernel binds are decoupled from the migration exec queue (which issues
>   copies and clears), so they cannot get stuck behind unrelated jobs;
>   this can be a problem with parallel GPU faults.
> - Enables ULLS on the migration exec queue, as this queue has exclusive
>   access to the paging copy engine.
> 
> The basic idea of the implementation is to store the VM page table
> update operations (struct xe_vm_pgtable_update_op *pt_op) and additional
> arguments for the migrate layer’s CPU PTE update function in a job. The
> submission backend can then call into the migrate layer using the CPU to
> write the PTEs and free the stored resources for the PTE update.
> 
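For reference, a condensed sketch of the new flow, with names taken from
the diff below; the stash helper is only an illustrative wrapper (in the
patch the assignments sit inline in __xe_migrate_update_pgtables()), and
error handling, locking and ref-counting are omitted:

/* 1) At bind time, the PT update arguments are stashed in the job. */
static void stash_pt_update_args(struct xe_sched_job *job,
				 struct xe_migrate_pt_update *pt_update,
				 struct xe_vm_pgtable_update_ops *pt_update_ops,
				 struct xe_tile *tile)
{
	job->pt_update[0].vm = pt_update->vops->vm;
	job->pt_update[0].tile = tile;
	job->pt_update[0].ops = pt_update->ops;
	job->pt_update[0].pt_job_ops =
		xe_pt_job_ops_get(pt_update_ops->pt_job_ops);
}

/* 2) In run_job, once dependencies are resolved, the CPU writes the PTEs. */
static void run_pt_job(struct xe_sched_job *job)
{
	__xe_migrate_update_pgtables_cpu(job->pt_update[0].vm,
					 job->pt_update[0].tile,
					 job->pt_update[0].ops,
					 job->pt_update[0].pt_job_ops->ops,
					 job->pt_update[0].pt_job_ops->current_op);
}

The job thus only carries pointers to the pre-allocated, ref-counted
xe_pt_job_ops; the PTE write in run_job is then a plain CPU update of the
already-prepared entries.
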
> PT job submission is implemented in the GuC backend for simplicity. A
> follow-up could introduce a specific backend for PT jobs.
> 
> All code related to GPU-based binding has been removed.
> 
> Signed-off-by: Matthew Brost <matthew.brost at intel.com>
> ---
>  drivers/gpu/drm/xe/xe_bo.c              |   7 +-
>  drivers/gpu/drm/xe/xe_bo.h              |   9 +-
>  drivers/gpu/drm/xe/xe_bo_types.h        |   2 -
>  drivers/gpu/drm/xe/xe_drm_client.c      |   3 +-
>  drivers/gpu/drm/xe/xe_guc_submit.c      |  36 +++-
>  drivers/gpu/drm/xe/xe_migrate.c         | 251 +++---------------------
>  drivers/gpu/drm/xe/xe_migrate.h         |   6 +
>  drivers/gpu/drm/xe/xe_pt.c              | 188 ++++++++++++++----
>  drivers/gpu/drm/xe/xe_pt.h              |   5 +-
>  drivers/gpu/drm/xe/xe_pt_types.h        |  29 ++-
>  drivers/gpu/drm/xe/xe_sched_job.c       |  78 +++++---
>  drivers/gpu/drm/xe/xe_sched_job_types.h |  31 ++-
>  drivers/gpu/drm/xe/xe_vm.c              |  46 ++---
>  13 files changed, 341 insertions(+), 350 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c
> index 61d208c85281..7aa598b584d2 100644
> --- a/drivers/gpu/drm/xe/xe_bo.c
> +++ b/drivers/gpu/drm/xe/xe_bo.c
> @@ -3033,8 +3033,13 @@ void xe_bo_put_commit(struct llist_head
> *deferred)
>  	if (!freed)
>  		return;
>  
> -	llist_for_each_entry_safe(bo, next, freed, freed)
> +	llist_for_each_entry_safe(bo, next, freed, freed) {
> +		struct xe_vm *vm = bo->vm;
> +
>  		drm_gem_object_free(&bo->ttm.base.refcount);
> +		if (bo->flags & XE_BO_FLAG_PUT_VM_ASYNC)
> +			xe_vm_put(vm);
> +	}
>  }
>  
>  static void xe_bo_dev_work_func(struct work_struct *work)
> diff --git a/drivers/gpu/drm/xe/xe_bo.h b/drivers/gpu/drm/xe/xe_bo.h
> index 02ada1fb8a23..967b1fe92560 100644
> --- a/drivers/gpu/drm/xe/xe_bo.h
> +++ b/drivers/gpu/drm/xe/xe_bo.h
> @@ -46,6 +46,7 @@
>  #define XE_BO_FLAG_GGTT2		BIT(22)
>  #define XE_BO_FLAG_GGTT3		BIT(23)
>  #define XE_BO_FLAG_CPU_ADDR_MIRROR	BIT(24)
> +#define XE_BO_FLAG_PUT_VM_ASYNC		BIT(25)
>  
>  /* this one is trigger internally only */
>  #define XE_BO_FLAG_INTERNAL_TEST	BIT(30)
> @@ -319,6 +320,7 @@ void __xe_bo_release_dummy(struct kref *kref);
>   * @bo: The bo to put.
>   * @deferred: List to which to add the buffer object if we cannot
> put, or
>   * NULL if the function is to put unconditionally.
> + * @added: BO was added to deferred list
>   *
>   * Since the final freeing of an object includes both sleeping and
> (!)
>   * memory allocation in the dma_resv individualization, it's not ok
> @@ -338,7 +340,8 @@ void __xe_bo_release_dummy(struct kref *kref);
>   * false otherwise.
>   */
>  static inline bool
> -xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred)
> +xe_bo_put_deferred(struct xe_bo *bo, struct llist_head *deferred,
> +		   bool *added)
>  {
>  	if (!deferred) {
>  		xe_bo_put(bo);
> @@ -348,6 +351,7 @@ xe_bo_put_deferred(struct xe_bo *bo, struct
> llist_head *deferred)
>  	if (!kref_put(&bo->ttm.base.refcount,
> __xe_bo_release_dummy))
>  		return false;
>  
> +	*added = true;
>  	return llist_add(&bo->freed, deferred);
>  }
>  
> @@ -363,8 +367,9 @@ static inline void
>  xe_bo_put_async(struct xe_bo *bo)
>  {
>  	struct xe_bo_dev *bo_device = &xe_bo_device(bo)->bo_device;
> +	bool added = false;
>  
> -	if (xe_bo_put_deferred(bo, &bo_device->async_list))
> +	if (xe_bo_put_deferred(bo, &bo_device->async_list, &added))
>  		schedule_work(&bo_device->async_free);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_bo_types.h
> b/drivers/gpu/drm/xe/xe_bo_types.h
> index eb5e83c5f233..ecf42a04640a 100644
> --- a/drivers/gpu/drm/xe/xe_bo_types.h
> +++ b/drivers/gpu/drm/xe/xe_bo_types.h
> @@ -70,8 +70,6 @@ struct xe_bo {
>  
>  	/** @freed: List node for delayed put. */
>  	struct llist_node freed;
> -	/** @update_index: Update index if PT BO */
> -	int update_index;
>  	/** @created: Whether the bo has passed initial creation */
>  	bool created;
>  
> diff --git a/drivers/gpu/drm/xe/xe_drm_client.c
> b/drivers/gpu/drm/xe/xe_drm_client.c
> index 31f688e953d7..6f5a91ef7491 100644
> --- a/drivers/gpu/drm/xe/xe_drm_client.c
> +++ b/drivers/gpu/drm/xe/xe_drm_client.c
> @@ -200,6 +200,7 @@ static void show_meminfo(struct drm_printer *p,
> struct drm_file *file)
>  	LLIST_HEAD(deferred);
>  	unsigned int id;
>  	u32 mem_type;
> +	bool added = false;
>  
>  	client = xef->client;
>  
> @@ -246,7 +247,7 @@ static void show_meminfo(struct drm_printer *p,
> struct drm_file *file)
>  			xe_assert(xef->xe, !list_empty(&bo-
> >client_link));
>  		}
>  
> -		xe_bo_put_deferred(bo, &deferred);
> +		xe_bo_put_deferred(bo, &deferred, &added);
>  	}
>  	spin_unlock(&client->bos_lock);
>  
> diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c
> b/drivers/gpu/drm/xe/xe_guc_submit.c
> index 2b61d017eeca..551cd21a6465 100644
> --- a/drivers/gpu/drm/xe/xe_guc_submit.c
> +++ b/drivers/gpu/drm/xe/xe_guc_submit.c
> @@ -19,6 +19,7 @@
>  #include "abi/guc_klvs_abi.h"
>  #include "regs/xe_lrc_layout.h"
>  #include "xe_assert.h"
> +#include "xe_bo.h"
>  #include "xe_devcoredump.h"
>  #include "xe_device.h"
>  #include "xe_exec_queue.h"
> @@ -38,8 +39,10 @@
>  #include "xe_lrc.h"
>  #include "xe_macros.h"
>  #include "xe_map.h"
> +#include "xe_migrate.h"
>  #include "xe_mocs.h"
>  #include "xe_pm.h"
> +#include "xe_pt.h"
>  #include "xe_ring_ops_types.h"
>  #include "xe_sched_job.h"
>  #include "xe_trace.h"
> @@ -745,6 +748,20 @@ static void submit_exec_queue(struct
> xe_exec_queue *q)
>  	}
>  }
>  
> +static bool is_pt_job(struct xe_sched_job *job)
> +{
> +	return job->is_pt_job;
> +}
> +
> +static void run_pt_job(struct xe_sched_job *job)
> +{
> +	__xe_migrate_update_pgtables_cpu(job->pt_update[0].vm,
> +					 job->pt_update[0].tile,
> +					 job->pt_update[0].ops,
> +					 job-
> >pt_update[0].pt_job_ops->ops,
> +					 job-
> >pt_update[0].pt_job_ops->current_op);
> +}
> +
>  static struct dma_fence *
>  guc_exec_queue_run_job(struct drm_sched_job *drm_job)
>  {
> @@ -760,14 +777,21 @@ guc_exec_queue_run_job(struct drm_sched_job
> *drm_job)
>  	trace_xe_sched_job_run(job);
>  
>  	if (!exec_queue_killed_or_banned_or_wedged(q) &&
> !xe_sched_job_is_error(job)) {
> -		if (!exec_queue_registered(q))
> -			register_exec_queue(q);
> -		if (!lr)	/* LR jobs are emitted in the exec
> IOCTL */
> -			q->ring_ops->emit_job(job);
> -		submit_exec_queue(q);
> +		if (is_pt_job(job)) {
> +			run_pt_job(job);
> +		} else {
> +			if (!exec_queue_registered(q))
> +				register_exec_queue(q);
> +			if (!lr)	/* LR jobs are emitted in
> the exec IOCTL */
> +				q->ring_ops->emit_job(job);
> +			submit_exec_queue(q);
> +		}
>  	}
>  
> -	if (lr) {
> +	if (is_pt_job(job)) {
> +		xe_pt_job_ops_put(job->pt_update[0].pt_job_ops);
> +		dma_fence_put(job->fence);	/* Drop ref from
> xe_sched_job_arm */
> +	} else if (lr) {
>  		xe_sched_job_set_error(job, -EOPNOTSUPP);
>  		dma_fence_put(job->fence);	/* Drop ref from
> xe_sched_job_arm */
>  	} else {
> diff --git a/drivers/gpu/drm/xe/xe_migrate.c
> b/drivers/gpu/drm/xe/xe_migrate.c
> index 9084f5cbc02d..e444f3fae97c 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.c
> +++ b/drivers/gpu/drm/xe/xe_migrate.c
> @@ -58,18 +58,12 @@ struct xe_migrate {
>  	 * Protected by @job_mutex.
>  	 */
>  	struct dma_fence *fence;
> -	/**
> -	 * @vm_update_sa: For integrated, used to suballocate page-
> tables
> -	 * out of the pt_bo.
> -	 */
> -	struct drm_suballoc_manager vm_update_sa;
>  	/** @min_chunk_size: For dgfx, Minimum chunk size */
>  	u64 min_chunk_size;
>  };
>  
>  #define MAX_PREEMPTDISABLE_TRANSFER SZ_8M /* Around 1ms. */
>  #define MAX_CCS_LIMITED_TRANSFER SZ_4M /* XE_PAGE_SIZE *
> (FIELD_MAX(XE2_CCS_SIZE_MASK) + 1) */
> -#define NUM_KERNEL_PDE 15
>  #define NUM_PT_SLOTS 32
>  #define LEVEL0_PAGE_TABLE_ENCODE_SIZE SZ_2M
>  #define MAX_NUM_PTE 512
> @@ -107,7 +101,6 @@ static void xe_migrate_fini(void *arg)
>  
>  	dma_fence_put(m->fence);
>  	xe_bo_put(m->pt_bo);
> -	drm_suballoc_manager_fini(&m->vm_update_sa);
>  	mutex_destroy(&m->job_mutex);
>  	xe_vm_close_and_put(m->q->vm);
>  	xe_exec_queue_put(m->q);
> @@ -199,8 +192,6 @@ static int xe_migrate_prepare_vm(struct xe_tile
> *tile, struct xe_migrate *m,
>  	BUILD_BUG_ON(NUM_PT_SLOTS > SZ_2M/XE_PAGE_SIZE);
>  	/* Must be a multiple of 64K to support all platforms */
>  	BUILD_BUG_ON(NUM_PT_SLOTS * XE_PAGE_SIZE % SZ_64K);
> -	/* And one slot reserved for the 4KiB page table updates */
> -	BUILD_BUG_ON(!(NUM_KERNEL_PDE & 1));
>  
>  	/* Need to be sure everything fits in the first PT, or
> create more */
>  	xe_tile_assert(tile, m->batch_base_ofs + batch->size <
> SZ_2M);
> @@ -333,8 +324,6 @@ static int xe_migrate_prepare_vm(struct xe_tile
> *tile, struct xe_migrate *m,
>  	/*
>  	 * Example layout created above, with root level = 3:
>  	 * [PT0...PT7]: kernel PT's for copy/clear; 64 or 4KiB PTE's
> -	 * [PT8]: Kernel PT for VM_BIND, 4 KiB PTE's
> -	 * [PT9...PT26]: Userspace PT's for VM_BIND, 4 KiB PTE's
>  	 * [PT27 = PDE 0] [PT28 = PDE 1] [PT29 = PDE 2] [PT30 & PT31
> = 2M vram identity map]
>  	 *
>  	 * This makes the lowest part of the VM point to the
> pagetables.
> @@ -342,19 +331,10 @@ static int xe_migrate_prepare_vm(struct xe_tile
> *tile, struct xe_migrate *m,
>  	 * and flushes, other parts of the VM can be used either for
> copying and
>  	 * clearing.
>  	 *
> -	 * For performance, the kernel reserves PDE's, so about 20
> are left
> -	 * for async VM updates.
> -	 *
>  	 * To make it easier to work, each scratch PT is put in slot
> (1 + PT #)
>  	 * everywhere, this allows lockless updates to scratch pages
> by using
>  	 * the different addresses in VM.
>  	 */
> -#define NUM_VMUSA_UNIT_PER_PAGE	32
> -#define VM_SA_UPDATE_UNIT_SIZE		(XE_PAGE_SIZE /
> NUM_VMUSA_UNIT_PER_PAGE)
> -#define NUM_VMUSA_WRITES_PER_UNIT	(VM_SA_UPDATE_UNIT_SIZE /
> sizeof(u64))
> -	drm_suballoc_manager_init(&m->vm_update_sa,
> -				  (size_t)(map_ofs / XE_PAGE_SIZE -
> NUM_KERNEL_PDE) *
> -				  NUM_VMUSA_UNIT_PER_PAGE, 0);
>  
>  	m->pt_bo = bo;
>  	return 0;
> @@ -1193,56 +1173,6 @@ struct dma_fence *xe_migrate_clear(struct
> xe_migrate *m,
>  	return fence;
>  }
>  
> -static void write_pgtable(struct xe_tile *tile, struct xe_bb *bb,
> u64 ppgtt_ofs,
> -			  const struct xe_vm_pgtable_update_op
> *pt_op,
> -			  const struct xe_vm_pgtable_update *update,
> -			  struct xe_migrate_pt_update *pt_update)
> -{
> -	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
> -	struct xe_vm *vm = pt_update->vops->vm;
> -	u32 chunk;
> -	u32 ofs = update->ofs, size = update->qwords;
> -
> -	/*
> -	 * If we have 512 entries (max), we would populate it
> ourselves,
> -	 * and update the PDE above it to the new pointer.
> -	 * The only time this can only happen if we have to update
> the top
> -	 * PDE. This requires a BO that is almost vm->size big.
> -	 *
> -	 * This shouldn't be possible in practice.. might change
> when 16K
> -	 * pages are used. Hence the assert.
> -	 */
> -	xe_tile_assert(tile, update->qwords < MAX_NUM_PTE);
> -	if (!ppgtt_ofs)
> -		ppgtt_ofs = xe_migrate_vram_ofs(tile_to_xe(tile),
> -						xe_bo_addr(update-
> >pt_bo, 0,
> -							  
> XE_PAGE_SIZE), false);
> -
> -	do {
> -		u64 addr = ppgtt_ofs + ofs * 8;
> -
> -		chunk = min(size, MAX_PTE_PER_SDI);
> -
> -		/* Ensure populatefn can do memset64 by aligning bb-
> >cs */
> -		if (!(bb->len & 1))
> -			bb->cs[bb->len++] = MI_NOOP;
> -
> -		bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> MI_SDI_NUM_QW(chunk);
> -		bb->cs[bb->len++] = lower_32_bits(addr);
> -		bb->cs[bb->len++] = upper_32_bits(addr);
> -		if (pt_op->bind)
> -			ops->populate(tile, NULL, bb->cs + bb->len,
> -				      ofs, chunk, update);
> -		else
> -			ops->clear(vm, tile, NULL, bb->cs + bb->len,
> -				   ofs, chunk, update);
> -
> -		bb->len += chunk * 2;
> -		ofs += chunk;
> -		size -= chunk;
> -	} while (size);
> -}
> -
>  struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m)
>  {
>  	return xe_vm_get(m->q->vm);
> @@ -1258,7 +1188,18 @@ struct migrate_test_params {
>  	container_of(_priv, struct migrate_test_params, base)
>  #endif
>  
> -static void
> +/**
> + * __xe_migrate_update_pgtables_cpu() - Update a VM's PTEs via the CPU
> + * @vm: The VM being updated
> + * @tile: The tile being updated
> + * @ops: The migrate PT update ops
> + * @pt_op: The VM PT update ops
> + * @num_ops: The number of VM PT update ops
> + *
> + * Execute the VM PT update ops array, which results in a VM's PTEs being
> + * updated via the CPU.
> + */
> +void
>  __xe_migrate_update_pgtables_cpu(struct xe_vm *vm, struct xe_tile
> *tile,
>  				 const struct
> xe_migrate_pt_update_ops *ops,
>  				 struct xe_vm_pgtable_update_op
> *pt_op,
> @@ -1314,7 +1255,7 @@ xe_migrate_update_pgtables_cpu(struct
> xe_migrate *m,
>  	}
>  
>  	__xe_migrate_update_pgtables_cpu(vm, m->tile, ops,
> -					 pt_update_ops->ops,
> +					 pt_update_ops->pt_job_ops-
> >ops,
>  					 pt_update_ops->num_ops);
>  
>  	return dma_fence_get_stub();
> @@ -1327,161 +1268,19 @@ __xe_migrate_update_pgtables(struct
> xe_migrate *m,
>  {
>  	const struct xe_migrate_pt_update_ops *ops = pt_update->ops;
>  	struct xe_tile *tile = m->tile;
> -	struct xe_gt *gt = tile->primary_gt;
> -	struct xe_device *xe = tile_to_xe(tile);
>  	struct xe_sched_job *job;
>  	struct dma_fence *fence;
> -	struct drm_suballoc *sa_bo = NULL;
> -	struct xe_bb *bb;
> -	u32 i, j, batch_size = 0, ppgtt_ofs, update_idx, page_ofs =
> 0;
> -	u32 num_updates = 0, current_update = 0;
> -	u64 addr;
> -	int err = 0;
>  	bool is_migrate = pt_update_ops->q == m->q;
> -	bool usm = is_migrate && xe->info.has_usm;
> -
> -	for (i = 0; i < pt_update_ops->num_ops; ++i) {
> -		struct xe_vm_pgtable_update_op *pt_op =
> &pt_update_ops->ops[i];
> -		struct xe_vm_pgtable_update *updates = pt_op-
> >entries;
> -
> -		num_updates += pt_op->num_entries;
> -		for (j = 0; j < pt_op->num_entries; ++j) {
> -			u32 num_cmds =
> DIV_ROUND_UP(updates[j].qwords,
> -						   
> MAX_PTE_PER_SDI);
> -
> -			/* align noop + MI_STORE_DATA_IMM cmd prefix
> */
> -			batch_size += 4 * num_cmds +
> updates[j].qwords * 2;
> -		}
> -	}
> -
> -	/* fixed + PTE entries */
> -	if (IS_DGFX(xe))
> -		batch_size += 2;
> -	else
> -		batch_size += 6 * (num_updates / MAX_PTE_PER_SDI +
> 1) +
> -			num_updates * 2;
> -
> -	bb = xe_bb_new(gt, batch_size, usm);
> -	if (IS_ERR(bb))
> -		return ERR_CAST(bb);
> -
> -	/* For sysmem PTE's, need to map them in our hole.. */
> -	if (!IS_DGFX(xe)) {
> -		u16 pat_index = xe->pat.idx[XE_CACHE_WB];
> -		u32 ptes, ofs;
> -
> -		ppgtt_ofs = NUM_KERNEL_PDE - 1;
> -		if (!is_migrate) {
> -			u32 num_units = DIV_ROUND_UP(num_updates,
> -						    
> NUM_VMUSA_WRITES_PER_UNIT);
> -
> -			if (num_units > m->vm_update_sa.size) {
> -				err = -ENOBUFS;
> -				goto err_bb;
> -			}
> -			sa_bo = drm_suballoc_new(&m->vm_update_sa,
> num_units,
> -						 GFP_KERNEL, true,
> 0);
> -			if (IS_ERR(sa_bo)) {
> -				err = PTR_ERR(sa_bo);
> -				goto err_bb;
> -			}
> -
> -			ppgtt_ofs = NUM_KERNEL_PDE +
> -				(drm_suballoc_soffset(sa_bo) /
> -				 NUM_VMUSA_UNIT_PER_PAGE);
> -			page_ofs = (drm_suballoc_soffset(sa_bo) %
> -				    NUM_VMUSA_UNIT_PER_PAGE) *
> -				VM_SA_UPDATE_UNIT_SIZE;
> -		}
> -
> -		/* Map our PT's to gtt */
> -		i = 0;
> -		j = 0;
> -		ptes = num_updates;
> -		ofs = ppgtt_ofs * XE_PAGE_SIZE + page_ofs;
> -		while (ptes) {
> -			u32 chunk = min(MAX_PTE_PER_SDI, ptes);
> -			u32 idx = 0;
> -
> -			bb->cs[bb->len++] = MI_STORE_DATA_IMM |
> -				MI_SDI_NUM_QW(chunk);
> -			bb->cs[bb->len++] = ofs;
> -			bb->cs[bb->len++] = 0; /* upper_32_bits */
> -
> -			for (; i < pt_update_ops->num_ops; ++i) {
> -				struct xe_vm_pgtable_update_op
> *pt_op =
> -					&pt_update_ops->ops[i];
> -				struct xe_vm_pgtable_update *updates
> = pt_op->entries;
> -
> -				for (; j < pt_op->num_entries; ++j,
> ++current_update, ++idx) {
> -					struct xe_vm *vm =
> pt_update->vops->vm;
> -					struct xe_bo *pt_bo =
> updates[j].pt_bo;
> -
> -					if (idx == chunk)
> -						goto next_cmd;
> -
> -					xe_tile_assert(tile, pt_bo-
> >size == SZ_4K);
> -
> -					/* Map a PT at most once */
> -					if (pt_bo->update_index < 0)
> -						pt_bo->update_index
> = current_update;
> -
> -					addr = vm->pt_ops-
> >pte_encode_bo(pt_bo, 0,
> -
> 									 pat_index, 0);
> -					bb->cs[bb->len++] =
> lower_32_bits(addr);
> -					bb->cs[bb->len++] =
> upper_32_bits(addr);
> -				}
> -
> -				j = 0;
> -			}
> -
> -next_cmd:
> -			ptes -= chunk;
> -			ofs += chunk * sizeof(u64);
> -		}
> -
> -		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
> -		update_idx = bb->len;
> -
> -		addr = xe_migrate_vm_addr(ppgtt_ofs, 0) +
> -			(page_ofs / sizeof(u64)) * XE_PAGE_SIZE;
> -		for (i = 0; i < pt_update_ops->num_ops; ++i) {
> -			struct xe_vm_pgtable_update_op *pt_op =
> -				&pt_update_ops->ops[i];
> -			struct xe_vm_pgtable_update *updates =
> pt_op->entries;
> -
> -			for (j = 0; j < pt_op->num_entries; ++j) {
> -				struct xe_bo *pt_bo =
> updates[j].pt_bo;
> -
> -				write_pgtable(tile, bb, addr +
> -					      pt_bo->update_index *
> XE_PAGE_SIZE,
> -					      pt_op, &updates[j],
> pt_update);
> -			}
> -		}
> -	} else {
> -		/* phys pages, no preamble required */
> -		bb->cs[bb->len++] = MI_BATCH_BUFFER_END;
> -		update_idx = bb->len;
> -
> -		for (i = 0; i < pt_update_ops->num_ops; ++i) {
> -			struct xe_vm_pgtable_update_op *pt_op =
> -				&pt_update_ops->ops[i];
> -			struct xe_vm_pgtable_update *updates =
> pt_op->entries;
> -
> -			for (j = 0; j < pt_op->num_entries; ++j)
> -				write_pgtable(tile, bb, 0, pt_op,
> &updates[j],
> -					      pt_update);
> -		}
> -	}
> +	int err;
>  
> -	job = xe_bb_create_migration_job(pt_update_ops->q, bb,
> -					 xe_migrate_batch_base(m,
> usm),
> -					 update_idx);
> +	job = xe_sched_job_create(pt_update_ops->q, NULL);
>  	if (IS_ERR(job)) {
>  		err = PTR_ERR(job);
> -		goto err_sa;
> +		goto err_out;
>  	}
>  
> +	xe_tile_assert(tile, job->is_pt_job);
> +
>  	if (ops->pre_commit) {
>  		pt_update->job = job;
>  		err = ops->pre_commit(pt_update);
> @@ -1491,6 +1290,12 @@ __xe_migrate_update_pgtables(struct xe_migrate
> *m,
>  	if (is_migrate)
>  		mutex_lock(&m->job_mutex);
>  
> +	job->pt_update[0].vm = pt_update->vops->vm;
> +	job->pt_update[0].tile = tile;
> +	job->pt_update[0].ops = ops;
> +	job->pt_update[0].pt_job_ops =
> +		xe_pt_job_ops_get(pt_update_ops->pt_job_ops);
> +
>  	xe_sched_job_arm(job);
>  	fence = dma_fence_get(&job->drm.s_fence->finished);
>  	xe_sched_job_push(job);
> @@ -1498,17 +1303,11 @@ __xe_migrate_update_pgtables(struct
> xe_migrate *m,
>  	if (is_migrate)
>  		mutex_unlock(&m->job_mutex);
>  
> -	xe_bb_free(bb, fence);
> -	drm_suballoc_free(sa_bo, fence);
> -
>  	return fence;
>  
>  err_job:
>  	xe_sched_job_put(job);
> -err_sa:
> -	drm_suballoc_free(sa_bo, NULL);
> -err_bb:
> -	xe_bb_free(bb, NULL);
> +err_out:
>  	return ERR_PTR(err);
>  }
>  
> diff --git a/drivers/gpu/drm/xe/xe_migrate.h
> b/drivers/gpu/drm/xe/xe_migrate.h
> index b064455b604e..0986ffdd8d9a 100644
> --- a/drivers/gpu/drm/xe/xe_migrate.h
> +++ b/drivers/gpu/drm/xe/xe_migrate.h
> @@ -22,6 +22,7 @@ struct xe_pt;
>  struct xe_tile;
>  struct xe_vm;
>  struct xe_vm_pgtable_update;
> +struct xe_vm_pgtable_update_op;
>  struct xe_vma;
>  
>  /**
> @@ -125,6 +126,11 @@ struct dma_fence *xe_migrate_clear(struct
> xe_migrate *m,
>  
>  struct xe_vm *xe_migrate_get_vm(struct xe_migrate *m);
>  
> +void __xe_migrate_update_pgtables_cpu(struct xe_vm *vm, struct
> xe_tile *tile,
> +				      const struct
> xe_migrate_pt_update_ops *ops,
> +				      struct xe_vm_pgtable_update_op
> *pt_op,
> +				      int num_ops);
> +
>  struct dma_fence *
>  xe_migrate_update_pgtables(struct xe_migrate *m,
>  			   struct xe_migrate_pt_update *pt_update);
> diff --git a/drivers/gpu/drm/xe/xe_pt.c b/drivers/gpu/drm/xe/xe_pt.c
> index db1c363a65d5..1ad31f444b79 100644
> --- a/drivers/gpu/drm/xe/xe_pt.c
> +++ b/drivers/gpu/drm/xe/xe_pt.c
> @@ -200,7 +200,9 @@ unsigned int xe_pt_shift(unsigned int level)
>   * and finally frees @pt. TODO: Can we remove the @flags argument?
>   */
>  void xe_pt_destroy(struct xe_pt *pt, u32 flags, struct llist_head
> *deferred)
> +
>  {
> +	bool added = false;
>  	int i;
>  
>  	if (!pt)
> @@ -208,7 +210,18 @@ void xe_pt_destroy(struct xe_pt *pt, u32 flags,
> struct llist_head *deferred)
>  
>  	XE_WARN_ON(!list_empty(&pt->bo->ttm.base.gpuva.list));
>  	xe_bo_unpin(pt->bo);
> -	xe_bo_put_deferred(pt->bo, deferred);
> +	xe_bo_put_deferred(pt->bo, deferred, &added);
> +	if (added) {
> +		/*
> +		 * We need the VM present until the BO is destroyed as it
> +		 * shares a dma-resv and BO destroy is async. Reinit BO
> +		 * refcount so xe_bo_put_async can be used when the PT job
> +		 * ops refcount goes to zero.
> +		 */
> +		xe_vm_get(pt->bo->vm);
> +		pt->bo->flags |= XE_BO_FLAG_PUT_VM_ASYNC;
> +		kref_init(&pt->bo->ttm.base.refcount);
> +	}
>  
>  	if (pt->level > 0 && pt->num_live) {
>  		struct xe_pt_dir *pt_dir = as_xe_pt_dir(pt);
> @@ -361,7 +374,7 @@ xe_pt_new_shared(struct xe_walk_update *wupd,
> struct xe_pt *parent,
>  	entry->pt = parent;
>  	entry->flags = 0;
>  	entry->qwords = 0;
> -	entry->pt_bo->update_index = -1;
> +	entry->level = parent->level;
>  
>  	if (alloc_entries) {
>  		entry->pt_entries = kmalloc_array(XE_PDES,
> @@ -1739,7 +1752,7 @@ xe_migrate_clear_pgtable_callback(struct xe_vm
> *vm, struct xe_tile *tile,
>  				  u32 qword_ofs, u32 num_qwords,
>  				  const struct xe_vm_pgtable_update
> *update)
>  {
> -	u64 empty = __xe_pt_empty_pte(tile, vm, update->pt->level);
> +	u64 empty = __xe_pt_empty_pte(tile, vm, update->level);
>  	int i;
>  
>  	if (map && map->is_iomem)
> @@ -1805,13 +1818,20 @@ xe_pt_commit_prepare_unbind(struct xe_vma
> *vma,
>  	}
>  }
>  
> +static struct xe_vm_pgtable_update_op *
> +to_pt_op(struct xe_vm_pgtable_update_ops *pt_update_ops, u32
> current_op)
> +{
> +	return &pt_update_ops->pt_job_ops->ops[current_op];
> +}
> +
>  static void
>  xe_pt_update_ops_rfence_interval(struct xe_vm_pgtable_update_ops
> *pt_update_ops,
>  				 u64 start, u64 end)
>  {
>  	u64 last;
> -	u32 current_op = pt_update_ops->current_op;
> -	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops-
> >ops[current_op];
> +	u32 current_op = pt_update_ops->pt_job_ops->current_op;
> +	struct xe_vm_pgtable_update_op *pt_op =
> +		to_pt_op(pt_update_ops, current_op);
>  	int i, level = 0;
>  
>  	for (i = 0; i < pt_op->num_entries; i++) {
> @@ -1846,8 +1866,9 @@ static int bind_op_prepare(struct xe_vm *vm,
> struct xe_tile *tile,
>  			   struct xe_vm_pgtable_update_ops
> *pt_update_ops,
>  			   struct xe_vma *vma, bool
> invalidate_on_bind)
>  {
> -	u32 current_op = pt_update_ops->current_op;
> -	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops-
> >ops[current_op];
> +	u32 current_op = pt_update_ops->pt_job_ops->current_op;
> +	struct xe_vm_pgtable_update_op *pt_op =
> +		to_pt_op(pt_update_ops, current_op);
>  	int err;
>  
>  	xe_tile_assert(tile, !xe_vma_is_cpu_addr_mirror(vma));
> @@ -1876,7 +1897,7 @@ static int bind_op_prepare(struct xe_vm *vm,
> struct xe_tile *tile,
>  		xe_pt_update_ops_rfence_interval(pt_update_ops,
>  						 xe_vma_start(vma),
>  						 xe_vma_end(vma));
> -		++pt_update_ops->current_op;
> +		++pt_update_ops->pt_job_ops->current_op;
>  		pt_update_ops->needs_userptr_lock |=
> xe_vma_is_userptr(vma);
>  
>  		/*
> @@ -1913,8 +1934,9 @@ static int bind_range_prepare(struct xe_vm *vm,
> struct xe_tile *tile,
>  			      struct xe_vm_pgtable_update_ops
> *pt_update_ops,
>  			      struct xe_vma *vma, struct
> xe_svm_range *range)
>  {
> -	u32 current_op = pt_update_ops->current_op;
> -	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops-
> >ops[current_op];
> +	u32 current_op = pt_update_ops->pt_job_ops->current_op;
> +	struct xe_vm_pgtable_update_op *pt_op =
> +		to_pt_op(pt_update_ops, current_op);
>  	int err;
>  
>  	xe_tile_assert(tile, xe_vma_is_cpu_addr_mirror(vma));
> @@ -1938,7 +1960,7 @@ static int bind_range_prepare(struct xe_vm *vm,
> struct xe_tile *tile,
>  		xe_pt_update_ops_rfence_interval(pt_update_ops,
>  						 range-
> >base.itree.start,
>  						 range-
> >base.itree.last + 1);
> -		++pt_update_ops->current_op;
> +		++pt_update_ops->pt_job_ops->current_op;
>  		pt_update_ops->needs_svm_lock = true;
>  
>  		pt_op->vma = vma;
> @@ -1955,8 +1977,9 @@ static int unbind_op_prepare(struct xe_tile
> *tile,
>  			     struct xe_vm_pgtable_update_ops
> *pt_update_ops,
>  			     struct xe_vma *vma)
>  {
> -	u32 current_op = pt_update_ops->current_op;
> -	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops-
> >ops[current_op];
> +	u32 current_op = pt_update_ops->pt_job_ops->current_op;
> +	struct xe_vm_pgtable_update_op *pt_op =
> +		to_pt_op(pt_update_ops, current_op);
>  	int err;
>  
>  	if (!((vma->tile_present | vma->tile_staged) & BIT(tile-
> >id)))
> @@ -1984,7 +2007,7 @@ static int unbind_op_prepare(struct xe_tile
> *tile,
>  				pt_op->num_entries, false);
>  	xe_pt_update_ops_rfence_interval(pt_update_ops,
> xe_vma_start(vma),
>  					 xe_vma_end(vma));
> -	++pt_update_ops->current_op;
> +	++pt_update_ops->pt_job_ops->current_op;
>  	pt_update_ops->needs_userptr_lock |= xe_vma_is_userptr(vma);
>  	pt_update_ops->needs_invalidation = true;
>  
> @@ -1998,8 +2021,9 @@ static int unbind_range_prepare(struct xe_vm
> *vm,
>  				struct xe_vm_pgtable_update_ops
> *pt_update_ops,
>  				struct xe_svm_range *range)
>  {
> -	u32 current_op = pt_update_ops->current_op;
> -	struct xe_vm_pgtable_update_op *pt_op = &pt_update_ops-
> >ops[current_op];
> +	u32 current_op = pt_update_ops->pt_job_ops->current_op;
> +	struct xe_vm_pgtable_update_op *pt_op =
> +		to_pt_op(pt_update_ops, current_op);
>  
>  	if (!(range->tile_present & BIT(tile->id)))
>  		return 0;
> @@ -2019,7 +2043,7 @@ static int unbind_range_prepare(struct xe_vm
> *vm,
>  				pt_op->num_entries, false);
>  	xe_pt_update_ops_rfence_interval(pt_update_ops, range-
> >base.itree.start,
>  					 range->base.itree.last +
> 1);
> -	++pt_update_ops->current_op;
> +	++pt_update_ops->pt_job_ops->current_op;
>  	pt_update_ops->needs_svm_lock = true;
>  	pt_update_ops->needs_invalidation = true;
>  
> @@ -2122,7 +2146,6 @@ static int op_prepare(struct xe_vm *vm,
>  static void
>  xe_pt_update_ops_init(struct xe_vm_pgtable_update_ops
> *pt_update_ops)
>  {
> -	init_llist_head(&pt_update_ops->deferred);
>  	pt_update_ops->start = ~0x0ull;
>  	pt_update_ops->last = 0x0ull;
>  }
> @@ -2163,7 +2186,7 @@ int xe_pt_update_ops_prepare(struct xe_tile
> *tile, struct xe_vma_ops *vops)
>  			return err;
>  	}
>  
> -	xe_tile_assert(tile, pt_update_ops->current_op <=
> +	xe_tile_assert(tile, pt_update_ops->pt_job_ops->current_op
> <=
>  		       pt_update_ops->num_ops);
>  
>  #ifdef TEST_VM_OPS_ERROR
> @@ -2396,7 +2419,7 @@ xe_pt_update_ops_run(struct xe_tile *tile,
> struct xe_vma_ops *vops)
>  	lockdep_assert_held(&vm->lock);
>  	xe_vm_assert_held(vm);
>  
> -	if (!pt_update_ops->current_op) {
> +	if (!pt_update_ops->pt_job_ops->current_op) {
>  		xe_tile_assert(tile, xe_vm_in_fault_mode(vm));
>  
>  		return dma_fence_get_stub();
> @@ -2445,12 +2468,16 @@ xe_pt_update_ops_run(struct xe_tile *tile,
> struct xe_vma_ops *vops)
>  		goto free_rfence;
>  	}
>  
> -	/* Point of no return - VM killed if failure after this */
> -	for (i = 0; i < pt_update_ops->current_op; ++i) {
> -		struct xe_vm_pgtable_update_op *pt_op =
> &pt_update_ops->ops[i];
> +	/*
> +	 * Point of no return - VM killed if failure after this
> +	 */
> +	for (i = 0; i < pt_update_ops->pt_job_ops->current_op; ++i)
> {
> +		struct xe_vm_pgtable_update_op *pt_op =
> +			to_pt_op(pt_update_ops, i);
>  
>  		xe_pt_commit(pt_op->vma, pt_op->entries,
> -			     pt_op->num_entries, &pt_update_ops-
> >deferred);
> +			     pt_op->num_entries,
> +			     &pt_update_ops->pt_job_ops->deferred);
>  		pt_op->vma = NULL;	/* skip in
> xe_pt_update_ops_abort */
>  	}
>  
> @@ -2530,27 +2557,19 @@ xe_pt_update_ops_run(struct xe_tile *tile,
> struct xe_vma_ops *vops)
>  ALLOW_ERROR_INJECTION(xe_pt_update_ops_run, ERRNO);
>  
>  /**
> - * xe_pt_update_ops_fini() - Finish PT update operations
> - * @tile: Tile of PT update operations
> - * @vops: VMA operations
> + * xe_pt_update_ops_free() - Free PT update operations
> + * @pt_op: Array of PT update operations
> + * @num_ops: Number of PT update operations
>   *
> - * Finish PT update operations by committing to destroy page table
> memory
> + * Free PT update operations
>   */
> -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops
> *vops)
> +static void xe_pt_update_ops_free(struct xe_vm_pgtable_update_op
> *pt_op,
> +				  u32 num_ops)
>  {
> -	struct xe_vm_pgtable_update_ops *pt_update_ops =
> -		&vops->pt_update_ops[tile->id];
> -	int i;
> -
> -	lockdep_assert_held(&vops->vm->lock);
> -	xe_vm_assert_held(vops->vm);
> -
> -	for (i = 0; i < pt_update_ops->current_op; ++i) {
> -		struct xe_vm_pgtable_update_op *pt_op =
> &pt_update_ops->ops[i];
> +	u32 i;
>  
> +	for (i = 0; i < num_ops; ++i, ++pt_op)
>  		xe_pt_free_bind(pt_op->entries, pt_op->num_entries);
> -	}
> -	xe_bo_put_commit(&vops->pt_update_ops[tile->id].deferred);
>  }
>  
>  /**
> @@ -2571,9 +2590,9 @@ void xe_pt_update_ops_abort(struct xe_tile
> *tile, struct xe_vma_ops *vops)
>  
>  	for (i = pt_update_ops->num_ops - 1; i >= 0; --i) {
>  		struct xe_vm_pgtable_update_op *pt_op =
> -			&pt_update_ops->ops[i];
> +			to_pt_op(pt_update_ops, i);
>  
> -		if (!pt_op->vma || i >= pt_update_ops->current_op)
> +		if (!pt_op->vma || i >= pt_update_ops->pt_job_ops-
> >current_op)
>  			continue;
>  
>  		if (pt_op->bind)
> @@ -2584,6 +2603,89 @@ void xe_pt_update_ops_abort(struct xe_tile
> *tile, struct xe_vma_ops *vops)
>  			xe_pt_abort_unbind(pt_op->vma, pt_op-
> >entries,
>  					   pt_op->num_entries);
>  	}
> +}
> +
> +/**
> + * xe_pt_job_ops_alloc() - Allocate PT job ops
> + * @num_ops: Number of VM PT update ops
> + *
> + * Allocate PT job ops and internal array of VM PT update ops.
> + *
> + * Return: Pointer to PT job ops or NULL
> + */
> +struct xe_pt_job_ops *xe_pt_job_ops_alloc(u32 num_ops)
> +{
> +	struct xe_pt_job_ops *pt_job_ops;
> +
> +	pt_job_ops = kmalloc(sizeof(*pt_job_ops), GFP_KERNEL);
> +	if (!pt_job_ops)
> +		return NULL;
> +
> +	pt_job_ops->ops = kvmalloc_array(num_ops,
> sizeof(*pt_job_ops->ops),
> +					 GFP_KERNEL);
> +	if (!pt_job_ops->ops) {
> +		kvfree(pt_job_ops);
> +		return NULL;
> +	}
> +
> +	pt_job_ops->current_op = 0;
> +	kref_init(&pt_job_ops->refcount);
> +	init_llist_head(&pt_job_ops->deferred);
> +
> +	return pt_job_ops;
> +}
> +
> +/**
> + * xe_pt_job_ops_get() - Get PT job ops
> + * @pt_job_ops: PT job ops to get
> + *
> + * Take a reference to PT job ops
> + *
> + * Return: Pointer to PT job ops or NULL
> + */
> +struct xe_pt_job_ops *xe_pt_job_ops_get(struct xe_pt_job_ops
> *pt_job_ops)
> +{
> +	if (pt_job_ops)
> +		kref_get(&pt_job_ops->refcount);
> +
> +	return pt_job_ops;
> +}
> +
> +static void xe_pt_job_ops_destroy(struct kref *ref)
> +{
> +	struct xe_pt_job_ops *pt_job_ops =
> +		container_of(ref, struct xe_pt_job_ops, refcount);
> +	struct llist_node *freed;
> +	struct xe_bo *bo, *next;
> +
> +	xe_pt_update_ops_free(pt_job_ops->ops,
> +			      pt_job_ops->current_op);
> +
> +	freed = llist_del_all(&pt_job_ops->deferred);
> +	if (freed) {
> +		llist_for_each_entry_safe(bo, next, freed, freed)
> +			/*
> +			 * If called from run_job, we are in the dma-fencing
> +			 * path and cannot take dma-resv locks so use an
> +			 * async put.
> +			 */
> +			xe_bo_put_async(bo);
> +	}
> +
> +	kvfree(pt_job_ops->ops);
> +	kfree(pt_job_ops);
> +}
> +
> +/**
> + * xe_pt_job_ops_put() - Put PT job ops
> + * @pt_job_ops: PT job ops to put
> + *
> + * Drop a reference to PT job ops
> + */
> +void xe_pt_job_ops_put(struct xe_pt_job_ops *pt_job_ops)
> +{
> +	if (!pt_job_ops)
> +		return;
>  
> -	xe_pt_update_ops_fini(tile, vops);
> +	kref_put(&pt_job_ops->refcount, xe_pt_job_ops_destroy);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_pt.h b/drivers/gpu/drm/xe/xe_pt.h
> index 5ecf003d513c..c9904573db82 100644
> --- a/drivers/gpu/drm/xe/xe_pt.h
> +++ b/drivers/gpu/drm/xe/xe_pt.h
> @@ -41,11 +41,14 @@ void xe_pt_clear(struct xe_device *xe, struct
> xe_pt *pt);
>  int xe_pt_update_ops_prepare(struct xe_tile *tile, struct xe_vma_ops
> *vops);
>  struct dma_fence *xe_pt_update_ops_run(struct xe_tile *tile,
>  				       struct xe_vma_ops *vops);
> -void xe_pt_update_ops_fini(struct xe_tile *tile, struct xe_vma_ops
> *vops);
>  void xe_pt_update_ops_abort(struct xe_tile *tile, struct xe_vma_ops
> *vops);
>  
>  bool xe_pt_zap_ptes(struct xe_tile *tile, struct xe_vma *vma);
>  bool xe_pt_zap_ptes_range(struct xe_tile *tile, struct xe_vm *vm,
>  			  struct xe_svm_range *range);
>  
> +struct xe_pt_job_ops *xe_pt_job_ops_alloc(u32 num_ops);
> +struct xe_pt_job_ops *xe_pt_job_ops_get(struct xe_pt_job_ops
> *pt_job_ops);
> +void xe_pt_job_ops_put(struct xe_pt_job_ops *pt_job_ops);
> +
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_pt_types.h
> b/drivers/gpu/drm/xe/xe_pt_types.h
> index 69eab6f37cfe..33d0d20e0ac6 100644
> --- a/drivers/gpu/drm/xe/xe_pt_types.h
> +++ b/drivers/gpu/drm/xe/xe_pt_types.h
> @@ -70,6 +70,9 @@ struct xe_vm_pgtable_update {
>  	/** @pt_entries: Newly added pagetable entries */
>  	struct xe_pt_entry *pt_entries;
>  
> +	/** @level: level of update */
> +	unsigned int level;
> +
>  	/** @flags: Target flags */
>  	u32 flags;
>  };
> @@ -88,12 +91,28 @@ struct xe_vm_pgtable_update_op {
>  	bool rebind;
>  };
>  
> -/** struct xe_vm_pgtable_update_ops: page table update operations */
> -struct xe_vm_pgtable_update_ops {
> -	/** @ops: operations */
> -	struct xe_vm_pgtable_update_op *ops;
> +/**
> + * struct xe_pt_job_ops: dynamically allocated page table update operations
> + *
> + * This is the part of struct xe_vma_ops and struct xe_vm_pgtable_update_ops
> + * which is dynamically allocated, as it must be available until the bind
> + * job is complete.
> + */
> +struct xe_pt_job_ops {
> +	/** @current_op: current operations */
> +	u32 current_op;
> +	/** @refcount: ref count ops allocation */
> +	struct kref refcount;
>  	/** @deferred: deferred list to destroy PT entries */
>  	struct llist_head deferred;
> +	/** @ops: operations */
> +	struct xe_vm_pgtable_update_op *ops;
> +};
> +
> +/** struct xe_vm_pgtable_update_ops: page table update operations */
> +struct xe_vm_pgtable_update_ops {
> +	/** @pt_job_ops: Dynamically allocated PT update operations */
> +	struct xe_pt_job_ops *pt_job_ops;
>  	/** @q: exec queue for PT operations */
>  	struct xe_exec_queue *q;
>  	/** @start: start address of ops */
> @@ -102,8 +121,6 @@ struct xe_vm_pgtable_update_ops {
>  	u64 last;
>  	/** @num_ops: number of operations */
>  	u32 num_ops;
> -	/** @current_op: current operations */
> -	u32 current_op;
>  	/** @needs_svm_lock: Needs SVM lock */
>  	bool needs_svm_lock;
>  	/** @needs_userptr_lock: Needs userptr lock */
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c
> b/drivers/gpu/drm/xe/xe_sched_job.c
> index d21bf8f26964..09cdd14d9ef7 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -26,19 +26,22 @@ static struct kmem_cache
> *xe_sched_job_parallel_slab;
>  
>  int __init xe_sched_job_module_init(void)
>  {
> +	struct xe_sched_job *job;
> +	size_t size;
> +
> +	size = struct_size(job, ptrs, 1);
>  	xe_sched_job_slab =
> -		kmem_cache_create("xe_sched_job",
> -				  sizeof(struct xe_sched_job) +
> -				  sizeof(struct xe_job_ptrs), 0,
> +		kmem_cache_create("xe_sched_job", size, 0,
>  				  SLAB_HWCACHE_ALIGN, NULL);
>  	if (!xe_sched_job_slab)
>  		return -ENOMEM;
>  
> +	size = max_t(size_t,
> +		     struct_size(job, ptrs,
> +				 XE_HW_ENGINE_MAX_INSTANCE),
> +		     struct_size(job, pt_update, 1));
>  	xe_sched_job_parallel_slab =
> -		kmem_cache_create("xe_sched_job_parallel",
> -				  sizeof(struct xe_sched_job) +
> -				  sizeof(struct xe_job_ptrs) *
> -				  XE_HW_ENGINE_MAX_INSTANCE, 0,
> +		kmem_cache_create("xe_sched_job_parallel", size, 0,
>  				  SLAB_HWCACHE_ALIGN, NULL);
>  	if (!xe_sched_job_parallel_slab) {
>  		kmem_cache_destroy(xe_sched_job_slab);
> @@ -84,7 +87,7 @@ static void xe_sched_job_free_fences(struct
> xe_sched_job *job)
>  {
>  	int i;
>  
> -	for (i = 0; i < job->q->width; ++i) {
> +	for (i = 0; !job->is_pt_job && i < job->q->width; ++i) {
>  		struct xe_job_ptrs *ptrs = &job->ptrs[i];
>  
>  		if (ptrs->lrc_fence)
> @@ -118,33 +121,44 @@ struct xe_sched_job *xe_sched_job_create(struct
> xe_exec_queue *q,
>  	if (err)
>  		goto err_free;
>  
> -	for (i = 0; i < q->width; ++i) {
> -		struct dma_fence *fence =
> xe_lrc_alloc_seqno_fence();
> -		struct dma_fence_chain *chain;
> -
> -		if (IS_ERR(fence)) {
> -			err = PTR_ERR(fence);
> -			goto err_sched_job;
> -		}
> -		job->ptrs[i].lrc_fence = fence;
> -
> -		if (i + 1 == q->width)
> -			continue;
> -
> -		chain = dma_fence_chain_alloc();
> -		if (!chain) {
> +	if (!batch_addr) {
> +		job->fence =
> dma_fence_allocate_private_stub(ktime_get());
> +		if (!job->fence) {
>  			err = -ENOMEM;
>  			goto err_sched_job;
>  		}
> -		job->ptrs[i].chain_fence = chain;
> +		job->is_pt_job = true;
> +	} else {
> +		for (i = 0; i < q->width; ++i) {
> +			struct dma_fence *fence =
> xe_lrc_alloc_seqno_fence();
> +			struct dma_fence_chain *chain;
> +
> +			if (IS_ERR(fence)) {
> +				err = PTR_ERR(fence);
> +				goto err_sched_job;
> +			}
> +			job->ptrs[i].lrc_fence = fence;
> +
> +			if (i + 1 == q->width)
> +				continue;
> +
> +			chain = dma_fence_chain_alloc();
> +			if (!chain) {
> +				err = -ENOMEM;
> +				goto err_sched_job;
> +			}
> +			job->ptrs[i].chain_fence = chain;
> +		}
>  	}
>  
> -	width = q->width;
> -	if (is_migration)
> -		width = 2;
> +	if (batch_addr) {
> +		width = q->width;
> +		if (is_migration)
> +			width = 2;
>  
> -	for (i = 0; i < width; ++i)
> -		job->ptrs[i].batch_addr = batch_addr[i];
> +		for (i = 0; i < width; ++i)
> +			job->ptrs[i].batch_addr = batch_addr[i];
> +	}
>  
>  	xe_pm_runtime_get_noresume(job_to_xe(job));
>  	trace_xe_sched_job_create(job);
> @@ -243,7 +257,7 @@ bool xe_sched_job_completed(struct xe_sched_job
> *job)
>  void xe_sched_job_arm(struct xe_sched_job *job)
>  {
>  	struct xe_exec_queue *q = job->q;
> -	struct dma_fence *fence, *prev;
> +	struct dma_fence *fence = job->fence, *prev;
>  	struct xe_vm *vm = q->vm;
>  	u64 seqno = 0;
>  	int i;
> @@ -263,6 +277,9 @@ void xe_sched_job_arm(struct xe_sched_job *job)
>  		job->ring_ops_flush_tlb = true;
>  	}
>  
> +	if (job->is_pt_job)
> +		goto arm;
> +
>  	/* Arm the pre-allocated fences */
>  	for (i = 0; i < q->width; prev = fence, ++i) {
>  		struct dma_fence_chain *chain;
> @@ -283,6 +300,7 @@ void xe_sched_job_arm(struct xe_sched_job *job)
>  		fence = &chain->base;
>  	}
>  
> +arm:
>  	job->fence = dma_fence_get(fence);	/* Pairs with put in
> scheduler */
>  	drm_sched_job_arm(&job->drm);
>  }
> diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h
> b/drivers/gpu/drm/xe/xe_sched_job_types.h
> index dbf260dded8d..79a459f2a0a8 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job_types.h
> +++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
> @@ -10,10 +10,29 @@
>  
>  #include <drm/gpu_scheduler.h>
>  
> -struct xe_exec_queue;
>  struct dma_fence;
>  struct dma_fence_chain;
>  
> +struct xe_exec_queue;
> +struct xe_migrate_pt_update_ops;
> +struct xe_pt_job_ops;
> +struct xe_tile;
> +struct xe_vm;
> +
> +/**
> + * struct xe_pt_update_args - PT update arguments
> + */
> +struct xe_pt_update_args {
> +	/** @vm: VM */
> +	struct xe_vm *vm;
> +	/** @tile: Tile */
> +	struct xe_tile *tile;
> +	/** @ops: Migrate PT update ops */
> +	const struct xe_migrate_pt_update_ops *ops;
> +	/** @pt_job_ops: PT update ops */
> +	struct xe_pt_job_ops *pt_job_ops;
> +};
> +
>  /**
>   * struct xe_job_ptrs - Per hw engine instance data
>   */
> @@ -58,8 +77,14 @@ struct xe_sched_job {
>  	bool ring_ops_flush_tlb;
>  	/** @ggtt: mapped in ggtt. */
>  	bool ggtt;
> -	/** @ptrs: per instance pointers. */
> -	struct xe_job_ptrs ptrs[];
> +	/** @is_pt_job: is a PT job */
> +	bool is_pt_job;
> +	union {
> +		/** @ptrs: per instance pointers. */
> +		DECLARE_FLEX_ARRAY(struct xe_job_ptrs, ptrs);
> +		/** @pt_update: PT update arguments */
> +		DECLARE_FLEX_ARRAY(struct xe_pt_update_args,
> pt_update);
> +	};
>  };
>  
>  struct xe_sched_job_snapshot {
> diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c
> index 18f967ce1f1a..6fc01fdd7286 100644
> --- a/drivers/gpu/drm/xe/xe_vm.c
> +++ b/drivers/gpu/drm/xe/xe_vm.c
> @@ -780,6 +780,19 @@ int xe_vm_userptr_check_repin(struct xe_vm *vm)
>  		list_empty_careful(&vm->userptr.invalidated)) ? 0 :
> -EAGAIN;
>  }
>  
> +static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm
> *vm,
> +			    struct xe_exec_queue *q,
> +			    struct xe_sync_entry *syncs, u32
> num_syncs)
> +{
> +	memset(vops, 0, sizeof(*vops));
> +	INIT_LIST_HEAD(&vops->list);
> +	vops->vm = vm;
> +	vops->q = q;
> +	vops->syncs = syncs;
> +	vops->num_syncs = num_syncs;
> +	vops->flags = 0;
> +}
> +
>  static int xe_vma_ops_alloc(struct xe_vma_ops *vops, bool
> array_of_binds)
>  {
>  	int i;
> @@ -788,11 +801,9 @@ static int xe_vma_ops_alloc(struct xe_vma_ops
> *vops, bool array_of_binds)
>  		if (!vops->pt_update_ops[i].num_ops)
>  			continue;
>  
> -		vops->pt_update_ops[i].ops =
> -			kmalloc_array(vops-
> >pt_update_ops[i].num_ops,
> -				      sizeof(*vops-
> >pt_update_ops[i].ops),
> -				      GFP_KERNEL |
> __GFP_RETRY_MAYFAIL | __GFP_NOWARN);
> -		if (!vops->pt_update_ops[i].ops)
> +		vops->pt_update_ops[i].pt_job_ops =
> +			xe_pt_job_ops_alloc(vops-
> >pt_update_ops[i].num_ops);
> +		if (!vops->pt_update_ops[i].pt_job_ops)
>  			return array_of_binds ? -ENOBUFS : -ENOMEM;
>  	}
>  
> @@ -828,7 +839,7 @@ static void xe_vma_ops_fini(struct xe_vma_ops
> *vops)
>  	xe_vma_svm_prefetch_ops_fini(vops);
>  
>  	for (i = 0; i < XE_MAX_TILES_PER_DEVICE; ++i)
> -		kfree(vops->pt_update_ops[i].ops);
> +		xe_pt_job_ops_put(vops-
> >pt_update_ops[i].pt_job_ops);
>  }
>  
>  static void xe_vma_ops_incr_pt_update_ops(struct xe_vma_ops *vops,
> u8 tile_mask, int inc_val)
> @@ -877,9 +888,6 @@ static int xe_vm_ops_add_rebind(struct xe_vma_ops
> *vops, struct xe_vma *vma,
>  
>  static struct dma_fence *ops_execute(struct xe_vm *vm,
>  				     struct xe_vma_ops *vops);
> -static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm
> *vm,
> -			    struct xe_exec_queue *q,
> -			    struct xe_sync_entry *syncs, u32
> num_syncs);
>  
>  int xe_vm_rebind(struct xe_vm *vm, bool rebind_worker)
>  {
> @@ -3163,13 +3171,6 @@ static struct dma_fence *ops_execute(struct
> xe_vm *vm,
>  		fence = &cf->base;
>  	}
>  
> -	for_each_tile(tile, vm->xe, id) {
> -		if (!vops->pt_update_ops[id].num_ops)
> -			continue;
> -
> -		xe_pt_update_ops_fini(tile, vops);
> -	}
> -
>  	return fence;
>  
>  err_out:
> @@ -3447,19 +3448,6 @@ static int vm_bind_ioctl_signal_fences(struct
> xe_vm *vm,
>  	return err;
>  }
>  
> -static void xe_vma_ops_init(struct xe_vma_ops *vops, struct xe_vm
> *vm,
> -			    struct xe_exec_queue *q,
> -			    struct xe_sync_entry *syncs, u32
> num_syncs)
> -{
> -	memset(vops, 0, sizeof(*vops));
> -	INIT_LIST_HEAD(&vops->list);
> -	vops->vm = vm;
> -	vops->q = q;
> -	vops->syncs = syncs;
> -	vops->num_syncs = num_syncs;
> -	vops->flags = 0;
> -}
> -
>  static int xe_vm_bind_ioctl_validate_bo(struct xe_device *xe, struct
> xe_bo *bo,
>  					u64 addr, u64 range, u64
> obj_offset,
>  					u16 pat_index, u32 op, u32
> bind_flags)


