[Intel-xe] [PATCH 3/3] drm/xe: standardize vm-less kernel submissions

Fri Jul 28 01:50:00 UTC 2023

On Thu, Jul 27, 2023 at 11:59:45AM -0700, Daniele Ceraolo Spurio wrote:
> The current only submission in the driver that doesn't use a vm is the
> WA setup. We still pass a vm structure (the migration one), but we don't
> actually use it at submission time and we instead have an hack to use
> GGTT for this particular engine.
> Instead of special-casing the WA engine, we can skip providing a VM and
> use that as selector for whether to use GGTT or PPGTT. As part of this
> change, we can drop the special engine flag for the WA engine and switch
> the WA submission to use the standard job functions instead of dedicated
> ones.
> 
> Signed-off-by: Daniele Ceraolo Spurio <daniele.ceraolospurio at intel.com>
> Cc: Matthew Brost <matthew.brost at intel.com>

Reviewed-by: Matthew Brost <matthew.brost at intel.com>

> ---
>  drivers/gpu/drm/xe/xe_bb.c           | 10 ----------
>  drivers/gpu/drm/xe/xe_bb.h           |  2 --
>  drivers/gpu/drm/xe/xe_engine_types.h |  2 --
>  drivers/gpu/drm/xe/xe_gt.c           | 23 +++++++----------------
>  drivers/gpu/drm/xe/xe_ring_ops.c     |  2 +-
>  drivers/gpu/drm/xe/xe_sched_job.c    |  6 ++----
>  6 files changed, 10 insertions(+), 35 deletions(-)
> 
> diff --git a/drivers/gpu/drm/xe/xe_bb.c b/drivers/gpu/drm/xe/xe_bb.c
> index f9b6b7adf99f..8d4e645d43c5 100644
> --- a/drivers/gpu/drm/xe/xe_bb.c
> +++ b/drivers/gpu/drm/xe/xe_bb.c
> @@ -73,16 +73,6 @@ __xe_bb_create_job(struct xe_engine *kernel_eng, struct xe_bb *bb, u64 *addr)
>  	return xe_sched_job_create(kernel_eng, addr);
>  }
>  
> -struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
> -					 struct xe_bb *bb, u64 batch_base_ofs)
> -{
> -	u64 addr = batch_base_ofs + drm_suballoc_soffset(bb->bo);
> -
> -	XE_BUG_ON(!(wa_eng->vm->flags & XE_VM_FLAG_MIGRATION));
> -
> -	return __xe_bb_create_job(wa_eng, bb, &addr);
> -}
> -
>  struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
>  						struct xe_bb *bb,
>  						u64 batch_base_ofs,
> diff --git a/drivers/gpu/drm/xe/xe_bb.h b/drivers/gpu/drm/xe/xe_bb.h
> index 0cc9260c9634..d1d55bccbf45 100644
> --- a/drivers/gpu/drm/xe/xe_bb.h
> +++ b/drivers/gpu/drm/xe/xe_bb.h
> @@ -20,8 +20,6 @@ struct xe_sched_job *xe_bb_create_job(struct xe_engine *kernel_eng,
>  struct xe_sched_job *xe_bb_create_migration_job(struct xe_engine *kernel_eng,
>  						struct xe_bb *bb, u64 batch_ofs,
>  						u32 second_idx);
> -struct xe_sched_job *xe_bb_create_wa_job(struct xe_engine *wa_eng,
> -					 struct xe_bb *bb, u64 batch_ofs);
>  void xe_bb_free(struct xe_bb *bb, struct dma_fence *fence);
>  
>  #endif
> diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
> index 6802fdc10ec8..6ddc92b57c51 100644
> --- a/drivers/gpu/drm/xe/xe_engine_types.h
> +++ b/drivers/gpu/drm/xe/xe_engine_types.h
> @@ -66,8 +66,6 @@ struct xe_engine {
>  #define ENGINE_FLAG_VM			BIT(5)
>  /* child of VM engine for multi-tile VM jobs */
>  #define ENGINE_FLAG_BIND_ENGINE_CHILD	BIT(6)
> -/* engine used for WA setup */
> -#define ENGINE_FLAG_WA			BIT(7)
>  
>  	/**
>  	 * @flags: flags for this engine, should statically setup aside from ban
> diff --git a/drivers/gpu/drm/xe/xe_gt.c b/drivers/gpu/drm/xe/xe_gt.c
> index 3e32d38aeeea..de0a9683ad68 100644
> --- a/drivers/gpu/drm/xe/xe_gt.c
> +++ b/drivers/gpu/drm/xe/xe_gt.c
> @@ -85,15 +85,13 @@ static int emit_nop_job(struct xe_gt *gt, struct xe_engine *e)
>  	struct xe_sched_job *job;
>  	struct xe_bb *bb;
>  	struct dma_fence *fence;
> -	u64 batch_ofs;
>  	long timeout;
>  
>  	bb = xe_bb_new(gt, 4, false);
>  	if (IS_ERR(bb))
>  		return PTR_ERR(bb);
>  
> -	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
> -	job = xe_bb_create_wa_job(e, bb, batch_ofs);
> +	job = xe_bb_create_job(e, bb);
>  	if (IS_ERR(job)) {
>  		xe_bb_free(bb, NULL);
>  		return PTR_ERR(job);
> @@ -122,7 +120,6 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
>  	struct xe_sched_job *job;
>  	struct xe_bb *bb;
>  	struct dma_fence *fence;
> -	u64 batch_ofs;
>  	long timeout;
>  	int count = 0;
>  
> @@ -141,8 +138,7 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
>  		}
>  	}
>  
> -	batch_ofs = xe_bo_ggtt_addr(gt_to_tile(gt)->mem.kernel_bb_pool->bo);
> -	job = xe_bb_create_wa_job(e, bb, batch_ofs);
> +	job = xe_bb_create_job(e, bb);
>  	if (IS_ERR(job)) {
>  		xe_bb_free(bb, NULL);
>  		return PTR_ERR(job);
> @@ -166,14 +162,12 @@ static int emit_wa_job(struct xe_gt *gt, struct xe_engine *e)
>  int xe_gt_record_default_lrcs(struct xe_gt *gt)
>  {
>  	struct xe_device *xe = gt_to_xe(gt);
> -	struct xe_tile *tile = gt_to_tile(gt);
>  	struct xe_hw_engine *hwe;
>  	enum xe_hw_engine_id id;
>  	int err = 0;
>  
>  	for_each_hw_engine(hwe, gt, id) {
>  		struct xe_engine *e, *nop_e;
> -		struct xe_vm *vm;
>  		void *default_lrc;
>  
>  		if (gt->default_lrc[hwe->class])
> @@ -190,14 +184,13 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
>  		if (!default_lrc)
>  			return -ENOMEM;
>  
> -		vm = xe_migrate_get_vm(tile->migrate);
> -		e = xe_engine_create(xe, vm, BIT(hwe->logical_instance), 1,
> -				     hwe, ENGINE_FLAG_WA);
> +		e = xe_engine_create(xe, NULL, BIT(hwe->logical_instance), 1,
> +				     hwe, ENGINE_FLAG_KERNEL);
>  		if (IS_ERR(e)) {
>  			err = PTR_ERR(e);
>  			xe_gt_err(gt, "hwe %s: xe_engine_create failed (%pe)\n",
>  				  hwe->name, e);
> -			goto put_vm;
> +			return err;
>  		}
>  
>  		/* Prime golden LRC with known good state */
> @@ -208,8 +201,8 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
>  			goto put_engine;
>  		}
>  
> -		nop_e = xe_engine_create(xe, vm, BIT(hwe->logical_instance),
> -					 1, hwe, ENGINE_FLAG_WA);
> +		nop_e = xe_engine_create(xe, NULL, BIT(hwe->logical_instance),
> +					 1, hwe, ENGINE_FLAG_KERNEL);
>  		if (IS_ERR(nop_e)) {
>  			err = PTR_ERR(nop_e);
>  			xe_gt_err(gt, "hwe %s: nop xe_engine_create failed (%pe)\n",
> @@ -243,8 +236,6 @@ int xe_gt_record_default_lrcs(struct xe_gt *gt)
>  		xe_engine_put(nop_e);
>  put_engine:
>  		xe_engine_put(e);
> -put_vm:
> -		xe_vm_put(vm);
>  		if (err)
>  			break;
>  	}
> diff --git a/drivers/gpu/drm/xe/xe_ring_ops.c b/drivers/gpu/drm/xe/xe_ring_ops.c
> index 8e21c19cb041..bc0fdd4ef188 100644
> --- a/drivers/gpu/drm/xe/xe_ring_ops.c
> +++ b/drivers/gpu/drm/xe/xe_ring_ops.c
> @@ -202,7 +202,7 @@ static int emit_pipe_imm_ggtt(u32 addr, u32 value, bool stall_only, u32 *dw,
>  
>  static u32 get_ppgtt_flag(struct xe_sched_job *job)
>  {
> -	return !(job->engine->flags & ENGINE_FLAG_WA) ? BIT(8) : 0;
> +	return job->engine->vm ? BIT(8) : 0;
>  }
>  
>  static void __emit_job_gen12_copy(struct xe_sched_job *job, struct xe_lrc *lrc,
> diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
> index e1af1f2a44c2..68f446e8c081 100644
> --- a/drivers/gpu/drm/xe/xe_sched_job.c
> +++ b/drivers/gpu/drm/xe/xe_sched_job.c
> @@ -59,8 +59,7 @@ static struct xe_sched_job *job_alloc(bool parallel)
>  
>  bool xe_sched_job_is_migration(struct xe_engine *e)
>  {
> -	return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION) &&
> -		!(e->flags & ENGINE_FLAG_WA);
> +	return e->vm && (e->vm->flags & XE_VM_FLAG_MIGRATION);
>  }
>  
>  static void job_free(struct xe_sched_job *job)
> @@ -91,8 +90,7 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
>  	XE_BUG_ON(!e->vm && !(e->flags & ENGINE_FLAG_KERNEL));
>  
>  	/* Migration and kernel engines have their own locking */
> -	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM |
> -			  ENGINE_FLAG_WA))) {
> +	if (!(e->flags & (ENGINE_FLAG_KERNEL | ENGINE_FLAG_VM))) {
>  		lockdep_assert_held(&e->vm->lock);
>  		if (!xe_vm_no_dma_fences(e->vm))
>  			xe_vm_assert_held(e->vm);
> -- 
> 2.41.0
>