[Intel-xe] [RFC PATCH 1/1] drm/xe: Return NULL in run_job for long running jobs
Matthew Brost
matthew.brost at intel.com
Wed Mar 29 00:09:58 UTC 2023
Return NULL in run_job for long running jobs; flow control of the ring is
now done via prepare_job.

FIXME: Need to implement cleanup for long running engines that encounter
errors.
Signed-off-by: Matthew Brost <matthew.brost at intel.com>
---
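Not part of the patch, just to make the flow-control window concrete: with
made-up values for the ring size and MAX_JOB_SIZE_BYTES (the real numbers
come from the LRC setup and the GuC submission backend), the expression
used in guc_engine_prepare_job below works out as in this self-contained
sketch.

#include <stdint.h>
#include <stdio.h>

/* Illustrative values only; the driver reads the ring size from the LRC
 * and MAX_JOB_SIZE_BYTES is defined by the GuC submission backend. */
#define EXAMPLE_RING_SIZE	(16 * 1024)
#define EXAMPLE_MAX_JOB_BYTES	256

int main(void)
{
	uint32_t seqno = 1000;
	/* Same expression as in guc_engine_prepare_job below. */
	uint32_t window = (EXAMPLE_RING_SIZE * 2) / EXAMPLE_MAX_JOB_BYTES;
	uint32_t wait_seqno = seqno - window;

	printf("job %u is held back until job %u has left the ring (window %u)\n",
	       seqno, wait_seqno, window);
	return 0;
}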
drivers/gpu/drm/xe/xe_engine.c | 54 ++++++++++++++++++++++++-
drivers/gpu/drm/xe/xe_engine.h | 6 +++
drivers/gpu/drm/xe/xe_engine_types.h | 3 ++
drivers/gpu/drm/xe/xe_guc_submit.c | 31 ++++++++++++--
drivers/gpu/drm/xe/xe_sched_job.c | 5 +++
drivers/gpu/drm/xe/xe_sched_job_types.h | 2 +
6 files changed, 96 insertions(+), 5 deletions(-)
diff --git a/drivers/gpu/drm/xe/xe_engine.c b/drivers/gpu/drm/xe/xe_engine.c
index 37209b13bcd6..440f69ed3dc8 100644
--- a/drivers/gpu/drm/xe/xe_engine.c
+++ b/drivers/gpu/drm/xe/xe_engine.c
@@ -40,8 +40,11 @@ static struct xe_engine *__xe_engine_create(struct xe_device *xe,
e->flags = flags;
e->hwe = hwe;
e->gt = gt;
- if (vm)
+ if (vm) {
e->vm = xe_vm_get(vm);
+ if (xe_vm_no_dma_fences(vm))
+ xa_init_flags(&e->pending_lr_jobs, XA_FLAGS_ALLOC1);
+ }
e->class = hwe->class;
e->width = width;
e->logical_mask = logical_mask;
@@ -149,8 +152,11 @@ void xe_engine_fini(struct xe_engine *e)
for (i = 0; i < e->width; ++i)
xe_lrc_finish(e->lrc + i);
- if (e->vm)
+ if (e->vm) {
xe_vm_put(e->vm);
+ if (xe_vm_no_dma_fences(e->vm))
+ xa_destroy(&e->pending_lr_jobs);
+ }
kfree(e);
}
@@ -789,3 +795,47 @@ int xe_engine_set_property_ioctl(struct drm_device *dev, void *data,
return ret;
}
+
+int xe_engine_add_lr_job(struct xe_engine *e, struct xe_sched_job *job)
+{
+ void *ret;
+
+ if (!xe_engine_is_lr(e))
+ return 0;
+
+ ret = xa_store(&e->pending_lr_jobs, job->fence->seqno, job, GFP_KERNEL);
+ if (IS_ERR(ret))
+ return PTR_ERR(ret);
+ XE_WARN_ON(ret);
+
+ return 0;
+}
+
+void xe_engine_del_lr_job(struct xe_engine *e, struct xe_sched_job *job)
+{
+ if (!xe_engine_is_lr(e))
+ return;
+
+ xa_erase(&e->pending_lr_jobs, job->fence->seqno);
+}
+
+struct xe_sched_job *xe_engine_get_lr_job(struct xe_engine *e, u32 seqno)
+{
+ struct xe_sched_job *job;
+
+ if (!xe_engine_is_lr(e))
+ return NULL;
+
+ xa_lock(&e->pending_lr_jobs);
+ job = xa_load(&e->pending_lr_jobs, seqno);
+ if (job)
+ dma_fence_get(job->fence);
+ xa_unlock(&e->pending_lr_jobs);
+
+ return job;
+}
+
+bool xe_engine_is_lr(struct xe_engine *e)
+{
+ return e->vm && xe_vm_no_dma_fences(e->vm);
+}
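Aside, not part of the patch: the helpers above combine an xarray keyed by
fence seqno with a fence reference taken under xa_lock(), so a concurrent
removal cannot free the fence under the reader. A stripped-down sketch of
the same pattern with hypothetical example_* names, using only the
documented xarray/dma-fence API:

#include <linux/dma-fence.h>
#include <linux/types.h>
#include <linux/xarray.h>

/* Hypothetical pending-work table keyed by fence seqno
 * (XA_FLAGS_ALLOC1, mirroring the patch; explicit xa_store() keys work
 * either way). */
static DEFINE_XARRAY_ALLOC1(example_pending);

struct example_entry {
	struct dma_fence *fence;
};

static int example_add(u32 seqno, struct example_entry *e)
{
	/* xa_store() returns the previous entry, or an xa_err() pointer. */
	return xa_err(xa_store(&example_pending, seqno, e, GFP_KERNEL));
}

static void example_del(u32 seqno)
{
	xa_erase(&example_pending, seqno);
}

static struct dma_fence *example_get_fence(u32 seqno)
{
	struct example_entry *e;
	struct dma_fence *fence = NULL;

	/*
	 * Take the fence reference while holding the xarray lock so a
	 * concurrent example_del() plus free cannot race with the lookup.
	 */
	xa_lock(&example_pending);
	e = xa_load(&example_pending, seqno);
	if (e)
		fence = dma_fence_get(e->fence);
	xa_unlock(&example_pending);

	return fence;
}

Keying by seqno rather than by job pointer is what lets prepare_job compute
which older job to wait on purely from arithmetic on the new job's seqno.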
diff --git a/drivers/gpu/drm/xe/xe_engine.h b/drivers/gpu/drm/xe/xe_engine.h
index a49cf2ab405e..b7aa5a03adcd 100644
--- a/drivers/gpu/drm/xe/xe_engine.h
+++ b/drivers/gpu/drm/xe/xe_engine.h
@@ -12,6 +12,7 @@
struct drm_device;
struct drm_file;
struct xe_device;
+struct xe_sched_job;
struct xe_file;
struct xe_engine *xe_engine_create(struct xe_device *xe, struct xe_vm *vm,
@@ -24,6 +25,11 @@ struct xe_engine *xe_engine_create_class(struct xe_device *xe, struct xe_gt *gt,
void xe_engine_fini(struct xe_engine *e);
void xe_engine_destroy(struct kref *ref);
+int xe_engine_add_lr_job(struct xe_engine *e, struct xe_sched_job *job);
+void xe_engine_del_lr_job(struct xe_engine *e, struct xe_sched_job *job);
+struct xe_sched_job *xe_engine_get_lr_job(struct xe_engine *e, u32 seqno);
+bool xe_engine_is_lr(struct xe_engine *e);
+
struct xe_engine *xe_engine_lookup(struct xe_file *xef, u32 id);
static inline struct xe_engine *xe_engine_get(struct xe_engine *engine)
diff --git a/drivers/gpu/drm/xe/xe_engine_types.h b/drivers/gpu/drm/xe/xe_engine_types.h
index 36bfaeec23f4..46b718343f79 100644
--- a/drivers/gpu/drm/xe/xe_engine_types.h
+++ b/drivers/gpu/drm/xe/xe_engine_types.h
@@ -143,6 +143,9 @@ struct xe_engine {
u32 acc_granularity;
} usm;
+ /** @pending_lr_jobs: Pending jobs for long running workloads */
+ struct xarray pending_lr_jobs;
+
/** @ops: submission backend engine operations */
const struct xe_engine_ops *ops;
diff --git a/drivers/gpu/drm/xe/xe_guc_submit.c b/drivers/gpu/drm/xe/xe_guc_submit.c
index e857013070b9..590e5481d063 100644
--- a/drivers/gpu/drm/xe/xe_guc_submit.c
+++ b/drivers/gpu/drm/xe/xe_guc_submit.c
@@ -377,6 +377,22 @@ static void set_min_preemption_timeout(struct xe_guc *guc, struct xe_engine *e)
__guc_engine_policy_action_size(&policy), 0, 0);
}
+static struct dma_fence *
+guc_engine_prepare_job(struct drm_sched_job *drm_job,
+ struct drm_sched_entity *s_entity)
+{
+ struct xe_sched_job *job = to_xe_sched_job(drm_job), *lr_job;
+ struct xe_engine *e = job->engine;
+ u32 seqno = (u32)(job->fence->seqno) -
+ (e->lrc[0].ring.size * 2) / MAX_JOB_SIZE_BYTES;
+
+ lr_job = xe_engine_get_lr_job(job->engine, seqno);
+ if (lr_job)
+ return lr_job->fence;
+ else
+ return NULL;
+}
+
#define PARALLEL_SCRATCH_SIZE 2048
#define WQ_SIZE (PARALLEL_SCRATCH_SIZE / 2)
#define WQ_OFFSET (PARALLEL_SCRATCH_SIZE - WQ_SIZE)
@@ -675,18 +691,26 @@ guc_engine_run_job(struct drm_sched_job *drm_job)
submit_engine(e);
}
- if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
+ if (xe_engine_is_lr(job->engine))
+ return NULL;
+ else if (test_and_set_bit(JOB_FLAG_SUBMIT, &job->fence->flags))
return job->fence;
else
return dma_fence_get(job->fence);
}
+static void __guc_engine_free_job(struct xe_sched_job *job)
+{
+ trace_xe_sched_job_free(job);
+ xe_sched_job_put(job);
+}
+
static void guc_engine_free_job(struct drm_sched_job *drm_job)
{
struct xe_sched_job *job = to_xe_sched_job(drm_job);
- trace_xe_sched_job_free(job);
- xe_sched_job_put(job);
+ if (xe_engine_is_lr(job->engine))
+ __guc_engine_free_job(job);
}
static int guc_read_stopped(struct xe_guc *guc)
@@ -1056,6 +1080,7 @@ static void guc_engine_process_msg(struct drm_sched_msg *msg)
}
static const struct drm_sched_backend_ops drm_sched_ops = {
+ .prepare_job = guc_engine_prepare_job,
.run_job = guc_engine_run_job,
.free_job = guc_engine_free_job,
.timedout_job = guc_engine_timedout_job,
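For readers unfamiliar with the hook being wired up here: per the kernel-doc
in include/drm/gpu_scheduler.h, the scheduler calls .prepare_job repeatedly
to get fences the job must block on, and only calls run_job once the
callback returns NULL. A minimal, hypothetical backend illustrating that
contract (struct and helper names are made up; only the callback signature
matches the scheduler's):

#include <drm/gpu_scheduler.h>
#include <linux/dma-fence.h>

/* Hypothetical per-job state: one throttle fence to wait on, if any. */
struct example_job {
	struct drm_sched_job base;
	struct dma_fence *throttle;	/* may be NULL */
};

static struct dma_fence *
example_prepare_job(struct drm_sched_job *sched_job,
		    struct drm_sched_entity *s_entity)
{
	struct example_job *job = container_of(sched_job, struct example_job, base);

	/*
	 * Hand one dependency back at a time; the scheduler waits on it and
	 * calls this hook again.  Returning NULL means "nothing left to wait on".
	 */
	if (job->throttle && !dma_fence_is_signaled(job->throttle)) {
		/* The scheduler consumes a reference to the returned fence. */
		return dma_fence_get(job->throttle);
	}

	return NULL;
}

In the patch, the fence returned by xe_engine_get_lr_job() already carries
the extra reference taken under xa_lock(), so guc_engine_prepare_job() can
hand it straight to the scheduler.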
diff --git a/drivers/gpu/drm/xe/xe_sched_job.c b/drivers/gpu/drm/xe/xe_sched_job.c
index 2985caa6097b..ece3a5db3978 100644
--- a/drivers/gpu/drm/xe/xe_sched_job.c
+++ b/drivers/gpu/drm/xe/xe_sched_job.c
@@ -154,6 +154,10 @@ struct xe_sched_job *xe_sched_job_create(struct xe_engine *e,
for (i = 0; i < width; ++i)
job->batch_addr[i] = batch_addr[i];
+ err = xe_engine_add_lr_job(e, job);
+ if (err)
+ goto err_fences;
+
/* All other jobs require a VM to be open which has a ref */
if (unlikely(e->flags & ENGINE_FLAG_KERNEL))
xe_device_mem_access_get(job_to_xe(job));
@@ -190,6 +194,7 @@ void xe_sched_job_destroy(struct kref *ref)
if (unlikely(job->engine->flags & ENGINE_FLAG_KERNEL))
xe_device_mem_access_put(job_to_xe(job));
+ xe_engine_del_lr_job(job->engine, job);
xe_engine_put(job->engine);
dma_fence_put(job->fence);
drm_sched_job_cleanup(&job->drm);
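Not part of the patch: the pairing that matters in the two hunks above is
that the job is added to pending_lr_jobs when it is created and only erased
in the final put, so prepare_job can find it for as long as the job exists.
A generic kref-based sketch of that register-on-create / unregister-on-release
pattern, with made-up names:

#include <linux/err.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/xarray.h>

static DEFINE_XARRAY(example_live);	/* tracked objects, keyed by id */

struct example_obj {
	struct kref ref;
	u32 id;
};

static struct example_obj *example_create(u32 id)
{
	struct example_obj *obj = kzalloc(sizeof(*obj), GFP_KERNEL);
	int err;

	if (!obj)
		return ERR_PTR(-ENOMEM);

	kref_init(&obj->ref);
	obj->id = id;

	/* Register before the object is visible to anyone else. */
	err = xa_err(xa_store(&example_live, id, obj, GFP_KERNEL));
	if (err) {
		kfree(obj);
		return ERR_PTR(err);
	}
	return obj;
}

static void example_release(struct kref *ref)
{
	struct example_obj *obj = container_of(ref, struct example_obj, ref);

	/* Unregister in the release path, mirroring xe_sched_job_destroy(). */
	xa_erase(&example_live, obj->id);
	kfree(obj);
}

static void example_put(struct example_obj *obj)
{
	kref_put(&obj->ref, example_release);
}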
diff --git a/drivers/gpu/drm/xe/xe_sched_job_types.h b/drivers/gpu/drm/xe/xe_sched_job_types.h
index 5534bfacaa16..07406d894b26 100644
--- a/drivers/gpu/drm/xe/xe_sched_job_types.h
+++ b/drivers/gpu/drm/xe/xe_sched_job_types.h
@@ -22,6 +22,8 @@ struct xe_sched_job {
struct xe_engine *engine;
/** @refcount: ref count of this job */
struct kref refcount;
+ /** @lr_job_cb: long running job callback */
+ struct dma_fence_cb lr_job_cb;
/**
* @fence: dma fence to indicate completion. 1 way relationship - job
* can safely reference fence, fence cannot safely reference job.
--
2.34.1