[Intel-gfx] [RFC 8/8] drm/i915: Force preemption to complete via engine reset
jeff.mcgee at intel.com
jeff.mcgee at intel.com
Fri Mar 16 18:31:05 UTC 2018
From: Jeff McGee <jeff.mcgee at intel.com>
The hardware can complete the requested preemption at only certain points
in execution. Thus an uncooperative request that avoids those points can
block a preemption indefinitely. Our only option to bound the preemption
latency is to trigger reset and recovery just as we would if a request had
hung the hardware. This is so-called forced preemption. This change adds
that capability as an option for systems with extremely strict scheduling
latency requirements for its high priority requests. This option must be
used with great care. The force-preempted requests will be discarded at
the point of reset, resulting in various degrees of disruption to the
owning application up to and including crash.
The option is enabled by specifying a non-zero value for new i915 module
parameter fpreempt_timeout. This value becomes the time in milliseconds
after initiation of preemption at which the reset is triggered if the
preemption has not completed normally.
Test: Run IGT gem_exec_fpreempt.
Change-Id: Iafd3609012621c57fa9e490dfeeac46ae541b5c2
Signed-off-by: Jeff McGee <jeff.mcgee at intel.com>
---
drivers/gpu/drm/i915/i915_drv.c | 14 ++++++++-
drivers/gpu/drm/i915/i915_drv.h | 2 ++
drivers/gpu/drm/i915/i915_gem.c | 37 +++++++++++++++++++++--
drivers/gpu/drm/i915/i915_params.c | 3 ++
drivers/gpu/drm/i915/i915_params.h | 1 +
drivers/gpu/drm/i915/intel_engine_cs.c | 53 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 24 ++++++++++++++-
drivers/gpu/drm/i915/intel_ringbuffer.h | 6 ++++
8 files changed, 136 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_drv.c b/drivers/gpu/drm/i915/i915_drv.c
index b627370d5a9c..3f2394d61ea2 100644
--- a/drivers/gpu/drm/i915/i915_drv.c
+++ b/drivers/gpu/drm/i915/i915_drv.c
@@ -811,8 +811,16 @@ static int i915_workqueues_init(struct drm_i915_private *dev_priv)
if (dev_priv->hotplug.dp_wq == NULL)
goto out_free_wq;
+ if (INTEL_INFO(dev_priv)->has_logical_ring_preemption) {
+ dev_priv->fpreempt_wq = alloc_ordered_workqueue("i915-fpreempt",
+ WQ_HIGHPRI);
+ if (dev_priv->fpreempt_wq == NULL)
+ goto out_free_dp_wq;
+ }
return 0;
+out_free_dp_wq:
+ destroy_workqueue(dev_priv->hotplug.dp_wq);
out_free_wq:
destroy_workqueue(dev_priv->wq);
out_err:
@@ -832,6 +840,8 @@ static void i915_engines_cleanup(struct drm_i915_private *i915)
static void i915_workqueues_cleanup(struct drm_i915_private *dev_priv)
{
+ if (INTEL_INFO(dev_priv)->has_logical_ring_preemption)
+ destroy_workqueue(dev_priv->fpreempt_wq);
destroy_workqueue(dev_priv->hotplug.dp_wq);
destroy_workqueue(dev_priv->wq);
}
@@ -2007,7 +2017,9 @@ int i915_reset_engine(struct intel_engine_cs *engine, unsigned int flags)
if (!(flags & I915_RESET_QUIET)) {
dev_notice(engine->i915->drm.dev,
- "Resetting %s after gpu hang\n", engine->name);
+ "Resetting %s %s\n", engine->name,
+ engine->fpreempt_active ?
+ "for force preemption" : "after gpu hang");
}
error->reset_engine_count[engine->id]++;
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index ade09f97be5c..514e640d8406 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -2290,6 +2290,8 @@ struct drm_i915_private {
*/
struct workqueue_struct *wq;
+ struct workqueue_struct *fpreempt_wq;
+
/* Display functions */
struct drm_i915_display_funcs display;
diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
index 9780d9026ce6..d556743c578a 100644
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@@ -2811,9 +2811,21 @@ i915_gem_find_active_request(struct intel_engine_cs *engine)
return request;
}
-static bool engine_stalled(struct intel_engine_cs *engine)
+static bool engine_stalled(struct intel_engine_cs *engine,
+ struct drm_i915_gem_request *request)
{
if (!intel_vgpu_active(engine->i915)) {
+ if (engine->fpreempt_active) {
+ /* Pardon the request if it managed to complete or
+ * preempt prior to the reset.
+ */
+ if (i915_gem_request_completed(request) ||
+ intel_engine_preempt_finished(engine))
+ return false;
+
+ return true;
+ }
+
if (!engine->hangcheck.stalled)
return false;
@@ -2858,6 +2870,13 @@ i915_gem_reset_prepare_engine(struct intel_engine_cs *engine)
tasklet_kill(&engine->execlists.irq_tasklet);
tasklet_disable(&engine->execlists.irq_tasklet);
+ /* There may be a force preemption timer active on this engine but
+ * not yet expired, i.e. not the reason we are about to reset this
+ * engine. Cancel it. If force preemption timeout is the reason we
+ * are resetting the engine, this call will have no efffect.
+ */
+ intel_engine_cancel_fpreempt(engine);
+
if (engine->irq_seqno_barrier)
engine->irq_seqno_barrier(engine);
@@ -2958,7 +2977,7 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
* subsequent hangs.
*/
- if (engine_stalled(engine)) {
+ if (engine_stalled(engine, request)) {
i915_gem_context_mark_guilty(request->ctx);
skip_request(request);
@@ -2966,6 +2985,13 @@ i915_gem_reset_request(struct intel_engine_cs *engine,
if (i915_gem_context_is_banned(request->ctx))
engine_skip_context(request);
} else {
+ /* If the request that we just pardoned was the target of a
+ * force preemption there is no possibility of the next
+ * request in line having started.
+ */
+ if (engine->fpreempt_active)
+ return NULL;
+
/*
* Since this is not the hung engine, it may have advanced
* since the hang declaration. Double check by refinding
@@ -3035,6 +3061,13 @@ void i915_gem_reset(struct drm_i915_private *dev_priv)
void i915_gem_reset_finish_engine(struct intel_engine_cs *engine)
{
+ /* Mark any active force preemption as complete then kick
+ * the tasklet.
+ */
+ engine->fpreempt_active = false;
+ if (engine->execlists.first)
+ tasklet_schedule(&engine->execlists.irq_tasklet);
+
tasklet_enable(&engine->execlists.irq_tasklet);
kthread_unpark(engine->breadcrumbs.signaler);
}
diff --git a/drivers/gpu/drm/i915/i915_params.c b/drivers/gpu/drm/i915/i915_params.c
index 63751a1de74a..9a0deb6e3920 100644
--- a/drivers/gpu/drm/i915/i915_params.c
+++ b/drivers/gpu/drm/i915/i915_params.c
@@ -247,3 +247,6 @@ MODULE_PARM_DESC(enable_conformance_check, "To toggle the GVT guest conformance
module_param_named(disable_gvt_fw_loading, i915_modparams.disable_gvt_fw_loading, bool, 0400);
MODULE_PARM_DESC(disable_gvt_fw_loading, "Disable GVT-g fw loading.");
+
+i915_param_named(fpreempt_timeout, uint, 0600,
+ "Wait time in msecs before forcing a preemption with reset (0:never force [default])");
diff --git a/drivers/gpu/drm/i915/i915_params.h b/drivers/gpu/drm/i915/i915_params.h
index e8c2ba4cb1e6..65fbffa6333c 100644
--- a/drivers/gpu/drm/i915/i915_params.h
+++ b/drivers/gpu/drm/i915/i915_params.h
@@ -54,6 +54,7 @@
func(int, edp_vswing); \
func(int, reset); \
func(unsigned int, inject_load_failure); \
+ func(unsigned int, fpreempt_timeout); \
/* leave bools at the end to not create holes */ \
func(bool, alpha_support); \
func(bool, enable_cmd_parser); \
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index c9bf2347f7a4..af6cc2d0f7e9 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -411,6 +411,58 @@ static void intel_engine_init_execlist(struct intel_engine_cs *engine)
execlists->first = NULL;
}
+void intel_engine_queue_fpreempt(struct intel_engine_cs *engine)
+{
+ unsigned int timeout = i915_modparams.fpreempt_timeout;
+
+ if (!timeout)
+ return;
+
+ GEM_BUG_ON(engine->fpreempt_active);
+ hrtimer_start(&engine->fpreempt_timer,
+ ms_to_ktime(timeout), HRTIMER_MODE_REL);
+}
+
+bool intel_engine_cancel_fpreempt(struct intel_engine_cs *engine)
+{
+ hrtimer_cancel(&engine->fpreempt_timer);
+
+ return !engine->fpreempt_active;
+}
+
+static enum hrtimer_restart
+intel_engine_fpreempt_timer(struct hrtimer *hrtimer)
+{
+ struct intel_engine_cs *engine =
+ container_of(hrtimer, struct intel_engine_cs,
+ fpreempt_timer);
+
+ engine->fpreempt_active = true;
+ queue_work(engine->i915->fpreempt_wq, &engine->fpreempt_work);
+
+ return HRTIMER_NORESTART;
+}
+
+static void intel_engine_fpreempt_work(struct work_struct *work)
+{
+ struct intel_engine_cs *engine =
+ container_of(work, struct intel_engine_cs,
+ fpreempt_work);
+
+ i915_handle_reset(engine->i915, intel_engine_flag(engine));
+}
+
+static void intel_engine_init_fpreempt(struct intel_engine_cs *engine)
+{
+ if (!INTEL_INFO(engine->i915)->has_logical_ring_preemption)
+ return;
+
+ hrtimer_init(&engine->fpreempt_timer,
+ CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+ engine->fpreempt_timer.function = intel_engine_fpreempt_timer;
+ INIT_WORK(&engine->fpreempt_work, intel_engine_fpreempt_work);
+}
+
/**
* intel_engines_setup_common - setup engine state not requiring hw access
* @engine: Engine to setup.
@@ -426,6 +478,7 @@ void intel_engine_setup_common(struct intel_engine_cs *engine)
intel_engine_init_timeline(engine);
intel_engine_init_hangcheck(engine);
+ intel_engine_init_fpreempt(engine);
i915_gem_batch_pool_init(engine, &engine->batch_pool);
intel_engine_init_cmd_parser(engine);
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 581483886153..17487f8e8b4c 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -628,6 +628,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
inject_preempt_context(engine);
execlists_set_active(execlists,
EXECLISTS_ACTIVE_PREEMPT);
+ intel_engine_queue_fpreempt(engine);
goto unlock;
} else {
/*
@@ -846,6 +847,7 @@ static void intel_lrc_irq_handler(unsigned long data)
const u32 *buf =
&engine->status_page.page_addr[I915_HWS_CSB_BUF0_INDEX];
unsigned int head, tail;
+ bool defer = false;
/* However GVT emulation depends upon intercepting CSB mmio */
if (unlikely(execlists->csb_use_mmio)) {
@@ -919,6 +921,21 @@ static void intel_lrc_irq_handler(unsigned long data)
if (status & GEN8_CTX_STATUS_ACTIVE_IDLE &&
buf[2*head + 1] == PREEMPT_ID) {
+ /* Try to cancel any pending force preemption.
+ * If we are too late, hold off on processing
+ * the completed preemption until reset has
+ * run its course. It should recognize that
+ * the engine has preempted to idle then abort
+ * the reset. Then we can resume processing
+ * at this CSB head.
+ */
+ if (!intel_engine_cancel_fpreempt(engine)) {
+ if (!head--)
+ head = GEN8_CSB_ENTRIES - 1;
+ defer = true;
+ break;
+ }
+
execlist_cancel_port_requests(execlists);
spin_lock_irq(&engine->timeline->lock);
@@ -973,11 +990,16 @@ static void intel_lrc_irq_handler(unsigned long data)
writel(_MASKED_FIELD(GEN8_CSB_READ_PTR_MASK, head << 8),
dev_priv->regs + i915_mmio_reg_offset(RING_CONTEXT_STATUS_PTR(engine)));
}
+
+ if (defer) {
+ __set_bit(ENGINE_IRQ_EXECLIST, &engine->irq_posted);
+ goto out;
+ }
}
if (!execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT))
execlists_dequeue(engine);
-
+out:
intel_uncore_forcewake_put(dev_priv, execlists->fw_domains);
}
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 25eb23bc06eb..e259f954cdea 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -506,6 +506,10 @@ struct intel_engine_cs {
struct intel_engine_hangcheck hangcheck;
+ struct hrtimer fpreempt_timer;
+ struct work_struct fpreempt_work;
+ bool fpreempt_active;
+
bool needs_cmd_parser;
/*
@@ -932,4 +936,6 @@ void intel_engines_reset_default_submission(struct drm_i915_private *i915);
bool intel_engine_can_store_dword(struct intel_engine_cs *engine);
+void intel_engine_queue_fpreempt(struct intel_engine_cs *engine);
+bool intel_engine_cancel_fpreempt(struct intel_engine_cs *engine);
#endif /* _INTEL_RINGBUFFER_H_ */
--
2.16.2
More information about the Intel-gfx
mailing list