[PATCH] ban timeout

Tvrtko Ursulin tvrtko.ursulin at linux.intel.com
Thu Apr 22 13:53:48 UTC 2021


From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>

Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile          | 13 +++++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c     |  3 ++
 drivers/gpu/drm/i915/gt/intel_engine_types.h  |  1 +
 .../drm/i915/gt/intel_execlists_submission.c  |  2 +-
 drivers/gpu/drm/i915/gt/sysfs_engines.c       | 54 +++++++++++++++++++
 5 files changed, 72 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 39328567c200..0be56c7084ff 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -119,3 +119,16 @@ config DRM_I915_TIMESLICE_DURATION
 	  /sys/class/drm/card?/engine/*/timeslice_duration_ms
 
 	  May be 0 to disable timeslicing.
+
+config DRM_I915_BANNED_CONTEXT_TIMEOUT
+	int "Banned context timeout (ms)"
+	default 1 # milliseconds
+	help
+	  How long to wait (in milliseconds) for a banned context to cleanly
+	  terminate its workloads. If the context does not yield within the
+	  configured time, it will be forcibly reset.
+
+	  This is adjustable via
+	  /sys/class/drm/card?/engine/*/banned_context_timeout_ms
+
+	  If configured to zero, a 1ms minimum will still apply.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 6dbdbde00f14..80b973367db8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -306,6 +306,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 		CONFIG_DRM_I915_STOP_TIMEOUT;
 	engine->props.timeslice_duration_ms =
 		CONFIG_DRM_I915_TIMESLICE_DURATION;
+	engine->props.banned_context_timeout_ms =
+		max(1, CONFIG_DRM_I915_BANNED_CONTEXT_TIMEOUT); /* 1ms floor */
 
 	/* Override to uninterruptible for OpenCL workloads. */
 	if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
@@ -1623,6 +1625,7 @@ static void print_properties(struct intel_engine_cs *engine,
 		P(preempt_timeout_ms),
 		P(stop_timeout_ms),
 		P(timeslice_duration_ms),
+		P(banned_context_timeout_ms),
 
 		{},
 #undef P
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 883bafc44902..ec4f16b0308a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -549,6 +549,7 @@ struct intel_engine_cs {
 		unsigned long preempt_timeout_ms;
 		unsigned long stop_timeout_ms;
 		unsigned long timeslice_duration_ms;
+		unsigned long banned_context_timeout_ms;
 	} props, defaults;
 
 	I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de124870af44..e2c4e4230979 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1209,7 +1209,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
 
 	/* Force a fast reset for terminated contexts (ignoring sysfs!) */
 	if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
-		return 1;
+		return READ_ONCE(engine->props.banned_context_timeout_ms);
 
 	return READ_ONCE(engine->props.preempt_timeout_ms);
 }
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 967031056202..cce9ab8a3309 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -411,6 +411,52 @@ heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 static struct kobj_attribute heartbeat_interval_def =
 __ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
 
+/* Parse a new timeout (ms) from @buf; reject 0 and anything above the cap. */
+static ssize_t
+banned_context_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+			     const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	unsigned long long ms;
+	int ret;
+
+	ret = kstrtoull(buf, 0, &ms);
+	if (ret)
+		return ret;
+
+	if (!ms || ms > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.banned_context_timeout_ms, ms);
+	return count;
+}
+
+static ssize_t
+banned_context_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+			    char *buf)
+{
+	/* Emit the engine's live banned-context timeout (ms) as one line. */
+	return sprintf(buf, "%lu\n",
+		       kobj_to_engine(kobj)->props.banned_context_timeout_ms);
+}
+
+static struct kobj_attribute banned_context_timeout_attr =
+__ATTR(banned_context_timeout_ms, 0644, banned_context_timeout_show,
+       banned_context_timeout_store);
+
+/* Print the engine's default banned-context timeout (ms) as one line. */
+static ssize_t
+banned_context_timeout_default(struct kobject *kobj,
+			       struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *cs = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%lu\n", cs->defaults.banned_context_timeout_ms);
+}
+
+static struct kobj_attribute banned_context_timeout_def =
+__ATTR(banned_context_timeout_ms, 0444, banned_context_timeout_default, NULL);
+
 static void kobj_engine_release(struct kobject *kobj)
 {
 	kfree(kobj);
@@ -476,6 +522,10 @@ static void add_defaults(struct kobj_engine *parent)
 	if (intel_engine_has_preempt_reset(ke->engine) &&
 	    sysfs_create_file(&ke->base, &preempt_timeout_def.attr))
 		return;
+
+	if (intel_engine_has_preempt_reset(ke->engine) &&
+	    sysfs_create_file(&ke->base, &banned_context_timeout_def.attr))
+		return;
 }
 
 void intel_engines_add_sysfs(struct drm_i915_private *i915)
@@ -521,6 +571,10 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		    sysfs_create_file(kobj, &preempt_timeout_attr.attr))
 			goto err_engine;
 
+		if (intel_engine_has_preempt_reset(engine) &&
+		    sysfs_create_file(kobj, &banned_context_timeout_attr.attr))
+			goto err_engine;
+
 		add_defaults(container_of(kobj, struct kobj_engine, base));
 
 		if (0) {
-- 
2.27.0



More information about the Intel-gfx-trybot mailing list