[PATCH] ban timeout
Tvrtko Ursulin
tvrtko.ursulin at linux.intel.com
Thu Apr 22 13:53:48 UTC 2021
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/Kconfig.profile | 13 +++++
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 3 ++
drivers/gpu/drm/i915/gt/intel_engine_types.h | 1 +
.../drm/i915/gt/intel_execlists_submission.c | 2 +-
drivers/gpu/drm/i915/gt/sysfs_engines.c | 54 +++++++++++++++++++
5 files changed, 72 insertions(+), 1 deletion(-)
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index 39328567c200..0be56c7084ff 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -119,3 +119,16 @@ config DRM_I915_TIMESLICE_DURATION
/sys/class/drm/card?/engine/*/timeslice_duration_ms
May be 0 to disable timeslicing.
+
+config DRM_I915_BANNED_CONTEXT_TIMEOUT
+ int "Banned context timeout (ms)"
+ default 1 # milliseconds
+ help
+ How long to wait (in milliseconds) for a banned context to cleanly
+ terminate their workloads. If the context does not yield inside the
+ configured time it will be forcibly reset.
+
+ This is adjustable via
+ /sys/class/drm/card?/engine/*/banned_context_timeout_ms
+
+ If configured to zero a 1ms minimum will still apply.
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 6dbdbde00f14..80b973367db8 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -306,6 +306,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
CONFIG_DRM_I915_STOP_TIMEOUT;
engine->props.timeslice_duration_ms =
CONFIG_DRM_I915_TIMESLICE_DURATION;
+ engine->props.banned_context_timeout_ms =
+ min(1, CONFIG_DRM_I915_BANNED_CONTEXT_TIMEOUT);
/* Override to uninterruptible for OpenCL workloads. */
if (INTEL_GEN(i915) == 12 && engine->class == RENDER_CLASS)
@@ -1623,6 +1625,7 @@ static void print_properties(struct intel_engine_cs *engine,
P(preempt_timeout_ms),
P(stop_timeout_ms),
P(timeslice_duration_ms),
+ P(banned_context_timeout_ms),
{},
#undef P
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index 883bafc44902..ec4f16b0308a 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -549,6 +549,7 @@ struct intel_engine_cs {
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
+ unsigned long banned_context_timeout_ms;
} props, defaults;
I915_SELFTEST_DECLARE(struct fault_attr reset_timeout);
diff --git a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
index de124870af44..e2c4e4230979 100644
--- a/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
+++ b/drivers/gpu/drm/i915/gt/intel_execlists_submission.c
@@ -1209,7 +1209,7 @@ static unsigned long active_preempt_timeout(struct intel_engine_cs *engine,
/* Force a fast reset for terminated contexts (ignoring sysfs!) */
if (unlikely(intel_context_is_banned(rq->context) || bad_request(rq)))
- return 1;
+ return engine->props.banned_context_timeout_ms;
return READ_ONCE(engine->props.preempt_timeout_ms);
}
diff --git a/drivers/gpu/drm/i915/gt/sysfs_engines.c b/drivers/gpu/drm/i915/gt/sysfs_engines.c
index 967031056202..cce9ab8a3309 100644
--- a/drivers/gpu/drm/i915/gt/sysfs_engines.c
+++ b/drivers/gpu/drm/i915/gt/sysfs_engines.c
@@ -411,6 +411,52 @@ heartbeat_default(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
static struct kobj_attribute heartbeat_interval_def =
__ATTR(heartbeat_interval_ms, 0444, heartbeat_default, NULL);
+static ssize_t
+banned_context_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+ unsigned long long timeout;
+ int err;
+
+ err = kstrtoull(buf, 0, &timeout);
+ if (err)
+ return err;
+
+ if (timeout < 1 || timeout > jiffies_to_msecs(MAX_SCHEDULE_TIMEOUT))
+ return -EINVAL;
+
+ WRITE_ONCE(engine->props.banned_context_timeout_ms, timeout);
+
+ return count;
+}
+
+static ssize_t
+banned_context_timeout_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n", engine->props.banned_context_timeout_ms);
+}
+
+static struct kobj_attribute banned_context_timeout_attr =
+__ATTR(banned_context_timeout_ms, 0644, banned_context_timeout_show,
+ banned_context_timeout_store);
+
+static ssize_t
+banned_context_timeout_default(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf)
+{
+ struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+ return sprintf(buf, "%lu\n",
+ engine->defaults.banned_context_timeout_ms);
+}
+
+static struct kobj_attribute banned_context_timeout_def =
+__ATTR(banned_context_timeout_ms, 0444, banned_context_timeout_default, NULL);
+
static void kobj_engine_release(struct kobject *kobj)
{
kfree(kobj);
@@ -476,6 +522,10 @@ static void add_defaults(struct kobj_engine *parent)
if (intel_engine_has_preempt_reset(ke->engine) &&
sysfs_create_file(&ke->base, &preempt_timeout_def.attr))
return;
+
+ if (intel_engine_has_preempt_reset(ke->engine) &&
+ sysfs_create_file(&ke->base, &banned_context_timeout_def.attr))
+ return;
}
void intel_engines_add_sysfs(struct drm_i915_private *i915)
@@ -521,6 +571,10 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
sysfs_create_file(kobj, &preempt_timeout_attr.attr))
goto err_engine;
+ if (intel_engine_has_preempt_reset(engine) &&
+ sysfs_create_file(kobj, &banned_context_timeout_attr.attr))
+ goto err_engine;
+
add_defaults(container_of(kobj, struct kobj_engine, base));
if (0) {
--
2.27.0
More information about the Intel-gfx-trybot
mailing list