[PATCH 22/22] drm/i915/gt: Limit C-states while waiting for requests
Chris Wilson
chris at chris-wilson.co.uk
Fri Dec 20 13:55:51 UTC 2019
Allow the sysadmin to specify whether we should prevent the CPU from
entering higher C-states while waiting for the GPU, in order to reduce
the latency of request completions and so speed up client continuations.
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen at intel.com>
Cc: Francisco Jerez <currojerez at riseup.net>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
---
drivers/gpu/drm/i915/Kconfig.profile | 14 ++++++
drivers/gpu/drm/i915/gt/intel_breadcrumbs.c | 50 ++++++++++++++++++++
drivers/gpu/drm/i915/gt/intel_engine_cs.c | 2 +
drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 31 ++++++++++++
drivers/gpu/drm/i915/gt/intel_engine_types.h | 9 ++++
5 files changed, 106 insertions(+)
diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index ba8767fc0d6e..a956f1bb9caf 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -12,6 +12,20 @@ config DRM_I915_USERFAULT_AUTOSUSPEND
May be 0 to disable the extra delay and solely use the device level
runtime pm autosuspend delay tunable.
+config DRM_I915_DMA_LATENCY
+ int "Target CPU-DMA latency while waiting on active requests (ns)"
+ default -1 # nanoseconds
+ help
+ Specify a target latency for DMA wakeup, see /dev/cpu_dma_latency,
+ used while the CPU is waiting for GPU results.
+
+ This is adjustable via
+ /sys/class/drm/card?/engine/*/dma_latency_ns
+
+ May be -1 to not specify any target latency and let the CPU
+ enter powersaving while waiting. Conversely, 0 may be used to
+ prevent the CPU from entering any C-states while waiting.
+
config DRM_I915_HEARTBEAT_INTERVAL
int "Interval between heartbeat pulses (ms)"
default 2500 # milliseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 0ba524a414c6..48180080eab6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -31,6 +31,44 @@
#include "intel_gt_pm.h"
#include "intel_gt_requests.h"
+/*
+ * Worker: apply the CPU-DMA latency request for these breadcrumbs.
+ *
+ * If a removal was still queued, cancelling it leaves the earlier request
+ * in place and nothing new is added; otherwise a request is registered.
+ *
+ * qos->latency is stored in nanoseconds (it is copied from
+ * engine->props.dma_latency_ns), whereas PM_QOS_CPU_DMA_LATENCY targets are
+ * expressed in microseconds, so the value is converted before it is handed
+ * to pm_qos. The conversion rounds down so the requested bound is never
+ * exceeded; any target below 1us therefore behaves like 0.
+ */
+static void __dma_qos_add(struct work_struct *work)
+{
+	struct intel_breadcrumbs_dma_qos *qos =
+		container_of(work, typeof(*qos), add);
+	s32 usecs = qos->latency;
+
+	/* ns -> us (NSEC_PER_USEC); a negative value is passed on untouched */
+	if (usecs > 0)
+		usecs /= 1000;
+
+	if (!cancel_work_sync(&qos->del))
+		pm_qos_add_request(&qos->req,
+				   PM_QOS_CPU_DMA_LATENCY,
+				   usecs);
+}
+
+/*
+ * Worker: drop the CPU-DMA latency request.
+ *
+ * When an add was still pending, cancelling it is all that is done here;
+ * otherwise the request is removed from pm_qos.
+ */
+static void __dma_qos_del(struct work_struct *work)
+{
+	struct intel_breadcrumbs_dma_qos *qos;
+
+	qos = container_of(work, typeof(*qos), del);
+	if (cancel_work_sync(&qos->add))
+		return;
+
+	pm_qos_remove_request(&qos->req);
+}
+
+/*
+ * Record the wanted latency and queue the worker that applies it.
+ * A negative latency means no request is wanted, so nothing is queued.
+ */
+static void dma_qos_add(struct intel_breadcrumbs *b, s32 latency)
+{
+	struct intel_breadcrumbs_dma_qos *qos = &b->qos;
+
+	if (latency >= 0) {
+		qos->latency = latency;
+		queue_work(system_highpri_wq, &qos->add);
+	}
+}
+
+/*
+ * Queue the worker that drops the request, but only if dma_qos_add()
+ * recorded a non-negative latency; afterwards mark the latency as unset.
+ */
+static void dma_qos_del(struct intel_breadcrumbs *b)
+{
+	struct intel_breadcrumbs_dma_qos *qos = &b->qos;
+
+	if (qos->latency >= 0) {
+		/* queue first, then reset, exactly in this order */
+		queue_work(system_highpri_wq, &qos->del);
+		qos->latency = -1;
+	}
+}
+
static void irq_enable(struct intel_engine_cs *engine)
{
if (!engine->irq_enable)
@@ -64,6 +102,8 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
if (!--b->irq_enabled)
irq_disable(engine);
+ dma_qos_del(b);
+
b->irq_armed = false;
intel_gt_pm_put_async(engine->gt);
}
@@ -243,6 +283,8 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
if (!b->irq_enabled++)
irq_enable(engine);
+ dma_qos_add(b, engine->props.dma_latency_ns);
+
return true;
}
@@ -253,6 +295,10 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
spin_lock_init(&b->irq_lock);
INIT_LIST_HEAD(&b->signalers);
+ b->qos.latency = -1;
+ INIT_WORK(&b->qos.add, __dma_qos_add);
+ INIT_WORK(&b->qos.del, __dma_qos_del);
+
init_irq_work(&b->irq_work, signal_irq_work);
}
@@ -273,6 +319,10 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
{
+ struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+ flush_work(&b->qos.del); /* wait for a queued __dma_qos_del() to finish */
+ GEM_BUG_ON(b->qos.latency != -1); /* dma_qos_del() resets latency to -1 */
}
bool i915_request_enable_breadcrumb(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 84097dcc67d0..85d8566a74b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -310,6 +310,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
engine->instance = info->instance;
__sprint_engine_name(engine);
+ engine->props.dma_latency_ns =
+ CONFIG_DRM_I915_DMA_LATENCY;
engine->props.heartbeat_interval_ms =
CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
engine->props.max_busywait_duration_ns =
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index 33b4c00b93f2..ba0b24c22c71 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -272,6 +272,36 @@ stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
static struct kobj_attribute stop_timeout_attr =
__ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
+/*
+ * sysfs store for dma_latency_ns: parse a signed integer (base is
+ * auto-detected by kstrtoll) and save it as the engine's target latency.
+ *
+ * Returns @count on success, the kstrtoll() error for malformed input, or
+ * -EINVAL when the value is neither -1 nor within 0..S32_MAX.
+ */
+static ssize_t
+dma_latency_store(struct kobject *kobj, struct kobj_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	long long latency;
+	int err;
+
+	err = kstrtoll(buf, 0, &latency);
+	if (err)
+		return err;
+
+	/*
+	 * props.dma_latency_ns is an s32: reject anything that would be
+	 * truncated on assignment (e.g. -4294967296 would become 0).
+	 * -1 is the only negative value with a defined meaning.
+	 */
+	if (latency < -1 || latency > S32_MAX)
+		return -EINVAL;
+
+	WRITE_ONCE(engine->props.dma_latency_ns, latency);
+	return count;
+}
+
+/* sysfs show for dma_latency_ns: print the engine's current value in decimal. */
+static ssize_t
+dma_latency_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	s32 latency = kobj_to_engine(kobj)->props.dma_latency_ns;
+
+	return sprintf(buf, "%d\n", latency);
+}
+
+static struct kobj_attribute dma_latency_attr = /* sysfs file "dma_latency_ns", mode 0644 */
+__ATTR(dma_latency_ns, 0644, dma_latency_show, dma_latency_store);
+
static ssize_t
preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
const char *buf, size_t count)
@@ -401,6 +431,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
&all_caps_attr.attr,
&max_spin_attr.attr,
&stop_timeout_attr.attr,
+ &dma_latency_attr.attr,
#if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
&heartbeat_interval_attr.attr,
#endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index de1bc6534cc2..02a62ee35cc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -13,6 +13,7 @@
#include <linux/kref.h>
#include <linux/list.h>
#include <linux/llist.h>
+#include <linux/pm_qos.h>
#include <linux/rbtree.h>
#include <linux/timer.h>
#include <linux/types.h>
@@ -352,6 +353,12 @@ struct intel_engine_cs {
unsigned int irq_enabled;
bool irq_armed;
+
+ struct intel_breadcrumbs_dma_qos {
+ struct pm_qos_request req; /* passed to pm_qos_add_request()/pm_qos_remove_request() */
+ struct work_struct add, del; /* run __dma_qos_add() and __dma_qos_del() */
+ s32 latency; /* last target recorded by dma_qos_add(); -1 when unset */
+ } qos;
} breadcrumbs;
struct intel_engine_pmu {
@@ -540,6 +547,8 @@ struct intel_engine_cs {
unsigned long preempt_timeout_ms;
unsigned long stop_timeout_ms;
unsigned long timeslice_duration_ms;
+
+ s32 dma_latency_ns;
} props;
};
--
2.24.1
More information about the Intel-gfx-trybot
mailing list