[PATCH 22/22] drm/i915/gt: Limit C-states while waiting for requests

Chris Wilson chris at chris-wilson.co.uk
Fri Dec 20 13:55:51 UTC 2019


Allow the sysadmin to specify whether we should prevent the CPU from
entering higher C-states while waiting for the GPU, in order to reduce
the latency of request completions and so speed up client continuations.

Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Tvrtko Ursulin <tvrtko.ursulin at linux.intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Eero Tamminen <eero.t.tamminen at intel.com>
Cc: Francisco Jerez <currojerez at riseup.net>
Cc: Mika Kuoppala <mika.kuoppala at linux.intel.com>
Cc: Dmitry Rogozhkin <dmitry.v.rogozhkin at intel.com>
---
 drivers/gpu/drm/i915/Kconfig.profile         | 14 ++++++
 drivers/gpu/drm/i915/gt/intel_breadcrumbs.c  | 50 ++++++++++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_cs.c    |  2 +
 drivers/gpu/drm/i915/gt/intel_engine_sysfs.c | 31 ++++++++++++
 drivers/gpu/drm/i915/gt/intel_engine_types.h |  9 ++++
 5 files changed, 106 insertions(+)

diff --git a/drivers/gpu/drm/i915/Kconfig.profile b/drivers/gpu/drm/i915/Kconfig.profile
index ba8767fc0d6e..a956f1bb9caf 100644
--- a/drivers/gpu/drm/i915/Kconfig.profile
+++ b/drivers/gpu/drm/i915/Kconfig.profile
@@ -12,6 +12,20 @@ config DRM_I915_USERFAULT_AUTOSUSPEND
 	  May be 0 to disable the extra delay and solely use the device level
 	  runtime pm autosuspend delay tunable.
 
+config DRM_I915_DMA_LATENCY
+	int "Target CPU-DMA latency while waiting on active requests (ns)"
+	default -1 # nanoseconds
+	help
+	  Specify a target latency for DMA wakeup, see /dev/cpu_dma_latency,
+	  used while the CPU is waiting for GPU results.
+
+	  This is adjustable via
+	  /sys/class/drm/card?/engine/*/dma_latency_ns
+
+	  May be -1 to prevent specifying a target wakeup and let the CPU
+	  enter powersaving while waiting. Conversely, 0 may be used to
+	  prevent the CPU from entering any C-states while waiting.
+
 config DRM_I915_HEARTBEAT_INTERVAL
 	int "Interval between heartbeat pulses (ms)"
 	default 2500 # milliseconds
diff --git a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
index 0ba524a414c6..48180080eab6 100644
--- a/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
+++ b/drivers/gpu/drm/i915/gt/intel_breadcrumbs.c
@@ -31,6 +31,44 @@
 #include "intel_gt_pm.h"
 #include "intel_gt_requests.h"
 
+static void __dma_qos_add(struct work_struct *work)
+{
+	struct intel_breadcrumbs_dma_qos *qos =
+		container_of(work, typeof(*qos), add);
+
+	if (!cancel_work_sync(&qos->del))
+		pm_qos_add_request(&qos->req,
+				   PM_QOS_CPU_DMA_LATENCY,
+				   qos->latency);
+}
+
+static void __dma_qos_del(struct work_struct *work)
+{
+	struct intel_breadcrumbs_dma_qos *qos =
+		container_of(work, typeof(*qos), del);
+
+	if (!cancel_work_sync(&qos->add))
+		pm_qos_remove_request(&qos->req);
+}
+
+static void dma_qos_add(struct intel_breadcrumbs *b, s32 latency)
+{
+	if (latency < 0)
+		return;
+
+	b->qos.latency = latency;
+	queue_work(system_highpri_wq, &b->qos.add);
+}
+
+static void dma_qos_del(struct intel_breadcrumbs *b)
+{
+	if (b->qos.latency < 0)
+		return;
+
+	queue_work(system_highpri_wq, &b->qos.del);
+	b->qos.latency = -1;
+}
+
 static void irq_enable(struct intel_engine_cs *engine)
 {
 	if (!engine->irq_enable)
@@ -64,6 +102,8 @@ static void __intel_breadcrumbs_disarm_irq(struct intel_breadcrumbs *b)
 	if (!--b->irq_enabled)
 		irq_disable(engine);
 
+	dma_qos_del(b);
+
 	b->irq_armed = false;
 	intel_gt_pm_put_async(engine->gt);
 }
@@ -243,6 +283,8 @@ static bool __intel_breadcrumbs_arm_irq(struct intel_breadcrumbs *b)
 	if (!b->irq_enabled++)
 		irq_enable(engine);
 
+	dma_qos_add(b, engine->props.dma_latency_ns);
+
 	return true;
 }
 
@@ -253,6 +295,10 @@ void intel_engine_init_breadcrumbs(struct intel_engine_cs *engine)
 	spin_lock_init(&b->irq_lock);
 	INIT_LIST_HEAD(&b->signalers);
 
+	b->qos.latency = -1;
+	INIT_WORK(&b->qos.add, __dma_qos_add);
+	INIT_WORK(&b->qos.del, __dma_qos_del);
+
 	init_irq_work(&b->irq_work, signal_irq_work);
 }
 
@@ -273,6 +319,10 @@ void intel_engine_reset_breadcrumbs(struct intel_engine_cs *engine)
 
 void intel_engine_fini_breadcrumbs(struct intel_engine_cs *engine)
 {
+	struct intel_breadcrumbs *b = &engine->breadcrumbs;
+
+	flush_work(&b->qos.del);
+	GEM_BUG_ON(b->qos.latency != -1);
 }
 
 bool i915_request_enable_breadcrumb(struct i915_request *rq)
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_cs.c b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
index 84097dcc67d0..85d8566a74b9 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_cs.c
@@ -310,6 +310,8 @@ static int intel_engine_setup(struct intel_gt *gt, enum intel_engine_id id)
 	engine->instance = info->instance;
 	__sprint_engine_name(engine);
 
+	engine->props.dma_latency_ns =
+		CONFIG_DRM_I915_DMA_LATENCY;
 	engine->props.heartbeat_interval_ms =
 		CONFIG_DRM_I915_HEARTBEAT_INTERVAL;
 	engine->props.max_busywait_duration_ns =
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
index 33b4c00b93f2..ba0b24c22c71 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
+++ b/drivers/gpu/drm/i915/gt/intel_engine_sysfs.c
@@ -272,6 +272,36 @@ stop_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
 static struct kobj_attribute stop_timeout_attr =
 __ATTR(stop_timeout_ms, 0644, stop_show, stop_store);
 
+static ssize_t
+dma_latency_store(struct kobject *kobj, struct kobj_attribute *attr,
+		  const char *buf, size_t count)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+	long long latency;
+	int err;
+
+	err = kstrtoll(buf, 0, &latency);
+	if (err)
+		return err;
+
+	/* Stored as s32: reject anything that would be silently truncated. */
+	if (latency < S32_MIN || latency > S32_MAX)
+		return -EINVAL;
+	WRITE_ONCE(engine->props.dma_latency_ns, latency);
+	return count;
+}
+
+static ssize_t
+dma_latency_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf)
+{
+	struct intel_engine_cs *engine = kobj_to_engine(kobj);
+
+	return sprintf(buf, "%d\n", engine->props.dma_latency_ns);
+}
+
+static struct kobj_attribute dma_latency_attr =
+__ATTR(dma_latency_ns, 0644, dma_latency_show, dma_latency_store);
+
 static ssize_t
 preempt_timeout_store(struct kobject *kobj, struct kobj_attribute *attr,
 		      const char *buf, size_t count)
@@ -401,6 +431,7 @@ void intel_engines_add_sysfs(struct drm_i915_private *i915)
 		&all_caps_attr.attr,
 		&max_spin_attr.attr,
 		&stop_timeout_attr.attr,
+		&dma_latency_attr.attr,
 #if CONFIG_DRM_I915_HEARTBEAT_INTERVAL
 		&heartbeat_interval_attr.attr,
 #endif
diff --git a/drivers/gpu/drm/i915/gt/intel_engine_types.h b/drivers/gpu/drm/i915/gt/intel_engine_types.h
index de1bc6534cc2..02a62ee35cc7 100644
--- a/drivers/gpu/drm/i915/gt/intel_engine_types.h
+++ b/drivers/gpu/drm/i915/gt/intel_engine_types.h
@@ -13,6 +13,7 @@
 #include <linux/kref.h>
 #include <linux/list.h>
 #include <linux/llist.h>
+#include <linux/pm_qos.h>
 #include <linux/rbtree.h>
 #include <linux/timer.h>
 #include <linux/types.h>
@@ -352,6 +353,12 @@ struct intel_engine_cs {
 		unsigned int irq_enabled;
 
 		bool irq_armed;
+
+		struct intel_breadcrumbs_dma_qos {
+			struct pm_qos_request req;
+			struct work_struct add, del;
+			s32 latency;
+		} qos;
 	} breadcrumbs;
 
 	struct intel_engine_pmu {
@@ -540,6 +547,8 @@ struct intel_engine_cs {
 		unsigned long preempt_timeout_ms;
 		unsigned long stop_timeout_ms;
 		unsigned long timeslice_duration_ms;
+
+		s32 dma_latency_ns;
 	} props;
 };
 
-- 
2.24.1



More information about the Intel-gfx-trybot mailing list