[PATCH 80/80] drm/i915: Support per-context user requests for GPU frequency control

Chris Wilson chris at chris-wilson.co.uk
Thu Aug 2 10:29:25 UTC 2018


Often, we find ourselves facing a workload where the user knows in
advance what GPU frequency they require for it to complete in a timely
manner, and using past experience they can outperform the HW assisted
RPS autotuning. An example might be kodi (HTPC) where they know that
video decoding and compositing require a minimum frequency to avoid ever
dropping a frame, or conversely know when they are in a powersaving mode
and would rather have slower updates than ramp up the GPU frequency and
power consumption. Other workloads may defeat the autotuning entirely
and need manual control to meet their performance goals, e.g. bursty
applications which require low latency.

To accommodate the varying needs of different applications, that may be
running concurrently, we want a more flexible system than a global limit
supplied by sysfs. To this end, we offer the application the option to
set their desired frequency bounds on the context itself, and apply those
bounds when we execute commands from the application, switching between
bounds just as easily as we switch between the clients themselves.

The clients can query the range supported by the HW, or at least the
range they are restricted to, and then freely select frequencies within
that range that they want to run at. (They can select just a single
frequency if they so choose.) As this is subject to the global limit
supplied by the user in sysfs, and a client can only reduce the range of
frequencies they allow the HW to run at, we allow all clients to adjust
their request (and not restrict raising the minimum to privileged
CAP_SYS_NICE clients).

v2: Support range promotion on dependencies for pi.

Testcase: igt/gem_ctx_freq
Signed-off-by: Chris Wilson <chris at chris-wilson.co.uk>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
Cc: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: Praveen Paneri <praveen.paneri at intel.com>
Cc: Sagar A Kamble <sagar.a.kamble at intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c           |  16 ++-
 drivers/gpu/drm/i915/i915_gem_context.c       |  54 ++++++++
 drivers/gpu/drm/i915/i915_scheduler.h         |   3 +
 drivers/gpu/drm/i915/intel_display.c          |   3 +
 drivers/gpu/drm/i915/intel_engine_cs.c        |   5 +-
 drivers/gpu/drm/i915/intel_gt_pm.c            | 119 ++++++++++++++--
 drivers/gpu/drm/i915/intel_gt_pm.h            |  12 ++
 drivers/gpu/drm/i915/intel_lrc.c              |  33 ++++-
 .../drm/i915/selftests/i915_mock_selftests.h  |   1 +
 drivers/gpu/drm/i915/selftests/intel_gt_pm.c  | 130 ++++++++++++++++++
 drivers/gpu/drm/i915/selftests/mock_uncore.c  |   1 +
 include/uapi/drm/i915_drm.h                   |  21 +++
 12 files changed, 383 insertions(+), 15 deletions(-)
 create mode 100644 drivers/gpu/drm/i915/selftests/intel_gt_pm.c

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index ab23790b6dfd..f0d79fd40676 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2202,6 +2202,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	struct drm_device *dev = &dev_priv->drm;
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
 	struct drm_file *file;
+	int n;
 
 	seq_printf(m, "GPU busy? %s [%d requests]\n",
 		   yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
@@ -2210,17 +2211,30 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 	seq_printf(m, "Boosts outstanding? %d\n",
 		   atomic_read(&rps->num_waiters));
 	seq_printf(m, "Interactive? %d\n", READ_ONCE(rps->power.interactive));
+	seq_printf(m, "Worker pending? %s\n", yesno(work_busy(&rps->work)));
 	seq_printf(m, "Frequency requested %d [%d, %d]\n",
 		   intel_gpu_freq(dev_priv, rps->freq),
 		   intel_gpu_freq(dev_priv, rps->min),
 		   intel_gpu_freq(dev_priv, rps->max));
-	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
+	seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
 		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
 		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
+		   intel_gpu_freq(dev_priv, rps->min_freq_context),
 		   intel_gpu_freq(dev_priv, rps->min_freq_user),
 		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_context),
 		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
+	seq_printf(m, "  engines min: [");
+	for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+		seq_printf(m, "%s%d", n ? ", " : "",
+			   intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
+	seq_printf(m, "]\n  engines max: [");
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+		seq_printf(m, "%s%d", n ? ", " : "",
+			   intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
+	seq_printf(m, "]\n");
+
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
 		   intel_gpu_freq(dev_priv, rps->idle_freq),
 		   intel_gpu_freq(dev_priv, rps->efficient_freq),
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 8dfa544d640b..04efab0b79ce 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -88,9 +88,11 @@
 #include <linux/log2.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_trace.h"
 #include "i915_user_extensions.h"
+#include "intel_gt_pm.h"
 #include "intel_lrc.h"
 #include "intel_workarounds.h"
 
@@ -340,6 +342,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->sched.priority = I915_USER_PRIORITY(I915_PRIORITY_NORMAL);
+	ctx->sched.min_freq = dev_priv->gt_pm.rps.min_freq_hw;
+	ctx->sched.max_freq = dev_priv->gt_pm.rps.max_freq_hw;
 
 	for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) {
 		struct intel_context *ce = &ctx->__engine[n];
@@ -898,6 +902,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret = 0;
 
@@ -939,6 +944,19 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 			args->value = ctx->preempt_timeout;
 		break;
 
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_FREQUENCY)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			u32 min = intel_gpu_freq(i915, ctx->sched.min_freq);
+			u32 max = intel_gpu_freq(i915, ctx->sched.max_freq);
+
+			args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
+		}
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -1152,6 +1170,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret;
 
@@ -1213,6 +1232,41 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 					I915_USER_PRIORITY(priority);
 		}
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!(to_i915(dev)->caps.scheduler & I915_SCHEDULER_CAP_FREQUENCY)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			struct intel_rps *rps = &i915->gt_pm.rps;
+			u32 min, max;
+
+			min = I915_CONTEXT_MIN_FREQUENCY(args->value);
+			min = intel_freq_opcode(i915, min);
+
+			max = I915_CONTEXT_MAX_FREQUENCY(args->value);
+			max = intel_freq_opcode(i915, max);
+
+			/*
+			 * As we constrain the frequency request from the
+			 * context (application) by the sysadmin imposed limits,
+			 * it is reasonable to allow the application to
+			 * specify its preferred range within those limits.
+			 * That is we do not need to restrict requesting
+			 * a higher frequency to privileged (CAP_SYS_NICE)
+			 * processes.
+			 */
+			if (max < min) {
+				ret = -EINVAL;
+			} else if (min < rps->min_freq_hw ||
+				   max > rps->max_freq_hw) {
+				ret = -EINVAL;
+			} else {
+				ctx->sched.min_freq = min;
+				ctx->sched.max_freq = max;
+			}
+		}
+		break;
 
 	case I915_CONTEXT_PARAM_ENGINES:
 		ret = set_engines(ctx, args);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index be132ceb83d9..32f96474d383 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -41,6 +41,9 @@ struct i915_sched_attr {
 	 * The &drm_i915_private.kernel_context is assigned the lowest priority.
 	 */
 	int priority;
+
+	u8 min_freq;
+	u8 max_freq;
 };
 
 /*
diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
index 4581f0d5f799..a6a2ee1e79a1 100644
--- a/drivers/gpu/drm/i915/intel_display.c
+++ b/drivers/gpu/drm/i915/intel_display.c
@@ -13003,8 +13003,11 @@ static void intel_plane_unpin_fb(struct intel_plane_state *old_plane_state)
 
 static void fb_obj_bump_render_priority(struct drm_i915_gem_object *obj)
 {
+	struct drm_i915_private *i915 = to_i915(obj->base.dev);
 	struct i915_sched_attr attr = {
 		.priority = I915_PRIORITY_DISPLAY,
+		.min_freq = i915->gt_pm.rps.min_freq_hw,
+		.max_freq = i915->gt_pm.rps.max_freq_hw,
 	};
 
 	i915_gem_object_wait_priority(obj, 0,
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index 6e758e548c80..0f5c69bfad0b 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1314,7 +1314,10 @@ static int print_sched_attr(struct drm_i915_private *i915,
 		return x;
 
 	x += snprintf(buf + x, len - x,
-		      " prio=%d", attr->priority);
+		      " prio=%d, freq=[%d, %d]",
+		      attr->priority,
+		      intel_gpu_freq(i915, attr->min_freq),
+		      intel_gpu_freq(i915, attr->max_freq));
 
 	return x;
 }
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 100239c42f42..57efcfdd5e07 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -437,10 +437,10 @@ static int adjust_rps(struct drm_i915_private *i915, int freq, int adj)
 	GEM_BUG_ON(!rps->active);
 
 	min = clamp_t(int,
-		      rps->min_freq_soft,
+		      max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
 		      rps->min_freq_user, rps->max_freq_user);
 	max = clamp_t(int,
-		      rps->max_freq_soft,
+		      min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
 		      min, rps->max_freq_user);
 	if (atomic_read(&rps->num_waiters) && max < rps->boost_freq)
 		max = rps->boost_freq;
@@ -942,6 +942,82 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
 	atomic_inc(client ? &client->boosts : &rps->boosts);
 }
 
+static void __rps_update_engine(struct intel_rps *rps,
+				enum intel_engine_id idx,
+				u32 min, u32 max)
+{
+	unsigned long flags;
+	bool update = false;
+	u32 old;
+	int n;
+
+	GEM_BUG_ON(min > max);
+
+	if (rps->min_freq_engine[idx] != min) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->min_freq_engine[idx] = min;
+
+		old = rps->min_freq_context;
+		rps->min_freq_context = rps->min_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+			if (rps->min_freq_engine[n] > rps->min_freq_context)
+				rps->min_freq_context = rps->min_freq_engine[n];
+		if (rps->min_freq_context != old) {
+			u32 cur = READ_ONCE(rps->freq);
+
+			update |= cur == old || cur < min;
+		}
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	if (rps->max_freq_engine[idx] != max) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->max_freq_engine[idx] = max;
+
+		old = rps->max_freq_context;
+		rps->max_freq_context = rps->max_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+			if (rps->max_freq_engine[n] < rps->max_freq_context)
+				rps->max_freq_context = rps->max_freq_engine[n];
+		if (rps->max_freq_context != old) {
+			u32 cur = READ_ONCE(rps->freq);
+
+			update |= cur == old || cur > max;
+		}
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	/* Kick the RPS worker to apply the updated constraints, as needed */
+	if (update && !atomic_read(&rps->num_waiters))
+		schedule_work(&rps->work);
+}
+
+void intel_engine_update_rps(const struct intel_engine_cs *engine,
+			     u8 min, u8 max)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	__rps_update_engine(rps, engine->id, min, max);
+}
+
+void intel_engine_cancel_rps(const struct intel_engine_cs *engine)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	__rps_update_engine(rps, engine->id,
+			    rps->min_freq_hw, rps->max_freq_hw);
+}
+
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2524,6 +2600,30 @@ void intel_gt_pm_init_early(struct drm_i915_private *i915)
 	mutex_init(&rps->lock);
 	mutex_init(&rps->power.mutex);
 	INIT_WORK(&rps->work, intel_rps_work);
+	spin_lock_init(&rps->engine_lock);
+}
+
+static void intel_rps_init__frequencies(struct intel_rps *rps)
+{
+	int n;
+
+	rps->max_freq_soft = rps->max_freq_hw;
+	rps->min_freq_soft = rps->min_freq_hw;
+
+	rps->max_freq_context = rps->max_freq_hw;
+	rps->min_freq_context = rps->min_freq_hw;
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
+		rps->max_freq_engine[n] = rps->max_freq_hw;
+		rps->min_freq_engine[n] = rps->min_freq_hw;
+	}
+
+	/* Finally allow us to boost to max by default */
+	rps->boost_freq = rps->max_freq_hw;
+	rps->idle_freq = rps->min_freq_hw;
+
+	rps->freq = rps->idle_freq;
+	rps->min = rps->min_freq_hw;
+	rps->max = rps->max_freq_hw;
 }
 
 void intel_gt_pm_init(struct drm_i915_private *i915)
@@ -2594,16 +2694,7 @@ void intel_gt_pm_init(struct drm_i915_private *i915)
 		}
 	}
 
-	rps->max_freq_soft = rps->max_freq_hw;
-	rps->min_freq_soft = rps->min_freq_hw;
-
-	/* Finally allow us to boost to max by default */
-	rps->boost_freq = rps->max_freq_hw;
-	rps->idle_freq = rps->min_freq_hw;
-
-	rps->freq = rps->idle_freq;
-	rps->min = rps->min_freq_hw;
-	rps->max = rps->max_freq_hw;
+	intel_rps_init__frequencies(rps);
 
 	mutex_unlock(&rps->lock);
 }
@@ -2865,3 +2956,7 @@ void gen9_disable_guc_interrupts(struct drm_i915_private *dev_priv)
 
 	gen9_reset_guc_interrupts(dev_priv);
 }
+
+#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
+#include "selftests/intel_gt_pm.c"
+#endif
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index 31c57e3157ff..5f6b1c9cd6c2 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -7,8 +7,12 @@
 #ifndef __INTEL_GT_PM_H__
 #define __INTEL_GT_PM_H__
 
+#include "i915_gem.h" /* for I915_NUM_ENGINES */
+
 struct drm_i915_private;
+struct i915_gem_context;
 struct i915_request;
+struct intel_engine_cs;
 struct intel_rps_client;
 
 struct intel_rps_ei {
@@ -19,6 +23,7 @@ struct intel_rps_ei {
 
 struct intel_rps {
 	struct mutex lock;
+	spinlock_t engine_lock; /* protects updates to min/max_freq_context */
 	struct work_struct work;
 
 	bool active;
@@ -51,6 +56,10 @@ struct intel_rps {
 	u8 max_freq_user;	/* Max frequency permitted by the driver */
 	u8 min_freq_soft;
 	u8 max_freq_soft;
+	u8 min_freq_context;	/* Min frequency permitted by the context */
+	u8 max_freq_context;	/* Max frequency permitted by the context */
+	u8 min_freq_engine[I915_NUM_ENGINES];
+	u8 max_freq_engine[I915_NUM_ENGINES];
 
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
@@ -139,6 +148,9 @@ void intel_gt_pm_idle(struct drm_i915_private *i915);
 
 void intel_gt_pm_irq_handler(struct drm_i915_private *i915, u32 pm_iir);
 
+void intel_engine_update_rps(const struct intel_engine_cs *engine,
+			     u8 min, u8 max);
+void intel_engine_cancel_rps(const struct intel_engine_cs *engine);
 void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
 
 void intel_rps_mark_interactive(struct drm_i915_private *i915,
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 4009af1bfaf9..60c864c121c4 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -140,6 +140,7 @@
 #include "i915_reset.h"
 #include "i915_vgpu.h"
 #include "intel_lrc_reg.h"
+#include "intel_gt_pm.h"
 #include "intel_mocs.h"
 #include "intel_workarounds.h"
 
@@ -462,12 +463,25 @@ inline void
 execlists_user_begin(struct intel_engine_execlists *execlists,
 		     const struct execlist_port *port)
 {
+	struct intel_engine_cs *engine =
+		container_of(execlists, typeof(*engine), execlists);
+	struct i915_request *rq = port_request(port);
+
 	execlists_set_active_once(execlists, EXECLISTS_ACTIVE_USER);
+
+	intel_engine_update_rps(engine,
+				rq->sched.attr.min_freq,
+				rq->sched.attr.max_freq);
 }
 
 inline void
 execlists_user_end(struct intel_engine_execlists *execlists)
 {
+	struct intel_engine_cs *engine =
+		container_of(execlists, typeof(*engine), execlists);
+
+	intel_engine_cancel_rps(engine);
+
 	execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
 }
 
@@ -611,6 +625,18 @@ static bool can_merge_rq(const struct i915_request *prev,
 	if (!can_merge_ctx(prev->hw_context, next->hw_context))
 		return false;
 
+	/*
+	 * Force a bubble if the frequencies change. In practice, a change in
+	 * frequency is most likely due to a priority bump, in which case
+	 * we expect to switch contexts promptly and so unlikely to be an
+	 * actual bubble.
+	 */
+	if (prev->sched.attr.min_freq < next->sched.attr.min_freq)
+		return false;
+
+	if (prev->sched.attr.max_freq > next->sched.attr.max_freq)
+		return false;
+
 	return true;
 }
 
@@ -987,7 +1013,8 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 				 * combine this request with the last, then we
 				 * are done.
 				 */
-				if (port == last_port)
+				if (port == last_port ||
+				    last->hw_context == rq->hw_context)
 					goto done;
 
 				/*
@@ -1556,6 +1583,8 @@ static void execlists_schedule(struct i915_request *request,
 			continue;
 
 		node->attr.priority = prio;
+		node->attr.min_freq = min(node->attr.min_freq, attr->min_freq);
+		node->attr.max_freq = max(node->attr.max_freq, attr->max_freq);
 		if (!list_empty(&node->link)) {
 			if (last != engine) {
 				pl = lookup_priolist(engine, prio);
@@ -2608,6 +2637,8 @@ void intel_execlists_set_default_submission(struct intel_engine_cs *engine)
 	engine->i915->caps.scheduler =
 		I915_SCHEDULER_CAP_ENABLED |
 		I915_SCHEDULER_CAP_PRIORITY;
+	if (HAS_RPS(engine->i915))
+		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_FREQUENCY;
 	if (intel_engine_has_preemption(engine))
 		engine->i915->caps.scheduler |= I915_SCHEDULER_CAP_PREEMPTION;
 }
diff --git a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
index 1b70208eeea7..aa63b6e45acf 100644
--- a/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
+++ b/drivers/gpu/drm/i915/selftests/i915_mock_selftests.h
@@ -15,6 +15,7 @@ selftest(scatterlist, scatterlist_mock_selftests)
 selftest(syncmap, i915_syncmap_mock_selftests)
 selftest(uncore, intel_uncore_mock_selftests)
 selftest(engine, intel_engine_cs_mock_selftests)
+selftest(gt_pm, intel_gt_pm_mock_selftests)
 selftest(breadcrumbs, intel_breadcrumbs_mock_selftests)
 selftest(timelines, i915_gem_timeline_mock_selftests)
 selftest(requests, i915_request_mock_selftests)
diff --git a/drivers/gpu/drm/i915/selftests/intel_gt_pm.c b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
new file mode 100644
index 000000000000..7d029be5216e
--- /dev/null
+++ b/drivers/gpu/drm/i915/selftests/intel_gt_pm.c
@@ -0,0 +1,130 @@
+/*
+ * SPDX-License-Identifier: MIT
+ *
+ * Copyright © 2018 Intel Corporation
+ */
+
+#include "../i915_selftest.h"
+#include "i915_random.h"
+
+#include "mock_gem_device.h"
+
+static void mock_rps_init(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	/* Disable the register writes */
+	mkwrite_device_info(i915)->gen = 0;
+	mkwrite_device_info(i915)->has_rps = true;
+
+	intel_gt_pm_init_early(i915);
+
+	rps->min_freq_hw = 0;
+	rps->max_freq_hw = 255;
+
+	rps->min_freq_user = rps->min_freq_hw;
+	rps->max_freq_user = rps->max_freq_hw;
+
+	intel_rps_init__frequencies(rps);
+}
+
+static void mock_rps_fini(struct drm_i915_private *i915)
+{
+	struct intel_rps *rps = &i915->gt_pm.rps;
+
+	cancel_work_sync(&rps->work);
+}
+
+static int igt_rps_engine(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	struct intel_rps *rps = &i915->gt_pm.rps;
+	I915_RND_STATE(prng);
+	int err;
+	int i;
+
+	intel_gt_pm_busy(i915); /* Activate RPS */
+
+	/*
+	 * Minimum unit tests for __rps_update_engine().
+	 *
+	 * Whenever we call __rps_update_engine, it will
+	 * replace the context min/max frequency request for a particular
+	 * engine and then recompute the global max(min)/min(max) over all
+	 * engines. In this mockup, we are limited to checking those
+	 * max(min)/min(max) calculations and then seeing if the rps
+	 * worker uses those bounds.
+	 */
+
+	for (i = 0; i < 256 * 256; i++) {
+		u8 freq = prandom_u32_state(&prng);
+
+		__rps_update_engine(rps, 0, freq, freq);
+		if (rps->min_freq_context != freq ||
+		    rps->max_freq_context != freq) {
+			pr_err("Context min/max frequencies not restricted to %d, found [%d, %d]\n",
+			       freq, rps->min_freq_context, rps->max_freq_context);
+			err = -EINVAL;
+			goto out;
+		}
+		flush_work(&rps->work);
+
+		if (rps->freq != freq) {
+			pr_err("Tried to restrict frequency to %d, found %d\n",
+			       freq, rps->freq);
+			err = -EINVAL;
+			goto out;
+		}
+	}
+
+	__rps_update_engine(rps, 0, rps->min_freq_hw, rps->max_freq_hw);
+	if (rps->min_freq_context != rps->min_freq_hw ||
+	    rps->max_freq_context != rps->max_freq_hw) {
+		pr_err("Context frequency not restored to [%d, %d], found [%d, %d]\n",
+		       rps->min_freq_hw, rps->max_freq_hw,
+		       rps->min_freq_context, rps->max_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+
+	for (i = 0; i < I915_NUM_ENGINES; i++)
+		__rps_update_engine(rps, i, i, 255 - i);
+	i--;
+	if (rps->min_freq_context != i) {
+		pr_err("Minimum context frequency across all engines not raised to %d, found %d\n", i, rps->min_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+	if (rps->max_freq_context != 255 - i) {
+		pr_err("Maximum context frequency across all engines not lowered to %d, found %d\n", 255 - i, rps->max_freq_context);
+		err = -EINVAL;
+		goto out;
+	}
+
+	err = 0;
+out:
+	intel_gt_pm_idle(i915);
+	return err;
+}
+
+int intel_gt_pm_mock_selftests(void)
+{
+	static const struct i915_subtest tests[] = {
+		SUBTEST(igt_rps_engine),
+	};
+	struct drm_i915_private *i915;
+	int err;
+
+	i915 = mock_gem_device();
+	if (!i915)
+		return -ENOMEM;
+
+	mock_rps_init(i915);
+
+	err = i915_subtests(tests, i915);
+
+	mock_rps_fini(i915);
+	drm_dev_unref(&i915->drm);
+
+	return err;
+}
diff --git a/drivers/gpu/drm/i915/selftests/mock_uncore.c b/drivers/gpu/drm/i915/selftests/mock_uncore.c
index 8ef14c7e5e38..ec6d5c3e70ea 100644
--- a/drivers/gpu/drm/i915/selftests/mock_uncore.c
+++ b/drivers/gpu/drm/i915/selftests/mock_uncore.c
@@ -41,6 +41,7 @@ __nop_read(64)
 
 void mock_uncore_init(struct drm_i915_private *i915)
 {
+	spin_lock_init(&i915->irq_lock);
 	ASSIGN_WRITE_MMIO_VFUNCS(i915, nop);
 	ASSIGN_READ_MMIO_VFUNCS(i915, nop);
 }
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index dad6eb25d09a..f8cfe21d73ac 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -488,6 +488,7 @@ typedef struct drm_i915_irq_wait {
 #define   I915_SCHEDULER_CAP_ENABLED	(1ul << 0)
 #define   I915_SCHEDULER_CAP_PRIORITY	(1ul << 1)
 #define   I915_SCHEDULER_CAP_PREEMPTION	(1ul << 2)
+#define   I915_SCHEDULER_CAP_FREQUENCY	(1ul << 3)
 
 #define I915_PARAM_HUC_STATUS		 42
 
@@ -1557,6 +1558,26 @@ struct drm_i915_gem_context_param {
  */
 #define I915_CONTEXT_PARAM_PREEMPT_TIMEOUT	0x8
 
+/*
+ * I915_CONTEXT_PARAM_FREQUENCY:
+ *
+ * Request that when this context runs, the GPU is restricted to run
+ * in this frequency range; but still constrained by the global user
+ * restriction specified via sysfs.
+ *
+ * The minimum / maximum frequencies are specified in MHz. Each context
+ * starts in the default unrestricted state, where the range is taken from
+ * the hardware, and so may be queried.
+ *
+ * Note the frequency is only changed on a context switch; if the
+ * context's frequency is updated whilst the context is currently executing,
+ * the change will not take effect until the next time the context is run.
+ */
+#define I915_CONTEXT_PARAM_FREQUENCY	0x9
+#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
+#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
+#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
+
 	__u64 value;
 };
 
-- 
2.18.0



More information about the Intel-gfx-trybot mailing list