[PATCH 70/72] ctx-freq

Chris Wilson chris at chris-wilson.co.uk
Tue Feb 6 20:58:10 UTC 2018


---
 drivers/gpu/drm/i915/i915_debugfs.c         |  4 +-
 drivers/gpu/drm/i915/i915_drv.h             |  5 ++
 drivers/gpu/drm/i915/i915_gem_context.c     | 46 +++++++++++++++
 drivers/gpu/drm/i915/i915_gem_context.h     |  3 +
 drivers/gpu/drm/i915/intel_gt_pm.c          | 91 ++++++++++++++++++++++++++++-
 drivers/gpu/drm/i915/intel_gt_pm.h          |  9 +++
 drivers/gpu/drm/i915/intel_guc_submission.c | 13 ++++-
 drivers/gpu/drm/i915/intel_lrc.c            | 13 ++++-
 include/uapi/drm/i915_drm.h                 |  4 ++
 9 files changed, 183 insertions(+), 5 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 2dd9e472aacd..52ea6db686f2 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2161,11 +2161,13 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
 		   intel_gpu_freq(dev_priv, rps->freq),
 		   intel_gpu_freq(dev_priv, rps->min),
 		   intel_gpu_freq(dev_priv, rps->max));
-	seq_printf(m, "  min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
+	seq_printf(m, "  min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
 		   intel_gpu_freq(dev_priv, rps->min_freq_hw),
 		   intel_gpu_freq(dev_priv, rps->min_freq_soft),
+		   intel_gpu_freq(dev_priv, rps->min_freq_context),
 		   intel_gpu_freq(dev_priv, rps->min_freq_user),
 		   intel_gpu_freq(dev_priv, rps->max_freq_user),
+		   intel_gpu_freq(dev_priv, rps->max_freq_context),
 		   intel_gpu_freq(dev_priv, rps->max_freq_soft),
 		   intel_gpu_freq(dev_priv, rps->max_freq_hw));
 	seq_printf(m, "  idle:%d, efficient:%d, boost:%d\n",
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index d10103a6cc6d..d187da200268 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -894,6 +894,7 @@ struct intel_rps_ei {
 
 struct intel_rps {
 	struct mutex lock;
+	spinlock_t engine_lock; /* protects updates to min/max_freq_context */
 
 	/*
 	 * work, interrupts_enabled and pm_iir are protected by
@@ -927,6 +928,10 @@ struct intel_rps {
 	u8 max_freq_user;	/* Max frequency permitted by the driver */
 	u8 min_freq_soft;
 	u8 max_freq_soft;
+	u8 min_freq_context;	/* Minimum frequency permitted by the context */
+	u8 max_freq_context;	/* Max frequency permitted by the context */
+	u8 min_freq_engine[I915_NUM_ENGINES];
+	u8 max_freq_engine[I915_NUM_ENGINES];
 
 	u8 idle_freq;		/* Frequency to request when we are idle */
 	u8 efficient_freq;	/* AKA RPe. Pre-determined balanced frequency */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index b8deab9eaf65..67de12895db1 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -88,8 +88,10 @@
 #include <linux/log2.h>
 #include <drm/drmP.h>
 #include <drm/i915_drm.h>
+
 #include "i915_drv.h"
 #include "i915_trace.h"
+#include "intel_gt_pm.h"
 
 #define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
 
@@ -256,6 +258,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
 	list_add_tail(&ctx->link, &dev_priv->contexts.list);
 	ctx->i915 = dev_priv;
 	ctx->priority = I915_PRIORITY_NORMAL;
+	ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
+	ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
 
 	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
 	INIT_LIST_HEAD(&ctx->handles_list);
@@ -771,6 +775,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret = 0;
 
@@ -803,6 +808,18 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
 	case I915_CONTEXT_PARAM_PRIORITY:
 		args->value = ctx->priority >> I915_PRIORITY_SHIFT;
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			u32 min = intel_gpu_freq(i915, ctx->min_freq);
+			u32 max = intel_gpu_freq(i915, ctx->max_freq);
+			args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
+		}
+		break;
+
 	default:
 		ret = -EINVAL;
 		break;
@@ -817,6 +834,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_gem_context_param *args = data;
+	struct drm_i915_private *i915 = to_i915(dev);
 	struct i915_gem_context *ctx;
 	int ret;
 
@@ -877,6 +895,34 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
 				ctx->priority = priority << I915_PRIORITY_SHIFT;
 		}
 		break;
+	case I915_CONTEXT_PARAM_FREQUENCY:
+		if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+			ret = -ENODEV;
+		} else if (args->size) {
+			ret = -EINVAL;
+		} else {
+			struct intel_rps *rps = &i915->gt_pm.rps;
+			u32 min, max;
+
+			min = intel_freq_opcode(i915,
+						I915_CONTEXT_MIN_FREQUENCY(args->value));
+			max = intel_freq_opcode(i915,
+						I915_CONTEXT_MAX_FREQUENCY(args->value));
+			if (max < min) {
+				ret = -EINVAL;
+			} else if (min < rps->min_freq_hw ||
+				   max > rps->max_freq_hw) {
+				ret = -EINVAL;
+			} else if ((min > ctx->min_freq ||
+				    max > ctx->max_freq) &&
+				   !capable(CAP_SYS_NICE)) {
+				ret = -EPERM;
+			} else {
+				ctx->min_freq = min;
+				ctx->max_freq = max;
+			}
+		}
+		break;
 
 	default:
 		ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 663ddfb61cb0..e459665e2f1e 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -152,6 +152,9 @@ struct i915_gem_context {
 	 */
 	int priority;
 
+	u32 min_freq;
+	u32 max_freq;
+
 	/** ggtt_offset_bias: placement restriction for context objects */
 	u32 ggtt_offset_bias;
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 0936fe745672..492eb420b244 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -520,10 +520,10 @@ static void intel_rps_work(struct work_struct *work)
 	mutex_lock(&rps->lock);
 
 	min = clamp_t(int,
-		      rps->min_freq_soft,
+		      max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
 		      rps->min_freq_user, rps->max_freq_user);
 	max = clamp_t(int,
-		      rps->max_freq_soft,
+		      min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
 		      min, rps->max_freq_user);
 	if (client_boost && max < rps->boost_freq)
 		max = rps->boost_freq;
@@ -695,6 +695,84 @@ void intel_rps_boost(struct drm_i915_gem_request *rq,
 	atomic_inc(rps_client ? &rps_client->boosts : &rps->boosts);
 }
 
+/* Fold the per-engine limits into min/max_freq_context; needs engine_lock. */
+static void rps_update_context_limits(struct intel_rps *rps)
+{
+	u8 lo = rps->min_freq_engine[0];
+	u8 hi = rps->max_freq_engine[0];
+	int n;
+
+	for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++) {
+		if (rps->min_freq_engine[n] > lo)
+			lo = rps->min_freq_engine[n];
+		if (rps->max_freq_engine[n] < hi)
+			hi = rps->max_freq_engine[n];
+	}
+
+	/* Pairs with the lockless READ_ONCE() in intel_rps_work(). */
+	WRITE_ONCE(rps->min_freq_context, lo);
+	WRITE_ONCE(rps->max_freq_context, hi);
+}
+
+/*
+ * Apply @ctx's frequency limits to @engine. The aggregate is recomputed over
+ * every engine rather than ratcheted, so switching to a context with looser
+ * limits relaxes min/max_freq_context again.
+ */
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+	unsigned long flags;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	if (ctx->min_freq == rps->min_freq_engine[engine->id] &&
+	    ctx->max_freq == rps->max_freq_engine[engine->id])
+		return;
+
+	/*
+	 * A race here with the rps worker has no effect as we will pick up
+	 * the change on the next cycle.
+	 */
+	spin_lock_irqsave(&rps->engine_lock, flags);
+	rps->min_freq_engine[engine->id] = ctx->min_freq;
+	rps->max_freq_engine[engine->id] = ctx->max_freq;
+	rps_update_context_limits(rps);
+	spin_unlock_irqrestore(&rps->engine_lock, flags);
+
+	/*
+	 * If we are running below the requested frequency, kick the worker to
+	 * jump to it. Otherwise, we let the frequency decay naturally.
+	 */
+	if (READ_ONCE(rps->freq) < ctx->min_freq)
+		schedule_work(&rps->work);
+}
+
+/*
+ * Reset @engine's limits to the hardware range (it is idle or being reset)
+ * and recompute min/max_freq_context from what the other engines require.
+ */
+void intel_rps_remove_engine(const struct intel_engine_cs *engine)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+	unsigned long flags;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	if (rps->min_freq_hw == rps->min_freq_engine[engine->id] &&
+	    rps->max_freq_hw == rps->max_freq_engine[engine->id])
+		return;
+
+	spin_lock_irqsave(&rps->engine_lock, flags);
+	rps->min_freq_engine[engine->id] = rps->min_freq_hw;
+	rps->max_freq_engine[engine->id] = rps->max_freq_hw;
+	rps_update_context_limits(rps);
+	spin_unlock_irqrestore(&rps->engine_lock, flags);
+}
+
 static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
 {
 	I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2239,9 +2317,11 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
 void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 {
 	struct intel_rps *rps = &dev_priv->gt_pm.rps;
+	int n;
 
 	mutex_init(&rps->lock);
 	INIT_WORK(&rps->work, intel_rps_work);
+	spin_lock_init(&rps->engine_lock);
 
 	/*
 	 * RPM depends on RC6 to save restore the GT HW context, so make RC6 a
@@ -2293,6 +2373,13 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
 	rps->max_freq_soft = rps->max_freq_hw;
 	rps->min_freq_soft = rps->min_freq_hw;
 
+	rps->max_freq_context = rps->max_freq_hw;
+	rps->min_freq_context = rps->min_freq_hw;
+	for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
+		rps->max_freq_engine[n] = rps->max_freq_hw;
+		rps->min_freq_engine[n] = rps->min_freq_hw;
+	}
+
 	/* Finally allow us to boost to max by default */
 	rps->boost_freq = rps->max_freq_hw;
 
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index fd2dbe381f32..bf8fbb230682 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -24,6 +24,12 @@
 #ifndef __INTEL_GT_PM_H__
 #define __INTEL_GT_PM_H__
 
+struct drm_i915_private;
+struct drm_i915_gem_request;
+struct i915_gem_context;
+struct intel_engine_cs;
+struct intel_rps_client;
+
 void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
 void intel_gpu_ips_teardown(void);
 
@@ -40,6 +46,9 @@ void intel_gt_disable_rc6(struct drm_i915_private *dev_priv);
 
 void intel_rps_busy(struct drm_i915_private *dev_priv);
 void intel_rps_idle(struct drm_i915_private *dev_priv);
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx);
+void intel_rps_remove_engine(const struct intel_engine_cs *engine);
 void intel_rps_boost(struct drm_i915_gem_request *rq,
 		     struct intel_rps_client *rps);
 
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 5833dc57f9c6..c39c0f76d671 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -26,6 +26,8 @@
 #include <trace/events/dma_fence.h>
 
 #include "intel_guc_submission.h"
+
+#include "intel_gt_pm.h"
 #include "i915_drv.h"
 
 /**
@@ -662,6 +664,12 @@ static void guc_submit(struct intel_engine_cs *engine)
 	}
 }
 
+static void update_rps(struct intel_engine_cs *engine)
+{
+	intel_rps_update_engine(engine,
+				port_request(&engine->execlists.port[0])->ctx);
+}
+
 static void port_assign(struct execlist_port *port,
 			struct drm_i915_gem_request *rq)
 {
@@ -740,6 +748,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
 done:
 	if (submit) {
 		port_assign(port, last);
+		update_rps(engine);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		guc_submit(engine);
 	}
@@ -763,8 +772,10 @@ static void guc_submission_tasklet(unsigned long data)
 
 		rq = port_request(&port[0]);
 	}
-	if (!rq)
+	if (!rq) {
 		execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+		intel_rps_remove_engine(engine);
+	}
 
 	if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
 	    intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index a1ec9659cae9..debfff4d7da1 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,6 +138,7 @@
 #include "i915_drv.h"
 #include "i915_gem_render_state.h"
 #include "intel_lrc_reg.h"
+#include "intel_gt_pm.h"
 #include "intel_mocs.h"
 
 #define RING_EXECLIST_QFULL		(1 << 0x2)
@@ -460,6 +461,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
 	execlists_clear_active(&engine->execlists, EXECLISTS_ACTIVE_HWACK);
 }
 
+static void update_rps(struct intel_engine_cs *engine)
+{
+	intel_rps_update_engine(engine,
+				port_request(&engine->execlists.port[0])->ctx);
+}
+
 static void execlists_dequeue(struct intel_engine_cs *engine)
 {
 	struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -633,6 +640,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
 	spin_unlock_irq(&engine->timeline.lock);
 
 	if (submit) {
+		update_rps(engine);
 		execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
 		execlists_submit_ports(engine);
 	}
@@ -887,9 +895,11 @@ static void execlists_submission_tasklet(unsigned long data)
 			/* After the final element, the hw should be idle */
 			GEM_BUG_ON(port_count(port) == 0 &&
 				   !(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
-			if (port_count(port) == 0)
+			if (port_count(port) == 0) {
 				execlists_clear_active(execlists,
 						       EXECLISTS_ACTIVE_USER);
+				intel_rps_remove_engine(engine);
+			}
 		}
 
 		if (head != execlists->csb_head) {
@@ -1615,6 +1625,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
 
 	/* Push back any incomplete requests for replay after the reset. */
 	__unwind_incomplete_requests(engine);
+	intel_rps_remove_engine(engine);
 
 	spin_unlock_irqrestore(&engine->timeline.lock, flags);
 
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 2c5201d70632..7b879504f122 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1464,6 +1464,10 @@ struct drm_i915_gem_context_param {
 #define   I915_CONTEXT_MAX_USER_PRIORITY	1023 /* inclusive */
 #define   I915_CONTEXT_DEFAULT_PRIORITY		0
 #define   I915_CONTEXT_MIN_USER_PRIORITY	-1023 /* inclusive */
+#define I915_CONTEXT_PARAM_FREQUENCY	0x7
+#define   I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
+#define   I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
+#define   I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
 	__u64 value;
 };
 
-- 
2.16.1



More information about the Intel-gfx-trybot mailing list