[PATCH 42/42] ctx-freq
Chris Wilson
chris at chris-wilson.co.uk
Thu Mar 8 13:07:27 UTC 2018
---
drivers/gpu/drm/i915/i915_debugfs.c | 15 ++++-
drivers/gpu/drm/i915/i915_drv.h | 5 ++
drivers/gpu/drm/i915/i915_gem_context.c | 51 ++++++++++++++++
drivers/gpu/drm/i915/i915_gem_context.h | 3 +
drivers/gpu/drm/i915/intel_gt_pm.c | 90 +++++++++++++++++++++++++++--
drivers/gpu/drm/i915/intel_gt_pm.h | 5 ++
drivers/gpu/drm/i915/intel_guc_submission.c | 16 ++++-
drivers/gpu/drm/i915/intel_lrc.c | 16 ++++-
include/uapi/drm/i915_drm.h | 20 +++++++
9 files changed, 212 insertions(+), 9 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 5509f9458fd5..c9363376526c 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2191,6 +2191,7 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
struct drm_device *dev = &dev_priv->drm;
struct intel_rps *rps = &dev_priv->gt_pm.rps;
struct drm_file *file;
+ int n;
seq_printf(m, "GPU busy? %s [%d requests]\n",
yesno(dev_priv->gt.awake), dev_priv->gt.active_requests);
@@ -2201,13 +2202,25 @@ static int i915_rps_boost_info(struct seq_file *m, void *data)
intel_gpu_freq(dev_priv, rps->freq),
intel_gpu_freq(dev_priv, rps->min),
intel_gpu_freq(dev_priv, rps->max));
- seq_printf(m, " min hard:%d, soft:%d user:%d; max user:%d, soft: %d hard:%d\n",
+ seq_printf(m, " min hard:%d, soft:%d, ctx:%d, user:%d; max user:%d, ctx:%d, soft:%d, hard:%d\n",
intel_gpu_freq(dev_priv, rps->min_freq_hw),
intel_gpu_freq(dev_priv, rps->min_freq_soft),
+ intel_gpu_freq(dev_priv, rps->min_freq_context),
intel_gpu_freq(dev_priv, rps->min_freq_user),
intel_gpu_freq(dev_priv, rps->max_freq_user),
+ intel_gpu_freq(dev_priv, rps->max_freq_context),
intel_gpu_freq(dev_priv, rps->max_freq_soft),
intel_gpu_freq(dev_priv, rps->max_freq_hw));
+ seq_printf(m, " engines min: [");
+ for (n = 0; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+ seq_printf(m, "%s%d", n ? ", " : "",
+ intel_gpu_freq(dev_priv, rps->min_freq_engine[n]));
+ seq_printf(m, "]\n engines max: [");
+ for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+ seq_printf(m, "%s%d", n ? ", " : "",
+ intel_gpu_freq(dev_priv, rps->max_freq_engine[n]));
+ seq_printf(m, "]\n");
+
seq_printf(m, " idle:%d, efficient:%d, boost:%d\n",
intel_gpu_freq(dev_priv, rps->idle_freq),
intel_gpu_freq(dev_priv, rps->efficient_freq),
diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h
index 70cb803ddeff..69c1e2411eba 100644
--- a/drivers/gpu/drm/i915/i915_drv.h
+++ b/drivers/gpu/drm/i915/i915_drv.h
@@ -896,6 +896,7 @@ struct intel_rps_ei {
struct intel_rps {
struct mutex lock;
+ spinlock_t engine_lock; /* protects updates to min/max_freq_context */
/*
* work, interrupts_enabled and pm_iir are protected by
@@ -929,6 +930,10 @@ struct intel_rps {
u8 max_freq_user; /* Max frequency permitted by the driver */
u8 min_freq_soft;
u8 max_freq_soft;
+ u8 min_freq_context; /* Min frequency permitted by the context */
+ u8 max_freq_context; /* Max frequency permitted by the context */
+ u8 min_freq_engine[I915_NUM_ENGINES];
+ u8 max_freq_engine[I915_NUM_ENGINES];
u8 idle_freq; /* Frequency to request when we are idle */
u8 efficient_freq; /* AKA RPe. Pre-determined balanced frequency */
diff --git a/drivers/gpu/drm/i915/i915_gem_context.c b/drivers/gpu/drm/i915/i915_gem_context.c
index 65bf92658d92..29d09a2fd0d7 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/i915_gem_context.c
@@ -88,8 +88,10 @@
#include <linux/log2.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
+
#include "i915_drv.h"
#include "i915_trace.h"
+#include "intel_gt_pm.h"
#define ALL_L3_SLICES(dev) (1 << NUM_L3_SLICES(dev)) - 1
@@ -281,6 +283,8 @@ __create_hw_context(struct drm_i915_private *dev_priv,
list_add_tail(&ctx->link, &dev_priv->contexts.list);
ctx->i915 = dev_priv;
ctx->priority = I915_PRIORITY_NORMAL;
+ ctx->min_freq = dev_priv->gt_pm.rps.min_freq_hw;
+ ctx->max_freq = dev_priv->gt_pm.rps.max_freq_hw;
INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
INIT_LIST_HEAD(&ctx->handles_list);
@@ -715,6 +719,7 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_gem_context *ctx;
int ret = 0;
@@ -747,6 +752,18 @@ int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
case I915_CONTEXT_PARAM_PRIORITY:
args->value = ctx->priority;
break;
+ case I915_CONTEXT_PARAM_FREQUENCY:
+ if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+ ret = -ENODEV;
+ } else if (args->size) {
+ ret = -EINVAL;
+ } else {
+ u32 min = intel_gpu_freq(i915, ctx->min_freq);
+ u32 max = intel_gpu_freq(i915, ctx->max_freq);
+ args->value = I915_CONTEXT_SET_FREQUENCY(min, max);
+ }
+ break;
+
default:
ret = -EINVAL;
break;
@@ -761,6 +778,7 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_file_private *file_priv = file->driver_priv;
struct drm_i915_gem_context_param *args = data;
+ struct drm_i915_private *i915 = to_i915(dev);
struct i915_gem_context *ctx;
int ret;
@@ -821,6 +839,39 @@ int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
ctx->priority = priority;
}
break;
+ case I915_CONTEXT_PARAM_FREQUENCY:
+ if (!HAS_RPS(i915) || !HAS_EXECLISTS(i915)) {
+ ret = -ENODEV;
+ } else if (args->size) {
+ ret = -EINVAL;
+ } else {
+ struct intel_rps *rps = &i915->gt_pm.rps;
+ u32 min, max;
+
+ min = intel_freq_opcode(i915,
+ I915_CONTEXT_MIN_FREQUENCY(args->value));
+ max = intel_freq_opcode(i915,
+ I915_CONTEXT_MAX_FREQUENCY(args->value));
+ /*
+ * As we constrain the frequency request from the
+ * context (application) by the user imposed limits,
+ * it is reasonable to allow the application to
+ * specify its preferred range within the user limits.
+ * That is we do not need to restrict requesting
+ * a higher frequency to privileged (CAP_SYS_NICE)
+ * processes.
+ */
+ if (max < min) {
+ ret = -EINVAL;
+ } else if (min < rps->min_freq_hw ||
+ max > rps->max_freq_hw) {
+ ret = -EINVAL;
+ } else {
+ ctx->min_freq = min;
+ ctx->max_freq = max;
+ }
+ }
+ break;
default:
ret = -EINVAL;
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 7854262ddfd9..98f7b71a787a 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -150,6 +150,9 @@ struct i915_gem_context {
*/
int priority;
+ u32 min_freq;
+ u32 max_freq;
+
/** ggtt_offset_bias: placement restriction for context objects */
u32 ggtt_offset_bias;
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.c b/drivers/gpu/drm/i915/intel_gt_pm.c
index 445436500646..7738a1eb7397 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.c
+++ b/drivers/gpu/drm/i915/intel_gt_pm.c
@@ -520,10 +520,10 @@ static void intel_rps_work(struct work_struct *work)
mutex_lock(&rps->lock);
min = clamp_t(int,
- rps->min_freq_soft,
+ max(rps->min_freq_soft, READ_ONCE(rps->min_freq_context)),
rps->min_freq_user, rps->max_freq_user);
max = clamp_t(int,
- rps->max_freq_soft,
+ min(rps->max_freq_soft, READ_ONCE(rps->max_freq_context)),
min, rps->max_freq_user);
if (client_boost && max < rps->boost_freq)
max = rps->boost_freq;
@@ -631,8 +631,13 @@ void intel_rps_idle(struct drm_i915_private *dev_priv)
if (INTEL_GEN(dev_priv) >= 6)
gen6_disable_rps_interrupts(dev_priv);
+ cancel_work_sync(&rps->work);
+
mutex_lock(&rps->lock);
+ rps->min = rps->min_freq_hw;
+ rps->max = rps->max_freq_hw;
+
if (rps->freq > rps->idle_freq) {
/*
* The punit delays the write of the frequency and voltage
@@ -649,11 +654,9 @@ void intel_rps_idle(struct drm_i915_private *dev_priv)
* takes a lot less power than the render powerwell.
*/
intel_uncore_forcewake_get(dev_priv, FORCEWAKE_MEDIA);
- if (__intel_set_rps(dev_priv, rps->idle_freq))
+ if (intel_set_rps(dev_priv, rps->idle_freq))
DRM_DEBUG_DRIVER("Failed to set idle frequency\n");
intel_uncore_forcewake_put(dev_priv, FORCEWAKE_MEDIA);
-
- rps->freq = rps->idle_freq;
}
if (INTEL_GEN(dev_priv) >= 6) {
@@ -694,6 +697,74 @@ void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *client)
atomic_inc(client ? &client->boosts : &rps->boosts);
}
+/*
+ * __rps_update_engine - record one engine's frequency range and refresh the
+ * aggregate context limits.
+ * @engine: engine whose slot in min/max_freq_engine[] is updated
+ * @min: new lower bound for this engine; must not exceed @max
+ * @max: new upper bound for this engine
+ *
+ * min_freq_context is kept as the largest per-engine minimum and
+ * max_freq_context as the smallest per-engine maximum. Each is recomputed
+ * under engine_lock when this engine's slot changes. If an aggregate changed
+ * and the current frequency lies outside [@min, @max], the RPS worker is
+ * scheduled.
+ *
+ * intel_rps_work() samples the aggregates with READ_ONCE() without taking
+ * engine_lock, so each aggregate is built in a local and published with a
+ * single WRITE_ONCE(). Rebuilding it in place would let the worker observe a
+ * partially scanned value, and nothing would reschedule the worker if the
+ * final aggregate turned out to be unchanged.
+ */
+static void __rps_update_engine(const struct intel_engine_cs *engine,
+				u32 min, u32 max)
+{
+	struct intel_rps *rps = &engine->i915->gt_pm.rps;
+	const enum intel_engine_id idx = engine->id;
+	unsigned long flags;
+	bool update = false;
+	u32 old;
+	u8 agg;
+	int n;
+
+	GEM_BUG_ON(min > max);
+
+	if (rps->min_freq_engine[idx] != min) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->min_freq_engine[idx] = min;
+
+		/* The context floor is the largest per-engine minimum. */
+		agg = rps->min_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->min_freq_engine); n++)
+			if (rps->min_freq_engine[n] > agg)
+				agg = rps->min_freq_engine[n];
+
+		old = rps->min_freq_context;
+		WRITE_ONCE(rps->min_freq_context, agg);
+		update |= agg != old;
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	if (rps->max_freq_engine[idx] != max) {
+		spin_lock_irqsave(&rps->engine_lock, flags);
+
+		rps->max_freq_engine[idx] = max;
+
+		/* The context ceiling is the smallest per-engine maximum. */
+		agg = rps->max_freq_engine[0];
+		for (n = 1; n < ARRAY_SIZE(rps->max_freq_engine); n++)
+			if (rps->max_freq_engine[n] < agg)
+				agg = rps->max_freq_engine[n];
+
+		old = rps->max_freq_context;
+		WRITE_ONCE(rps->max_freq_context, agg);
+		update |= agg != old;
+
+		spin_unlock_irqrestore(&rps->engine_lock, flags);
+	}
+
+	if (update) {
+		/* Only kick the worker if the current frequency is now out of range. */
+		old = READ_ONCE(rps->freq);
+		if (old < min || old > max)
+			schedule_work(&rps->work);
+	}
+}
+
+/*
+ * Apply the frequency range stored in @ctx (min_freq/max_freq) to @engine's
+ * per-engine slot. Does nothing when HAS_RPS() is false for the device.
+ */
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+			     const struct i915_gem_context *ctx)
+{
+	if (HAS_RPS(engine->i915))
+		__rps_update_engine(engine, ctx->min_freq, ctx->max_freq);
+}
+
+/*
+ * Drop any context-imposed range from @engine by resetting its per-engine
+ * slot to the hardware limits (min_freq_hw/max_freq_hw). Does nothing when
+ * HAS_RPS() is false for the device.
+ */
+void intel_rps_remove_engine(const struct intel_engine_cs *engine)
+{
+	struct intel_rps *rps;
+
+	if (!HAS_RPS(engine->i915))
+		return;
+
+	rps = &engine->i915->gt_pm.rps;
+	__rps_update_engine(engine, rps->min_freq_hw, rps->max_freq_hw);
+}
+
static void gen9_disable_rc6(struct drm_i915_private *dev_priv)
{
I915_WRITE(GEN6_RC_CONTROL, 0);
@@ -2241,9 +2312,11 @@ void intel_gt_pm_sanitize(struct drm_i915_private *dev_priv)
void intel_gt_pm_init(struct drm_i915_private *dev_priv)
{
struct intel_rps *rps = &dev_priv->gt_pm.rps;
+ int n;
mutex_init(&rps->lock);
INIT_WORK(&rps->work, intel_rps_work);
+ spin_lock_init(&rps->engine_lock);
/*
* RPM depends on RC6 to save restore the GT HW context, so make RC6 a
@@ -2287,6 +2360,13 @@ void intel_gt_pm_init(struct drm_i915_private *dev_priv)
rps->max_freq_soft = rps->max_freq_hw;
rps->min_freq_soft = rps->min_freq_hw;
+ rps->max_freq_context = rps->max_freq_hw;
+ rps->min_freq_context = rps->min_freq_hw;
+ for (n = 0; n < ARRAY_SIZE(rps->max_freq_engine); n++) {
+ rps->max_freq_engine[n] = rps->max_freq_hw;
+ rps->min_freq_engine[n] = rps->min_freq_hw;
+ }
+
/* Finally allow us to boost to max by default */
rps->boost_freq = rps->max_freq_hw;
rps->idle_freq = rps->min_freq_hw;
diff --git a/drivers/gpu/drm/i915/intel_gt_pm.h b/drivers/gpu/drm/i915/intel_gt_pm.h
index f18aac6c1606..96a21d8641bf 100644
--- a/drivers/gpu/drm/i915/intel_gt_pm.h
+++ b/drivers/gpu/drm/i915/intel_gt_pm.h
@@ -25,7 +25,9 @@
#define __INTEL_GT_PM_H__
struct drm_i915_private;
+struct i915_gem_context;
struct i915_request;
+struct intel_engine_cs;
struct intel_rps_client;
void intel_gpu_ips_init(struct drm_i915_private *dev_priv);
@@ -44,6 +46,9 @@ void intel_gt_disable_rc6(struct drm_i915_private *dev_priv);
void intel_rps_busy(struct drm_i915_private *dev_priv);
void intel_rps_idle(struct drm_i915_private *dev_priv);
+void intel_rps_update_engine(const struct intel_engine_cs *engine,
+ const struct i915_gem_context *ctx);
+void intel_rps_remove_engine(const struct intel_engine_cs *engine);
void intel_rps_boost(struct i915_request *rq, struct intel_rps_client *rps);
int intel_gpu_freq(struct drm_i915_private *dev_priv, int val);
diff --git a/drivers/gpu/drm/i915/intel_guc_submission.c b/drivers/gpu/drm/i915/intel_guc_submission.c
index 8a8ad2fe158d..c3dc95ed3132 100644
--- a/drivers/gpu/drm/i915/intel_guc_submission.c
+++ b/drivers/gpu/drm/i915/intel_guc_submission.c
@@ -26,9 +26,12 @@
#include <trace/events/dma_fence.h>
#include "intel_guc_submission.h"
-#include "intel_lrc_reg.h"
+
#include "i915_drv.h"
+#include "intel_gt_pm.h"
+#include "intel_lrc_reg.h"
+
#define GUC_PREEMPT_FINISHED 0x1
#define GUC_PREEMPT_BREADCRUMB_DWORDS 0x8
#define GUC_PREEMPT_BREADCRUMB_BYTES \
@@ -650,6 +653,12 @@ static void guc_submit(struct intel_engine_cs *engine)
}
}
+/*
+ * Hand the context of the request in the first execlists port to the RPS
+ * code so the engine's frequency range follows that context.
+ * NOTE(review): dereferences the port's request unconditionally; assumes the
+ * caller has already assigned a request to that port - confirm.
+ */
+static void update_rps(struct intel_engine_cs *engine)
+{
+	const struct i915_gem_context *ctx =
+		port_request(engine->execlists.port)->ctx;
+
+	intel_rps_update_engine(engine, ctx);
+}
+
static void port_assign(struct execlist_port *port, struct i915_request *rq)
{
GEM_BUG_ON(port_isset(port));
@@ -728,6 +737,7 @@ static void guc_dequeue(struct intel_engine_cs *engine)
execlists->first = rb;
if (submit) {
port_assign(port, last);
+ update_rps(engine);
execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
guc_submit(engine);
}
@@ -757,8 +767,10 @@ static void guc_submission_tasklet(unsigned long data)
rq = port_request(&port[0]);
}
- if (!rq)
+ if (!rq) {
execlists_clear_active(execlists, EXECLISTS_ACTIVE_USER);
+ intel_rps_remove_engine(engine);
+ }
if (execlists_is_active(execlists, EXECLISTS_ACTIVE_PREEMPT) &&
intel_read_status_page(engine, I915_GEM_HWS_PREEMPT_INDEX) ==
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index d763dfb51190..20785a4749ed 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -138,6 +138,7 @@
#include "i915_drv.h"
#include "i915_gem_render_state.h"
#include "intel_lrc_reg.h"
+#include "intel_gt_pm.h"
#include "intel_mocs.h"
#define RING_EXECLIST_QFULL (1 << 0x2)
@@ -535,6 +536,12 @@ static void inject_preempt_context(struct intel_engine_cs *engine)
execlists_set_active(&engine->execlists, EXECLISTS_ACTIVE_PREEMPT);
}
+/*
+ * Propagate the frequency range of the context whose request sits in the
+ * first execlists port to the RPS bookkeeping for this engine.
+ * NOTE(review): the port's request is dereferenced without a NULL check;
+ * presumably only called once the port has been filled - confirm.
+ */
+static void update_rps(struct intel_engine_cs *engine)
+{
+	const struct i915_gem_context *ctx =
+		port_request(engine->execlists.port)->ctx;
+
+	intel_rps_update_engine(engine, ctx);
+}
+
static void execlists_dequeue(struct intel_engine_cs *engine)
{
struct intel_engine_execlists * const execlists = &engine->execlists;
@@ -708,6 +715,7 @@ static void execlists_dequeue(struct intel_engine_cs *engine)
spin_unlock_irq(&engine->timeline->lock);
if (submit) {
+ update_rps(engine);
execlists_set_active(execlists, EXECLISTS_ACTIVE_USER);
execlists_submit_ports(engine);
}
@@ -977,6 +985,9 @@ static void execlists_submission_tasklet(unsigned long data)
engine->name, port->context_id);
execlists_port_complete(execlists, port);
+ if (port_isset(port))
+ intel_rps_update_engine(engine,
+ port_request(port)->ctx);
} else {
port_set(port, port_pack(rq, count));
}
@@ -984,9 +995,11 @@ static void execlists_submission_tasklet(unsigned long data)
/* After the final element, the hw should be idle */
GEM_BUG_ON(port_count(port) == 0 &&
!(status & GEN8_CTX_STATUS_ACTIVE_IDLE));
- if (port_count(port) == 0)
+ if (port_count(port) == 0) {
execlists_clear_active(execlists,
EXECLISTS_ACTIVE_USER);
+ intel_rps_remove_engine(engine);
+ }
}
if (head != execlists->csb_head) {
@@ -1710,6 +1723,7 @@ static void reset_common_ring(struct intel_engine_cs *engine,
/* Push back any incomplete requests for replay after the reset. */
spin_lock(&engine->timeline->lock);
__unwind_incomplete_requests(engine);
+ intel_rps_remove_engine(engine);
spin_unlock(&engine->timeline->lock);
/* Mark all CS interrupts as complete */
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index 7f5634ce8e88..64c6377df769 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -1456,6 +1456,26 @@ struct drm_i915_gem_context_param {
#define I915_CONTEXT_MAX_USER_PRIORITY 1023 /* inclusive */
#define I915_CONTEXT_DEFAULT_PRIORITY 0
#define I915_CONTEXT_MIN_USER_PRIORITY -1023 /* inclusive */
+
+/*
+ * I915_CONTEXT_PARAM_FREQUENCY:
+ *
+ * Request that when this context runs, the GPU is restricted to run
+ * in this frequency range; but still constrained by the global user
+ * restriction specified via sysfs.
+ *
+ * The minimum / maximum frequencies are specified in MHz. Each context
+ * starts in the default unrestricted state, where the range is taken from
+ * the hardware, and so may be queried.
+ *
+ * Note the frequency is only changed on a context switch; if the
+ * context's frequency is updated whilst the context is currently executing
+ * the request will not take effect until the next time the context is run.
+ */
+#define I915_CONTEXT_PARAM_FREQUENCY 0x7
+#define I915_CONTEXT_MIN_FREQUENCY(x) ((x) & 0xffffffff)
+#define I915_CONTEXT_MAX_FREQUENCY(x) ((x) >> 32)
+#define I915_CONTEXT_SET_FREQUENCY(min, max) ((__u64)(max) << 32 | (min))
__u64 value;
};
--
2.16.2
More information about the Intel-gfx-trybot
mailing list