[Intel-gfx] [PATCH 1/6] drm/i915: Track per-context engine busyness
Tvrtko Ursulin
tursulin at ursulin.net
Fri Jan 19 13:45:23 UTC 2018
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Some customers want to know how much GPU time their clients are
using in order to make dynamic load balancing decisions.
With the hooks already in place which track the overall engine busyness,
we can extend that slightly to split that time between contexts.
v2: Fix accounting for tail updates.
v3: Rebase.
v4: Mark currently running contexts as active on stats enable.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Cc: gordon.kelly at intel.com
---
drivers/gpu/drm/i915/i915_gem_context.h | 5 ++++
drivers/gpu/drm/i915/intel_engine_cs.c | 32 +++++++++++++++++++++
drivers/gpu/drm/i915/intel_lrc.c | 14 +++++----
drivers/gpu/drm/i915/intel_ringbuffer.h | 50 +++++++++++++++++++++++++++++----
4 files changed, 90 insertions(+), 11 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_gem_context.h b/drivers/gpu/drm/i915/i915_gem_context.h
index 4bfb72f8e1cb..0ce8b9bf0f32 100644
--- a/drivers/gpu/drm/i915/i915_gem_context.h
+++ b/drivers/gpu/drm/i915/i915_gem_context.h
@@ -157,6 +157,11 @@ struct i915_gem_context {
u32 *lrc_reg_state;
u64 lrc_desc;
int pin_count;
+ struct {
+ bool active;
+ ktime_t start;
+ ktime_t total;
+ } stats;
} engine[I915_NUM_ENGINES];
/** ring_size: size for allocating the per-engine ring buffer */
diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c
index d572b18d39eb..9907ceedfa90 100644
--- a/drivers/gpu/drm/i915/intel_engine_cs.c
+++ b/drivers/gpu/drm/i915/intel_engine_cs.c
@@ -1966,6 +1966,16 @@ int intel_enable_engine_stats(struct intel_engine_cs *engine)
engine->stats.enabled_at = ktime_get();
+ /* Mark currently running context as active. */
+ if (port_isset(port)) {
+ struct drm_i915_gem_request *req = port_request(port);
+ struct intel_context *ce =
+ &req->ctx->engine[engine->id];
+
+ ce->stats.start = engine->stats.enabled_at;
+ ce->stats.active = true;
+ }
+
/* XXX submission method oblivious? */
while (num_ports-- && port_isset(port)) {
engine->stats.active++;
@@ -2038,6 +2048,28 @@ void intel_disable_engine_stats(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->stats.lock, flags);
}
+ktime_t intel_context_engine_get_busy_time(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine)
+{
+ struct intel_context *ce;
+ unsigned long flags;
+ ktime_t total;
+
+ ce = &ctx->engine[engine->id];
+
+ spin_lock_irqsave(&engine->stats.lock, flags);
+
+ total = ce->stats.total;
+
+ if (ce->stats.active)
+ total = ktime_add(total,
+ ktime_sub(ktime_get(), ce->stats.start));
+
+ spin_unlock_irqrestore(&engine->stats.lock, flags);
+
+ return total;
+}
+
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_engine.c"
#endif
diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c
index 24ce781d39b7..a82ad5da6090 100644
--- a/drivers/gpu/drm/i915/intel_lrc.c
+++ b/drivers/gpu/drm/i915/intel_lrc.c
@@ -380,16 +380,19 @@ execlists_context_status_change(struct drm_i915_gem_request *rq,
}
static inline void
-execlists_context_schedule_in(struct drm_i915_gem_request *rq)
+execlists_context_schedule_in(struct drm_i915_gem_request *rq,
+ unsigned int port)
{
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_IN);
- intel_engine_context_in(rq->engine);
+ intel_engine_context_in(rq->engine,
+ &rq->ctx->engine[rq->engine->id],
+ port == 0);
}
static inline void
execlists_context_schedule_out(struct drm_i915_gem_request *rq)
{
- intel_engine_context_out(rq->engine);
+ intel_engine_context_out(rq->engine, &rq->ctx->engine[rq->engine->id]);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_OUT);
}
@@ -442,7 +445,7 @@ static void execlists_submit_ports(struct intel_engine_cs *engine)
if (rq) {
GEM_BUG_ON(count > !n);
if (!count++)
- execlists_context_schedule_in(rq);
+ execlists_context_schedule_in(rq, n);
port_set(&port[n], port_pack(rq, count));
desc = execlists_update_context(rq);
GEM_DEBUG_EXEC(port[n].context_id = upper_32_bits(desc));
@@ -703,7 +706,8 @@ execlists_cancel_port_requests(struct intel_engine_execlists * const execlists)
struct drm_i915_gem_request *rq = port_request(port);
GEM_BUG_ON(!execlists->active);
- intel_engine_context_out(rq->engine);
+ intel_engine_context_out(rq->engine,
+ &rq->ctx->engine[rq->engine->id]);
execlists_context_status_change(rq, INTEL_CONTEXT_SCHEDULE_PREEMPTED);
i915_gem_request_put(rq);
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index b198df1f248c..27b727cf4017 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -4,6 +4,7 @@
#include <linux/hashtable.h>
#include "i915_gem_batch_pool.h"
+#include "i915_gem_context.h"
#include "i915_gem_request.h"
#include "i915_gem_timeline.h"
#include "i915_pmu.h"
@@ -1036,25 +1037,42 @@ void intel_engine_dump(struct intel_engine_cs *engine,
struct intel_engine_cs *
intel_engine_lookup_user(struct drm_i915_private *i915, u8 class, u8 instance);
-static inline void intel_engine_context_in(struct intel_engine_cs *engine)
+static inline void
+intel_engine_context_in(struct intel_engine_cs *engine,
+ struct intel_context *ce,
+ bool submit)
{
unsigned long flags;
+ ktime_t now;
if (READ_ONCE(engine->stats.enabled) == 0)
return;
spin_lock_irqsave(&engine->stats.lock, flags);
+ if (submit) {
+ now = ktime_get();
+ ce->stats.start = now;
+ ce->stats.active = true;
+ } else {
+ now = 0;
+ }
+
if (engine->stats.enabled > 0) {
- if (engine->stats.active++ == 0)
- engine->stats.start = ktime_get();
+ if (engine->stats.active++ == 0) {
+ if (!now)
+ now = ktime_get();
+ engine->stats.start = now;
+ }
GEM_BUG_ON(engine->stats.active == 0);
}
spin_unlock_irqrestore(&engine->stats.lock, flags);
}
-static inline void intel_engine_context_out(struct intel_engine_cs *engine)
+static inline void
+intel_engine_context_out(struct intel_engine_cs *engine,
+ struct intel_context *ce)
{
unsigned long flags;
@@ -1064,14 +1082,31 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
spin_lock_irqsave(&engine->stats.lock, flags);
if (engine->stats.enabled > 0) {
+ struct execlist_port *next_port = &engine->execlists.port[1];
+ ktime_t now = ktime_get();
ktime_t last;
+ GEM_BUG_ON(!ce->stats.start);
+ ce->stats.total = ktime_add(ce->stats.total,
+ ktime_sub(now, ce->stats.start));
+ ce->stats.active = false;
+
+ if (port_isset(next_port)) {
+ struct drm_i915_gem_request *next_req =
+ port_request(next_port);
+ struct intel_context *next_ce =
+ &next_req->ctx->engine[engine->id];
+
+ next_ce->stats.start = now;
+ next_ce->stats.active = true;
+ }
+
if (engine->stats.active && --engine->stats.active == 0) {
/*
* Decrement the active context count and in case GPU
* is now idle add up to the running total.
*/
- last = ktime_sub(ktime_get(), engine->stats.start);
+ last = ktime_sub(now, engine->stats.start);
engine->stats.total = ktime_add(engine->stats.total,
last);
@@ -1081,7 +1116,7 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
* the first event in which case we account from the
* time stats gathering was turned on.
*/
- last = ktime_sub(ktime_get(), engine->stats.enabled_at);
+ last = ktime_sub(now, engine->stats.enabled_at);
engine->stats.total = ktime_add(engine->stats.total,
last);
@@ -1091,6 +1126,9 @@ static inline void intel_engine_context_out(struct intel_engine_cs *engine)
spin_unlock_irqrestore(&engine->stats.lock, flags);
}
+ktime_t intel_context_engine_get_busy_time(struct i915_gem_context *ctx,
+ struct intel_engine_cs *engine);
+
int intel_enable_engine_stats(struct intel_engine_cs *engine);
void intel_disable_engine_stats(struct intel_engine_cs *engine);
--
2.14.1
More information about the Intel-gfx
mailing list