[Intel-gfx] [PATCH v6 27/34] drm/i915: Added scheduler statistic reporting to debugfs

Fri May 6 13:21:01 UTC 2016

From: John Harrison <John.C.Harrison at Intel.com>

It is useful for know what the scheduler is doing for both debugging
and performance analysis purposes. This change adds a bunch of
counters and such that keep track of various scheduler operations
(batches submitted, completed, flush requests, etc.). The data can
then be read in userland via the debugfs mechanism.

v2: Updated to match changes to scheduler implementation.

v3: Updated for changes to kill code and flush code.

v4: Removed the fence/sync code as that will be part of a separate
patch series. Wrapped a long line to keep the style checker happy.

v5: Updated to remove forward declarations and white space. Added
documentation. [Joonas Lahtinen]

Used lighter weight spinlocks.

v6: Updated to newer nightly (lots of ring -> engine renaming).

Added 'for_each_scheduler_node()' and 'assert_scheduler_lock_held()'
helper macros. Updated to use 'to_i915()' instead of dev_private.
Converted all enum labels to uppercase. Removed even more white space.
Moved the enum to string conversion function to debugfs.c rather than
scheduler.c [review feedback from Joonas Lahtinen]

Added running totals of 'flying' and 'queued' nodes rather than
re-calculating each time as a minor CPU performance optimisation.

Added stats to the new file queue wait implementation.

v6.1: Added a target engine parameter to the throttle function. This
is used for tracking per engine throttle stats introduced in the v6
update. This fixes a random memory corruption bug caused by using an
invalid engine pointer.

For: VIZ-1587
Signed-off-by: John Harrison <John.C.Harrison at Intel.com>
Cc: Joonas Lahtinen <joonas.lahtinen at linux.intel.com>
---
 drivers/gpu/drm/i915/i915_debugfs.c        | 111 +++++++++++++++++++++++++++++
 drivers/gpu/drm/i915/i915_gem_execbuffer.c |   5 +-
 drivers/gpu/drm/i915/i915_scheduler.c      |  89 +++++++++++++++++++++--
 drivers/gpu/drm/i915/i915_scheduler.h      |  38 +++++++++-
 4 files changed, 237 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 980bb20..1d04cde 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -3609,6 +3609,116 @@ static int i915_drrs_status(struct seq_file *m, void *unused)
 	return 0;
 }
 
+static const char *i915_scheduler_queue_status_str(
+				enum i915_scheduler_queue_status status)
+{
+	static char	str[50];
+
+	switch (status) {
+	case I915_SQS_NONE:
+	return "None";
+
+	case I915_SQS_QUEUED:
+	return "Queued";
+
+	case I915_SQS_POPPED:
+	return "Popped";
+
+	case I915_SQS_FLYING:
+	return "Flying";
+
+	case I915_SQS_COMPLETE:
+	return "Complete";
+
+	case I915_SQS_DEAD:
+	return "Dead";
+
+	case I915_SQS_MAX:
+	return "Invalid";
+
+	default:
+	break;
+	}
+
+	sprintf(str, "[Unknown_%d!]", status);
+	return str;
+}
+
+static int i915_scheduler_info(struct seq_file *m, void *unused)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct drm_i915_private *dev_priv = to_i915(dev);
+	struct i915_scheduler   *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_stats *stats = scheduler->stats;
+	struct i915_scheduler_stats_nodes node_stats[I915_NUM_ENGINES];
+	struct intel_engine_cs *engine;
+	char   str[50 * (I915_NUM_ENGINES + 1)], name[50], *ptr;
+	int ret, i, e;
+
+	ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+	if (ret)
+		return ret;
+
+#define PRINT_VAR(name, fmt, var)					\
+	do {								\
+		sprintf(str, "%-22s", name);				\
+		ptr = str + strlen(str);				\
+		for_each_engine_id(engine, dev_priv, e) {		\
+			sprintf(ptr, " %10" fmt, var);			\
+			ptr += strlen(ptr);				\
+		}							\
+		seq_printf(m, "%s\n", str);				\
+	} while (0)
+
+	PRINT_VAR("Engine name:",           "s", dev_priv->engine[e].name);
+	PRINT_VAR("  Engine seqno",         "d", engine->get_seqno(engine));
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Batch submissions:\n");
+	PRINT_VAR("  Queued",               "u", stats[e].queued);
+	PRINT_VAR("  Submitted",            "u", stats[e].submitted);
+	PRINT_VAR("  Completed",            "u", stats[e].completed);
+	PRINT_VAR("  Expired",              "u", stats[e].expired);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Flush counts:\n");
+	PRINT_VAR("  By object",            "u", stats[e].flush_obj);
+	PRINT_VAR("  By request",           "u", stats[e].flush_req);
+	PRINT_VAR("  By stamp",             "u", stats[e].flush_stamp);
+	PRINT_VAR("  Blanket",              "u", stats[e].flush_all);
+	PRINT_VAR("  Entries bumped",       "u", stats[e].flush_bump);
+	PRINT_VAR("  Entries submitted",    "u", stats[e].flush_submit);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Miscellaneous:\n");
+	PRINT_VAR("  ExecEarly retry",      "u", stats[e].exec_early);
+	PRINT_VAR("  ExecFinal requeue",    "u", stats[e].exec_again);
+	PRINT_VAR("  ExecFinal killed",     "u", stats[e].exec_dead);
+	PRINT_VAR("  Hung flying",          "u", stats[e].kill_flying);
+	PRINT_VAR("  Hung queued",          "u", stats[e].kill_queued);
+	PRINT_VAR("  File queue wait",      "u", stats[e].file_wait);
+	PRINT_VAR("  File queue stall",     "u", stats[e].file_stall);
+	PRINT_VAR("  File queue lost",      "u", stats[e].file_lost);
+	seq_putc(m, '\n');
+
+	seq_puts(m, "Queue contents:\n");
+	for_each_engine(engine, dev_priv)
+		i915_scheduler_query_stats(engine, node_stats + engine->id);
+
+	for (i = 0; i < (I915_SQS_MAX + 1); i++) {
+		sprintf(name, "  %s", i915_scheduler_queue_status_str(i));
+		PRINT_VAR(name, "d", node_stats[e].counts[i]);
+	}
+	seq_putc(m, '\n');
+
+#undef PRINT_VAR
+
+	mutex_unlock(&dev->mode_config.mutex);
+
+	return 0;
+}
+
 struct pipe_crc_info {
 	const char *name;
 	struct drm_device *dev;
@@ -5585,6 +5695,7 @@ static const struct drm_info_list i915_debugfs_list[] = {
 	{"i915_semaphore_status", i915_semaphore_status, 0},
 	{"i915_shared_dplls_info", i915_shared_dplls_info, 0},
 	{"i915_dp_mst_info", i915_dp_mst_info, 0},
+	{"i915_scheduler_info", i915_scheduler_info, 0},
 	{"i915_wa_registers", i915_wa_registers, 0},
 	{"i915_ddb_info", i915_ddb_info, 0},
 	{"i915_sseu_status", i915_sseu_status, 0},
diff --git a/drivers/gpu/drm/i915/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
index caf7e00..150b699 100644
--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c
@@ -1558,7 +1558,7 @@ i915_gem_do_execbuffer(struct drm_device *dev, void *data,
 	}
 
 	/* Throttle batch requests per device file */
-	if (i915_scheduler_file_queue_wait(file))
+	if (i915_scheduler_file_queue_wait(file, engine))
 		return -EAGAIN;
 
 	intel_runtime_pm_get(dev_priv);
@@ -1808,6 +1808,9 @@ pre_mutex_err:
 	/* intel_gpu_busy should also get a ref, so it will free when the device
 	 * is really idle. */
 	intel_runtime_pm_put(dev_priv);
+
+	dev_priv->scheduler->stats[engine->id].exec_early++;
+
 	return ret;
 }
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 3569ddd..123f1dc 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -142,9 +142,12 @@ static void i915_scheduler_node_kill(struct i915_scheduler *scheduler,
 
 	if (I915_SQS_IS_FLYING(node)) {
 		scheduler->counts[node->params.engine->id].flying--;
+		scheduler->stats[node->params.engine->id].kill_flying++;
 		trace_i915_scheduler_unfly(node->params.engine, node);
-	} else
+	} else {
 		scheduler->counts[node->params.engine->id].queued--;
+		scheduler->stats[node->params.engine->id].kill_queued++;
+	}
 
 	node->status = I915_SQS_DEAD;
 	trace_i915_scheduler_node_state_change(node->params.engine, node);
@@ -390,6 +393,8 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine)
 		 */
 		i915_scheduler_node_fly(node);
 
+		scheduler->stats[engine->id].submitted++;
+
 		spin_unlock_irq(&scheduler->lock);
 		ret = dev_priv->gt.execbuf_final(&node->params);
 		spin_lock_irq(&scheduler->lock);
@@ -413,6 +418,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine)
 			case ENOENT:
 				/* Fatal errors. Kill the node. */
 				requeue = false;
+				scheduler->stats[engine->id].exec_dead++;
 				i915_scheduler_node_kill(scheduler, node);
 				break;
 
@@ -423,6 +429,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine)
 			case ERESTARTSYS:
 			case EINTR:
 				/* Supposedly recoverable errors. */
+				scheduler->stats[engine->id].exec_again++;
 				break;
 
 			default:
@@ -431,6 +438,7 @@ static int i915_scheduler_submit(struct intel_engine_cs *engine)
 				 * for the best.
 				 */
 				MISSING_CASE(-ret);
+				scheduler->stats[engine->id].exec_again++;
 				break;
 			}
 
@@ -574,12 +582,15 @@ static int i915_scheduler_queue_execbuffer_bypass(struct i915_scheduler_queue_en
 	struct i915_scheduler *scheduler = dev_priv->scheduler;
 	int ret;
 
+	scheduler->stats[qe->params.engine->id].queued++;
+
 	trace_i915_scheduler_queue(qe->params.engine, qe);
 
 	intel_ring_reserved_space_cancel(qe->params.request->ringbuf);
 
 	scheduler->flags[qe->params.engine->id] |= I915_SF_SUBMITTING;
 	ret = dev_priv->gt.execbuf_final(&qe->params);
+	scheduler->stats[qe->params.engine->id].submitted++;
 	scheduler->flags[qe->params.engine->id] &= ~I915_SF_SUBMITTING;
 
 	/*
@@ -593,6 +604,8 @@ static int i915_scheduler_queue_execbuffer_bypass(struct i915_scheduler_queue_en
 	qe->status = I915_SQS_COMPLETE;
 	i915_scheduler_clean_node(qe);
 
+	scheduler->stats[qe->params.engine->id].expired++;
+
 	return 0;
 }
 
@@ -704,6 +717,8 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
 						 scheduler->min_flying;
 
 	scheduler->counts[engine->id].queued++;
+	scheduler->stats[engine->id].queued++;
+
 	trace_i915_scheduler_queue(engine, node);
 	trace_i915_scheduler_node_state_change(engine, node);
 
@@ -746,10 +761,13 @@ bool i915_scheduler_notify_request(struct drm_i915_gem_request *req)
 	WARN_ON(!I915_SQS_IS_FLYING(node));
 
 	/* Node was in flight so mark it as complete. */
-	if (req->cancelled)
+	if (req->cancelled) {
 		node->status = I915_SQS_DEAD;
-	else
+		scheduler->stats[req->engine->id].kill_flying++;
+	} else {
 		node->status = I915_SQS_COMPLETE;
+		scheduler->stats[req->engine->id].completed++;
+	}
 
 	scheduler->counts[req->engine->id].flying--;
 	trace_i915_scheduler_node_state_change(req->engine, node);
@@ -892,6 +910,7 @@ static bool i915_scheduler_remove(struct i915_scheduler *scheduler,
 
 		list_del(&node->link);
 		list_add(&node->link, remove);
+		scheduler->stats[engine->id].expired++;
 
 		/* Strip the dependency info while the mutex is still locked */
 		i915_scheduler_remove_dependent(scheduler, node);
@@ -997,13 +1016,15 @@ void i915_scheduler_work_handler(struct work_struct *work)
 /**
  * i915_scheduler_file_queue_wait - Waits for space in the per file queue.
  * @file: File object to process.
+ * @engine: Engine being submitted to
  * This allows throttling of applications by limiting the total number of
  * outstanding requests to a specified level. Once that limit is reached,
  * this call will stall waiting on the oldest outstanding request. If it can
  * not stall for any reason it returns true to mean that the queue is full
  * and no more requests should be accepted.
  */
-bool i915_scheduler_file_queue_wait(struct drm_file *file)
+bool i915_scheduler_file_queue_wait(struct drm_file *file,
+				    struct intel_engine_cs *target)
 {
 	struct drm_i915_file_private *file_priv = file->driver_priv;
 	struct drm_i915_private *dev_priv = file_priv->dev_priv;
@@ -1048,12 +1069,14 @@ bool i915_scheduler_file_queue_wait(struct drm_file *file)
 		}
 
 		if (!req) {
+			scheduler->stats[target->id].file_lost++;
 			spin_unlock_irq(&scheduler->lock);
 			return false;
 		}
 
 		i915_gem_request_reference(req);
 
+		scheduler->stats[target->id].file_wait++;
 		spin_unlock_irq(&scheduler->lock);
 
 		ret = i915_gem_check_wedge(&dev_priv->gpu_error, false);
@@ -1077,9 +1100,60 @@ bool i915_scheduler_file_queue_wait(struct drm_file *file)
 
 err_unref:
 	i915_gem_request_unreference(req);
+
+	spin_lock_irq(&scheduler->lock);
+	scheduler->stats[target->id].file_wait--;
+	scheduler->stats[target->id].file_stall++;
+	spin_unlock_irq(&scheduler->lock);
+
 	return true;
 }
 
+/**
+ * i915_scheduler_query_stats - return various scheduler statistics
+ * @engine: Engine to report on
+ * @stats: Stats structure to be filled in
+ * For various reasons (debugging, performance analysis, curiosity) it is
+ * useful to see statistics about what the scheduler is doing. This function
+ * returns the stats that have been gathered in a data structure. The
+ * expectation is that this will be returned to the user via debugfs.
+ */
+int i915_scheduler_query_stats(struct intel_engine_cs *engine,
+			       struct i915_scheduler_stats_nodes *stats)
+{
+	struct drm_i915_private *dev_priv = to_i915(engine->dev);
+	struct i915_scheduler *scheduler = dev_priv->scheduler;
+	struct i915_scheduler_queue_entry *node;
+
+	memset(stats, 0x00, sizeof(*stats));
+
+	spin_lock_irq(&scheduler->lock);
+
+	for_each_scheduler_node(node, engine->id) {
+		if (node->status >= I915_SQS_MAX) {
+			DRM_DEBUG_DRIVER("Invalid node state: %d! [uniq = %d, seqno = %d]\n",
+					 node->status, node->params.request->uniq,
+					 node->params.request->seqno);
+
+			stats->counts[I915_SQS_MAX]++;
+			continue;
+		}
+
+		stats->counts[node->status]++;
+	}
+
+	WARN(stats->counts[I915_SQS_QUEUED] != scheduler->counts[engine->id].queued,
+	     "Queued count mis-match: %d vs %d!\n",
+	     stats->counts[I915_SQS_QUEUED], scheduler->counts[engine->id].queued);
+	WARN(stats->counts[I915_SQS_FLYING] != scheduler->counts[engine->id].flying,
+	     "Flying count mis-match: %d vs %d!\n",
+	     stats->counts[I915_SQS_FLYING], scheduler->counts[engine->id].flying);
+
+	spin_unlock_irq(&scheduler->lock);
+
+	return 0;
+}
+
 static int i915_scheduler_submit_max_priority(struct intel_engine_cs *engine,
 					      bool is_locked)
 {
@@ -1160,6 +1234,7 @@ int i915_scheduler_flush_stamp(struct intel_engine_cs *engine,
 	}
 
 	spin_lock_irq(&scheduler->lock);
+	scheduler->stats[engine->id].flush_stamp++;
 	i915_scheduler_priority_bump_clear(scheduler);
 	for_each_scheduler_node(node, engine->id) {
 		if (!I915_SQS_IS_QUEUED(node))
@@ -1170,12 +1245,15 @@ int i915_scheduler_flush_stamp(struct intel_engine_cs *engine,
 
 		flush_count = i915_scheduler_priority_bump(scheduler,
 					node, scheduler->priority_level_max);
+		scheduler->stats[engine->id].flush_bump += flush_count;
 	}
 	spin_unlock_irq(&scheduler->lock);
 
 	if (flush_count) {
 		DRM_DEBUG_DRIVER("<%s> Bumped %d entries\n", engine->name, flush_count);
 		flush_count = i915_scheduler_submit_max_priority(engine, is_locked);
+		if (flush_count > 0)
+			scheduler->stats[engine->id].flush_submit += flush_count;
 	}
 
 	return flush_count;
@@ -1211,6 +1289,8 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked)
 
 	WARN_ON(is_locked && (scheduler->flags[engine->id] & I915_SF_SUBMITTING));
 
+	scheduler->stats[engine->id].flush_all++;
+
 	do {
 		found = false;
 		spin_lock_irq(&scheduler->lock);
@@ -1228,6 +1308,7 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked)
 				ret = i915_scheduler_submit(engine);
 			else
 				ret = i915_scheduler_submit_unlocked(engine);
+			scheduler->stats[engine->id].flush_submit++;
 			if (ret < 0)
 				return ret;
 
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 5c33c83..464c051 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -80,6 +80,36 @@ struct i915_scheduler_node_states {
 	uint32_t queued;
 };
 
+struct i915_scheduler_stats {
+	/* Batch buffer counts: */
+	uint32_t queued;
+	uint32_t submitted;
+	uint32_t completed;
+	uint32_t expired;
+
+	/* Other stuff: */
+	uint32_t flush_obj;
+	uint32_t flush_req;
+	uint32_t flush_stamp;
+	uint32_t flush_all;
+	uint32_t flush_bump;
+	uint32_t flush_submit;
+
+	uint32_t exec_early;
+	uint32_t exec_again;
+	uint32_t exec_dead;
+	uint32_t kill_flying;
+	uint32_t kill_queued;
+
+	uint32_t file_wait;
+	uint32_t file_stall;
+	uint32_t file_lost;
+};
+
+struct i915_scheduler_stats_nodes {
+	uint32_t counts[I915_SQS_MAX + 1];
+};
+
 struct i915_scheduler {
 	struct list_head node_queue[I915_NUM_ENGINES];
 	uint32_t flags[I915_NUM_ENGINES];
@@ -95,6 +125,9 @@ struct i915_scheduler {
 	int32_t priority_level_preempt;
 	uint32_t min_flying;
 	uint32_t file_queue_max;
+
+	/* Statistics: */
+	struct i915_scheduler_stats stats[I915_NUM_ENGINES];
 };
 
 /* Flag bits for i915_scheduler::flags */
@@ -117,6 +150,9 @@ int i915_scheduler_flush(struct intel_engine_cs *engine, bool is_locked);
 int i915_scheduler_flush_stamp(struct intel_engine_cs *engine,
 			       unsigned long stamp, bool is_locked);
 bool i915_scheduler_is_mutex_required(struct drm_i915_gem_request *req);
-bool i915_scheduler_file_queue_wait(struct drm_file *file);
+int i915_scheduler_query_stats(struct intel_engine_cs *engine,
+			       struct i915_scheduler_stats_nodes *stats);
+bool i915_scheduler_file_queue_wait(struct drm_file *file,
+				    struct intel_engine_cs *engine);
 
 #endif  /* _I915_SCHEDULER_H_ */
-- 
1.9.1