[Intel-gfx] [RFC 42/44] drm/i915: Added scheduler statistic reporting to debugfs
John.C.Harrison at Intel.com
Thu Jun 26 19:24:33 CEST 2014
From: John Harrison <John.C.Harrison at Intel.com>
It is useful to know what the scheduler is doing, both for debugging and for
performance analysis purposes. This change adds a set of counters that keep
track of various scheduler operations (batches submitted, preempted,
interrupts processed, flush requests, etc.). The data can then be read from
userland via the debugfs mechanism.
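
For example, the counters can then be dumped from userland by reading the new
debugfs entry. Below is a minimal sketch of such a reader; the debugfs mount
point (/sys/kernel/debug) and the DRM minor number (0) are assumptions that
may vary from system to system:

#include <stdio.h>

int main(void)
{
        /* Path is an assumption: debugfs mount and DRM minor may differ. */
        FILE *f = fopen("/sys/kernel/debug/dri/0/i915_scheduler_info", "r");
        char line[256];

        if (!f) {
                perror("i915_scheduler_info");
                return 1;
        }

        /* One text row per counter, with one column per ring. */
        while (fgets(line, sizeof(line), f))
                fputs(line, stdout);

        fclose(f);
        return 0;
}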
---
drivers/gpu/drm/i915/i915_debugfs.c | 85 +++++++++++++++++++++++++++++++++
drivers/gpu/drm/i915/i915_scheduler.c | 66 +++++++++++++++++++++++--
drivers/gpu/drm/i915/i915_scheduler.h | 50 +++++++++++++++++++
3 files changed, 198 insertions(+), 3 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_debugfs.c b/drivers/gpu/drm/i915/i915_debugfs.c
index 1c20c8c..cb9839b 100644
--- a/drivers/gpu/drm/i915/i915_debugfs.c
+++ b/drivers/gpu/drm/i915/i915_debugfs.c
@@ -2482,6 +2482,88 @@ static int i915_display_info(struct seq_file *m, void *unused)
return 0;
}
+#ifdef CONFIG_DRM_I915_SCHEDULER
+static int i915_scheduler_info(struct seq_file *m, void *unused)
+{
+ struct drm_info_node *node = (struct drm_info_node *) m->private;
+ struct drm_device *dev = node->minor->dev;
+ struct drm_i915_private *dev_priv = dev->dev_private;
+ struct i915_scheduler *scheduler = dev_priv->scheduler;
+ struct i915_scheduler_stats *stats = scheduler->stats;
+ struct i915_scheduler_stats_nodes node_stats[I915_NUM_RINGS];
+ struct intel_engine_cs *ring;
+ char str[50 * (I915_NUM_RINGS + 1)], name[50], *ptr;
+ int ret, i, r;
+
+ ret = mutex_lock_interruptible(&dev->mode_config.mutex);
+ if (ret)
+ return ret;
+
+#define PRINT_VAR(name, fmt, var) \
+ do { \
+ sprintf(str, "%-22s", name); \
+ ptr = str + strlen(str); \
+ for_each_ring(ring, dev_priv, r) { \
+ sprintf(ptr, " %10" fmt, var); \
+ ptr += strlen(ptr); \
+ } \
+ seq_printf(m, "%s\n", str); \
+ } while (0)
+
+ PRINT_VAR("Ring name:", "s", dev_priv->ring[r].name);
+ seq_printf(m, "Batch submissions:\n");
+ PRINT_VAR(" Queued", "u", stats[r].queued);
+ PRINT_VAR(" Queued preemptive", "u", stats[r].queued_preemptive);
+ PRINT_VAR(" Submitted", "u", stats[r].submitted);
+ PRINT_VAR(" Submitted preemptive", "u", stats[r].submitted_preemptive);
+ PRINT_VAR(" Preempted", "u", stats[r].preempted);
+ PRINT_VAR(" Completed", "u", stats[r].completed);
+ PRINT_VAR(" Completed preemptive", "u", stats[r].completed_preemptive);
+ PRINT_VAR(" Expired", "u", stats[r].expired);
+ seq_putc(m, '\n');
+
+ seq_printf(m, "Flush counts:\n");
+ PRINT_VAR(" By object", "u", stats[r].flush_obj);
+ PRINT_VAR(" By seqno", "u", stats[r].flush_seqno);
+ PRINT_VAR(" Blanket", "u", stats[r].flush_all);
+ PRINT_VAR(" Entries bumped", "u", stats[r].flush_bump);
+ PRINT_VAR(" Entries submitted", "u", stats[r].flush_submit);
+ seq_putc(m, '\n');
+
+ seq_printf(m, "Interrupt counts:\n");
+ PRINT_VAR(" Regular", "llu", stats[r].irq.regular);
+ PRINT_VAR(" Preemptive", "llu", stats[r].irq.preemptive);
+ PRINT_VAR(" Idle", "llu", stats[r].irq.idle);
+ PRINT_VAR(" Inter-batch", "llu", stats[r].irq.interbatch);
+ PRINT_VAR(" Mid-batch", "llu", stats[r].irq.midbatch);
+ seq_putc(m, '\n');
+
+ seq_printf(m, "Seqno values at last IRQ:\n");
+ PRINT_VAR(" Seqno", "d", stats[r].irq.last_seqno);
+ PRINT_VAR(" Batch done", "d", stats[r].irq.last_b_done);
+ PRINT_VAR(" Preemptive done", "d", stats[r].irq.last_p_done);
+ PRINT_VAR(" Batch active", "d", stats[r].irq.last_b_active);
+ PRINT_VAR(" Preemptive active", "d", stats[r].irq.last_p_active);
+ seq_putc(m, '\n');
+
+ seq_printf(m, "Queue contents:\n");
+ for_each_ring(ring, dev_priv, i)
+ i915_scheduler_query_stats(ring, node_stats + ring->id);
+
+ for (i = 0; i < i915_sqs_MAX; i++) {
+ sprintf(name, " %s", i915_scheduler_queue_status_str(i));
+ PRINT_VAR(name, "d", node_stats[r].counts[i]);
+ }
+ seq_putc(m, '\n');
+
+#undef PRINT_VAR
+
+ mutex_unlock(&dev->mode_config.mutex);
+
+ return 0;
+}
+#endif
+
struct pipe_crc_info {
const char *name;
struct drm_device *dev;
@@ -3928,6 +4010,9 @@ static const struct drm_info_list i915_debugfs_list[] = {
{"i915_pc8_status", i915_pc8_status, 0},
{"i915_power_domain_info", i915_power_domain_info, 0},
{"i915_display_info", i915_display_info, 0},
+#ifdef CONFIG_DRM_I915_SCHEDULER
+ {"i915_scheduler_info", i915_scheduler_info, 0},
+#endif
};
#define I915_DEBUGFS_ENTRIES ARRAY_SIZE(i915_debugfs_list)
diff --git a/drivers/gpu/drm/i915/i915_scheduler.c b/drivers/gpu/drm/i915/i915_scheduler.c
index 8d45b73..c679513 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.c
+++ b/drivers/gpu/drm/i915/i915_scheduler.c
@@ -204,11 +204,13 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
int ret;
qe->params.scheduler_index = scheduler->index++;
+ scheduler->stats[qe->params.ring->id].queued++;
trace_i915_scheduler_queue(qe->params.ring, qe);
scheduler->flags[qe->params.ring->id] |= i915_sf_submitting;
ret = i915_gem_do_execbuffer_final(&qe->params);
+ scheduler->stats[qe->params.ring->id].submitted++;
scheduler->flags[qe->params.ring->id] &= ~i915_sf_submitting;
/* Need to release the objects: */
@@ -229,6 +231,7 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
/* And anything else owned by the QE structure: */
kfree(qe->params.cliprects);
+ scheduler->stats[qe->params.ring->id].expired++;
return ret;
}
@@ -343,10 +346,14 @@ int i915_scheduler_queue_execbuffer(struct i915_scheduler_queue_entry *qe)
want_preempt = false;
#endif
- if (want_preempt)
+ if (want_preempt) {
node->params.scheduler_flags |= i915_ebp_sf_preempt |
i915_ebp_sf_was_preempt;
+ scheduler->stats[ring->id].queued_preemptive++;
+ } else {
+ scheduler->stats[ring->id].queued++;
+ }
+
trace_i915_scheduler_queue(ring, node);
trace_i915_scheduler_node_state_change(ring, node);
@@ -607,6 +614,7 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring,
*/
node->status = i915_sqs_complete;
trace_i915_scheduler_node_state_change(ring, node);
+ scheduler->stats[ring->id].completed_preemptive++;
list_for_each_entry_continue(node, &scheduler->node_queue[ring->id], link) {
BUG_ON(node->status == i915_sqs_overtaking);
@@ -620,6 +628,7 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring,
node->status = i915_sqs_preempted;
trace_i915_scheduler_unfly(ring, node);
trace_i915_scheduler_node_state_change(ring, node);
+ scheduler->stats[ring->id].preempted++;
}
/*
@@ -659,6 +668,7 @@ static void i915_scheduler_seqno_complete(struct intel_engine_cs *ring,
/* Node was in flight so mark it as complete. */
node->status = i915_sqs_complete;
trace_i915_scheduler_node_state_change(ring, node);
+ scheduler->stats[ring->id].completed++;
}
}
@@ -753,6 +763,7 @@ int i915_scheduler_handle_IRQ(struct intel_engine_cs *ring)
{
struct drm_i915_private *dev_priv = ring->dev->dev_private;
struct i915_scheduler *scheduler = dev_priv->scheduler;
+ struct i915_scheduler_stats *stats = scheduler->stats + ring->id;
unsigned long flags;
uint32_t b_active, b_done, p_active, p_done;
@@ -763,7 +774,13 @@ int i915_scheduler_handle_IRQ(struct intel_engine_cs *ring)
b_done = intel_read_status_page(ring, I915_BATCH_DONE_SEQNO);
b_active = intel_read_status_page(ring, I915_BATCH_ACTIVE_SEQNO);
- trace_i915_scheduler_irq(ring, ring->get_seqno(ring, false),
+ stats->irq.last_b_done = b_done;
+ stats->irq.last_p_done = p_done;
+ stats->irq.last_b_active = b_active;
+ stats->irq.last_p_active = p_active;
+ stats->irq.last_seqno = ring->get_seqno(ring, false);
+
+ trace_i915_scheduler_irq(ring, stats->irq.last_seqno,
b_active, b_done, p_active, p_done);
if (i915.scheduler_override & i915_so_direct_submit) {
@@ -775,6 +792,7 @@ int i915_scheduler_handle_IRQ(struct intel_engine_cs *ring)
if (b_done != ring->last_regular_batch) {
i915_scheduler_seqno_complete(ring, b_done, false);
ring->last_regular_batch = b_done;
+ stats->irq.regular += 1;
}
if (p_done) {
@@ -784,15 +802,19 @@ int i915_scheduler_handle_IRQ(struct intel_engine_cs *ring)
* batch has been preempted mid-batch. All other batches still
* in flight have been preempted before starting.
*/
+ stats->irq.preemptive += 1;
BUG_ON(p_active != p_done);
if (b_active == p_active) {
/* null preemption (ring was idle) */
+ stats->irq.idle += 1;
} else if (b_active == 0) {
/* interbatch preemption (ring was busy) */
+ stats->irq.interbatch += 1;
} else /* any other value of b_active */ {
/* midbatch preemption (batch was running) */
uint32_t b_addr = intel_read_status_page(ring, I915_SAVE_PREEMPTED_BB_PTR);
i915_scheduler_seqno_started(ring, b_active, true, b_addr);
+ stats->irq.midbatch += 1;
}
i915_scheduler_seqno_complete(ring, p_done, true);
@@ -871,6 +893,7 @@ static int i915_scheduler_seqno_complete(struct intel_engine_cs *ring, uint32_t
/* Node was in flight so mark it as complete. */
node->status = i915_sqs_complete;
trace_i915_scheduler_node_state_change(ring, node);
+ scheduler->stats[ring->id].completed++;
}
/* Should submit new work here if flight list is empty but the DRM
@@ -975,6 +998,7 @@ int i915_scheduler_remove(struct intel_engine_cs *ring)
list_del(&node->link);
list_add(&node->link, &remove);
+ scheduler->stats[ring->id].expired++;
/* Strip the dependency info while the mutex is still locked */
i915_scheduler_remove_dependent(scheduler, node);
@@ -1195,6 +1219,32 @@ int i915_scheduler_dump_locked(struct intel_engine_cs *ring, const char *msg)
return 0;
}
+int i915_scheduler_query_stats(struct intel_engine_cs *ring,
+ struct i915_scheduler_stats_nodes *stats)
+{
+ struct drm_i915_private *dev_priv = ring->dev->dev_private;
+ struct i915_scheduler *scheduler = dev_priv->scheduler;
+ struct i915_scheduler_queue_entry *node;
+ unsigned long flags;
+
+ memset(stats, 0x00, sizeof(*stats));
+
+ spin_lock_irqsave(&scheduler->lock, flags);
+
+ list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
+ if (node->status >= i915_sqs_MAX) {
+ DRM_DEBUG_SCHED("Invalid node state: %d! [seqno = %d]\n",
+ node->status, node->params.seqno);
+ }
+
+ stats->counts[node->status]++;
+ }
+
+ spin_unlock_irqrestore(&scheduler->lock, flags);
+
+ return 0;
+}
+
int i915_scheduler_flush_seqno(struct intel_engine_cs *ring, bool is_locked,
uint32_t seqno)
{
@@ -1220,6 +1270,8 @@ int i915_scheduler_flush_seqno(struct intel_engine_cs *ring, bool is_locked,
spin_lock_irqsave(&scheduler->lock, flags);
+ scheduler->stats[ring->id].flush_seqno++;
+
i915_scheduler_priority_bump_clear(scheduler, ring);
list_for_each_entry(node, &scheduler->node_queue[ring->id], link) {
@@ -1231,6 +1283,7 @@ int i915_scheduler_flush_seqno(struct intel_engine_cs *ring, bool is_locked,
flush_count += i915_scheduler_priority_bump(scheduler,
node, scheduler->priority_level_max);
+ scheduler->stats[ring->id].flush_bump += flush_count;
}
spin_unlock_irqrestore(&scheduler->lock, flags);
@@ -1238,6 +1291,7 @@ int i915_scheduler_flush_seqno(struct intel_engine_cs *ring, bool is_locked,
if (flush_count) {
DRM_DEBUG_SCHED("<%s> Bumped %d entries\n", ring->name, flush_count);
flush_count = i915_scheduler_submit_max_priority(ring, is_locked);
+ scheduler->stats[ring->id].flush_submit += flush_count;
}
return flush_count;
@@ -1264,6 +1318,8 @@ int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked)
BUG_ON(is_locked && (scheduler->flags[ring->id] & i915_sf_submitting));
+ scheduler->stats[ring->id].flush_all++;
+
do {
found = false;
spin_lock_irqsave(&scheduler->lock, flags);
@@ -1278,6 +1334,7 @@ int i915_scheduler_flush(struct intel_engine_cs *ring, bool is_locked)
if (found) {
ret = i915_scheduler_submit(ring, is_locked);
+ scheduler->stats[ring->id].flush_submit++;
if (ret < 0)
return ret;
@@ -1573,8 +1630,11 @@ int i915_scheduler_submit(struct intel_engine_cs *ring, bool was_locked)
* up until it has at least begun to be executed. That is,
* if a pre-emption request is in flight then no other work
* may be submitted until it resolves. */
- if (node->params.scheduler_flags & i915_ebp_sf_preempt)
+ if (node->params.scheduler_flags & i915_ebp_sf_preempt) {
scheduler->flags[ring->id] |= i915_sf_preempting;
+ scheduler->stats[ring->id].submitted_preemptive++;
+ } else {
+ scheduler->stats[ring->id].submitted++;
+ }
scheduler->flags[ring->id] |= i915_sf_submitting;
spin_unlock_irqrestore(&scheduler->lock, flags);
diff --git a/drivers/gpu/drm/i915/i915_scheduler.h b/drivers/gpu/drm/i915/i915_scheduler.h
index 2f8c566..8d2289f 100644
--- a/drivers/gpu/drm/i915/i915_scheduler.h
+++ b/drivers/gpu/drm/i915/i915_scheduler.h
@@ -123,6 +123,51 @@ bool i915_scheduler_is_idle(struct intel_engine_cs *ring);
#ifdef CONFIG_DRM_I915_SCHEDULER
+struct i915_scheduler_stats_nodes {
+ uint32_t counts[i915_sqs_MAX];
+};
+
+struct i915_scheduler_stats_irq {
+ /* Counts of various interrupt types */
+ uint64_t regular;
+ uint64_t preemptive;
+ uint64_t idle;
+ uint64_t interbatch;
+ uint64_t midbatch;
+
+ /* Sequence numbers seen at last IRQ */
+ uint32_t last_seqno;
+ uint32_t last_b_done;
+ uint32_t last_p_done;
+ uint32_t last_b_active;
+ uint32_t last_p_active;
+};
+
+struct i915_scheduler_stats {
+ /* Batch buffer counts: */
+ uint32_t queued;
+ uint32_t queued_preemptive;
+ uint32_t submitted;
+ uint32_t submitted_preemptive;
+ uint32_t preempted;
+ uint32_t completed;
+ uint32_t completed_preemptive;
+ uint32_t expired;
+
+ /* Other stuff: */
+ uint32_t flush_obj;
+ uint32_t flush_seqno;
+ uint32_t flush_all;
+ uint32_t flush_bump;
+ uint32_t flush_submit;
+
+ /* Interrupts: */
+ struct i915_scheduler_stats_irq irq;
+};
+
struct i915_scheduler {
struct list_head node_queue[I915_NUM_RINGS];
uint32_t flags[I915_NUM_RINGS];
@@ -134,6 +179,9 @@ struct i915_scheduler {
uint32_t priority_level_preempt;
uint32_t min_flying;
uint32_t file_queue_max;
+
+ /* Statistics: */
+ struct i915_scheduler_stats stats[I915_NUM_RINGS];
};
/* Flag bits for i915_scheduler::flags */
@@ -187,6 +235,8 @@ int i915_scheduler_priority_bump(struct i915_scheduler *scheduler,
uint32_t bump);
bool i915_scheduler_is_seqno_in_flight(struct intel_engine_cs *ring,
uint32_t seqno, bool *completed);
+int i915_scheduler_query_stats(struct intel_engine_cs *ring,
+ struct i915_scheduler_stats_nodes *stats);
bool i915_scheduler_file_queue_is_full(struct drm_file *file);
void i915_scheduler_file_queue_inc(struct drm_file *file);
--
1.7.9.5