[Intel-gfx] [PATCH 5/7] drm/i915/pmu: Add runnable counter
Tvrtko Ursulin
tursulin at ursulin.net
Thu Jun 7 13:24:54 UTC 2018
From: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
Add a PMU counter that exposes the number of requests whose dependencies
have been resolved and which are waiting for a slot on the GPU to run.
This is useful for analyzing the overall load of the system.
v2: Don't limit to gen8+.
v3:
* Rebase for dynamic sysfs.
* Drop currently executing requests.
v4:
* Sync with internal renaming.
* Drop floating point constant. (Chris Wilson)
v5:
* Change scale to 1024 for faster arithmetic. (Chris Wilson)
v6:
* Refactored for timer period accounting.
v7:
* Avoid 64-bit division. (Chris Wilson)
v8:
* Do fewer divisions by accumulating in qd.ns units. (Chris Wilson)
* Change counter scale to avoid multiplication in readout and increase
counter headroom.
Signed-off-by: Tvrtko Ursulin <tvrtko.ursulin at intel.com>
---
drivers/gpu/drm/i915/i915_pmu.c | 20 ++++++++++++++++++--
drivers/gpu/drm/i915/intel_ringbuffer.h | 2 +-
include/uapi/drm/i915_drm.h | 7 ++++++-
3 files changed, 25 insertions(+), 4 deletions(-)
diff --git a/drivers/gpu/drm/i915/i915_pmu.c b/drivers/gpu/drm/i915/i915_pmu.c
index f8a819600ebc..bdfb430909b4 100644
--- a/drivers/gpu/drm/i915/i915_pmu.c
+++ b/drivers/gpu/drm/i915/i915_pmu.c
@@ -16,7 +16,8 @@
(BIT(I915_SAMPLE_BUSY) | \
BIT(I915_SAMPLE_WAIT) | \
BIT(I915_SAMPLE_SEMA) | \
- BIT(I915_SAMPLE_QUEUED))
+ BIT(I915_SAMPLE_QUEUED) | \
+ BIT(I915_SAMPLE_RUNNABLE))
#define ENGINE_SAMPLE_BITS (1 << I915_PMU_SAMPLE_BITS)
@@ -216,6 +217,11 @@ engines_sample(struct drm_i915_private *dev_priv, unsigned int period_ns)
add_sample_mult(&engine->pmu.sample[I915_SAMPLE_QUEUED],
atomic_read(&engine->request_stats.queued),
period_ns);
+
+ if (engine->pmu.enable & BIT(I915_SAMPLE_RUNNABLE))
+ add_sample_mult(&engine->pmu.sample[I915_SAMPLE_RUNNABLE],
+ engine->request_stats.runnable,
+ period_ns);
}
if (fw)
@@ -330,6 +336,7 @@ engine_event_status(struct intel_engine_cs *engine,
case I915_SAMPLE_BUSY:
case I915_SAMPLE_WAIT:
case I915_SAMPLE_QUEUED:
+ case I915_SAMPLE_RUNNABLE:
break;
case I915_SAMPLE_SEMA:
if (INTEL_GEN(engine->i915) < 6)
@@ -548,9 +555,12 @@ static u64 __i915_pmu_event_read(struct perf_event *event)
} else {
val = engine->pmu.sample[sample].cur;
- if (sample == I915_SAMPLE_QUEUED) {
+ if (sample == I915_SAMPLE_QUEUED ||
+ sample == I915_SAMPLE_RUNNABLE) {
BUILD_BUG_ON(NSEC_PER_SEC %
I915_SAMPLE_QUEUED_DIVISOR);
+ BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
+ I915_SAMPLE_RUNNABLE_DIVISOR);
/* to qd */
val = div_u64(val,
NSEC_PER_SEC /
@@ -851,6 +861,7 @@ add_pmu_attr(struct perf_pmu_events_attr *attr, const char *name,
/* No brackets or quotes below please. */
#define I915_SAMPLE_QUEUED_SCALE 0.001
+#define I915_SAMPLE_RUNNABLE_SCALE 0.001
static struct attribute **
create_event_attributes(struct drm_i915_private *i915)
@@ -876,6 +887,8 @@ create_event_attributes(struct drm_i915_private *i915)
__engine_event(I915_SAMPLE_WAIT, "wait"),
__engine_event_scale(I915_SAMPLE_QUEUED, "queued",
__stringify(I915_SAMPLE_QUEUED_SCALE)),
+ __engine_event_scale(I915_SAMPLE_RUNNABLE, "runnable",
+ __stringify(I915_SAMPLE_RUNNABLE_SCALE)),
};
unsigned int count = 0;
struct perf_pmu_events_attr *pmu_attr = NULL, *pmu_iter;
@@ -888,6 +901,9 @@ create_event_attributes(struct drm_i915_private *i915)
BUILD_BUG_ON(I915_SAMPLE_QUEUED_DIVISOR !=
(1 / I915_SAMPLE_QUEUED_SCALE));
+ BUILD_BUG_ON(I915_SAMPLE_RUNNABLE_DIVISOR !=
+ (1 / I915_SAMPLE_RUNNABLE_SCALE));
+
/* Count how many counters we will be exposing. */
for (i = 0; i < ARRAY_SIZE(events); i++) {
if (!config_status(i915, events[i].config))
diff --git a/drivers/gpu/drm/i915/intel_ringbuffer.h b/drivers/gpu/drm/i915/intel_ringbuffer.h
index 902b63eeaf50..703cea694f0d 100644
--- a/drivers/gpu/drm/i915/intel_ringbuffer.h
+++ b/drivers/gpu/drm/i915/intel_ringbuffer.h
@@ -420,7 +420,7 @@ struct intel_engine_cs {
*
* Our internal timer stores the current counters in this field.
*/
-#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_QUEUED + 1)
+#define I915_ENGINE_SAMPLE_MAX (I915_SAMPLE_RUNNABLE + 1)
struct i915_pmu_sample sample[I915_ENGINE_SAMPLE_MAX];
} pmu;
diff --git a/include/uapi/drm/i915_drm.h b/include/uapi/drm/i915_drm.h
index d01a26160a89..11a5822dbc4d 100644
--- a/include/uapi/drm/i915_drm.h
+++ b/include/uapi/drm/i915_drm.h
@@ -111,11 +111,13 @@ enum drm_i915_pmu_engine_sample {
I915_SAMPLE_BUSY = 0,
I915_SAMPLE_WAIT = 1,
I915_SAMPLE_SEMA = 2,
- I915_SAMPLE_QUEUED = 3
+ I915_SAMPLE_QUEUED = 3,
+ I915_SAMPLE_RUNNABLE = 4,
};
/* Divide counter value by divisor to get the real value. */
#define I915_SAMPLE_QUEUED_DIVISOR (1000)
+#define I915_SAMPLE_RUNNABLE_DIVISOR (1000)
#define I915_PMU_SAMPLE_BITS (4)
#define I915_PMU_SAMPLE_MASK (0xf)
@@ -140,6 +142,9 @@ enum drm_i915_pmu_engine_sample {
#define I915_PMU_ENGINE_QUEUED(class, instance) \
__I915_PMU_ENGINE(class, instance, I915_SAMPLE_QUEUED)
+#define I915_PMU_ENGINE_RUNNABLE(class, instance) \
+ __I915_PMU_ENGINE(class, instance, I915_SAMPLE_RUNNABLE)
+
#define __I915_PMU_OTHER(x) (__I915_PMU_ENGINE(0xff, 0xff, 0xf) + 1 + (x))
#define I915_PMU_ACTUAL_FREQUENCY __I915_PMU_OTHER(0)
--
2.17.1
More information about the Intel-gfx mailing list